mirror of
https://github.com/d3vyce/sqlalchemy-pgview.git
synced 2026-03-01 23:10:46 +01:00
Initial commit
This commit is contained in:
307
docs/guide/materialized-views.md
Normal file
307
docs/guide/materialized-views.md
Normal file
@@ -0,0 +1,307 @@
|
||||
# Materialized Views
|
||||
|
||||
Materialized views store the query results physically, making them faster to query but requiring explicit refresh to update.
|
||||
|
||||
## When to Use Materialized Views
|
||||
|
||||
| Use Case | Regular View | Materialized View |
|
||||
|----------|--------------|-------------------|
|
||||
| Complex aggregations queried frequently | Slow | **Fast** |
|
||||
| Data that changes frequently | **Current** | Stale |
|
||||
| Reports and dashboards | Slow | **Fast** |
|
||||
| Real-time data requirements | **Yes** | No |
|
||||
| Large dataset summaries | Slow | **Fast** |
|
||||
|
||||
## Creating Materialized Views (Declarative)
|
||||
|
||||
The recommended way to create materialized views is using the declarative pattern with multiple inheritance:
|
||||
|
||||
```python
|
||||
from decimal import Decimal
|
||||
from sqlalchemy import select, func, String, Numeric, Integer, DateTime
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
||||
from sqlalchemy_pgview import MaterializedViewBase
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
pass
|
||||
|
||||
class Order(Base):
|
||||
__tablename__ = "orders"
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
total: Mapped[Decimal] = mapped_column(Numeric(10, 2))
|
||||
created_at: Mapped[DateTime] = mapped_column(DateTime)
|
||||
|
||||
# Define materialized view with data populated immediately
|
||||
class MonthlySales(MaterializedViewBase, Base):
|
||||
__tablename__ = "monthly_sales"
|
||||
__select__ = select(
|
||||
func.date_trunc('month', Order.created_at).label("month"),
|
||||
func.count(Order.id).label("order_count"),
|
||||
func.sum(Order.total).label("revenue"),
|
||||
).group_by(func.date_trunc('month', Order.created_at))
|
||||
__with_data__ = True # Default: populate on creation
|
||||
|
||||
# Create tables and views together
|
||||
engine = create_engine("postgresql://user:pass@localhost/mydb")
|
||||
Base.metadata.create_all(engine)
|
||||
```
|
||||
|
||||
### MaterializedViewBase Attributes
|
||||
|
||||
| Attribute | Required | Description |
|
||||
|-----------|----------|-------------|
|
||||
| `__tablename__` | Yes | Name of the materialized view in the database |
|
||||
| `__select__` | Yes | SELECT statement that defines the view |
|
||||
| `__schema__` | No | Database schema (default: None/public) |
|
||||
| `__with_data__` | No | Populate data on creation (default: True) |
|
||||
|
||||
## Querying Materialized Views
|
||||
|
||||
```python
|
||||
from sqlalchemy import select
|
||||
|
||||
with engine.connect() as conn:
|
||||
# Query all rows
|
||||
result = conn.execute(select(MonthlySales.as_table())).fetchall()
|
||||
|
||||
# Query with filtering using .c accessor
|
||||
result = conn.execute(
|
||||
select(MonthlySales.as_table())
|
||||
.where(MonthlySales.c.order_count > 10)
|
||||
.order_by(MonthlySales.c.revenue.desc())
|
||||
).fetchall()
|
||||
|
||||
for row in result:
|
||||
print(f"{row.month}: {row.order_count} orders, ${row.revenue}")
|
||||
```
|
||||
|
||||
## Refreshing Materialized Views
|
||||
|
||||
Materialized views become stale when underlying data changes. Refresh to update:
|
||||
|
||||
```python
|
||||
with engine.begin() as conn:
|
||||
# Using the class method (declarative)
|
||||
MonthlySales.refresh(conn)
|
||||
|
||||
# Or using the view object
|
||||
MonthlySales.as_view().refresh(conn)
|
||||
|
||||
# Concurrent refresh (requires a unique index, doesn't block reads)
|
||||
MonthlySales.refresh(conn, concurrently=True)
|
||||
|
||||
# Refresh without data (empties the view)
|
||||
MonthlySales.refresh(conn, with_data=False)
|
||||
```
|
||||
|
||||
!!! warning "Concurrent Refresh Requirements"
|
||||
`REFRESH MATERIALIZED VIEW CONCURRENTLY` requires:
|
||||
|
||||
- A unique index on the materialized view
|
||||
- The view must already contain data (can't be empty)
|
||||
|
||||
## Stale Data Behavior
|
||||
|
||||
!!! warning "Materialized Views Don't Auto-Update"
|
||||
Unlike regular views, materialized views show **stale data** until refreshed.
|
||||
|
||||
```python
|
||||
with engine.begin() as conn:
|
||||
# Insert new data into underlying table
|
||||
conn.execute(insert(Order.__table__).values(...))
|
||||
|
||||
# Materialized view still shows old data!
|
||||
# Refresh to see new data
|
||||
MonthlySales.refresh(conn)
|
||||
|
||||
# Now the view reflects the changes
|
||||
result = conn.execute(select(MonthlySales.as_table())).fetchall()
|
||||
```
|
||||
|
||||
## Auto-Refresh (ORM)
|
||||
|
||||
SQLAlchemy-PGView can automatically refresh materialized views when watched tables are modified via ORM:
|
||||
|
||||
```python
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
# Enable auto-refresh when Order table changes
|
||||
MonthlySales.auto_refresh_on(Session, Order.__table__)
|
||||
|
||||
# Now ORM commits automatically refresh the view
|
||||
with Session(engine) as session:
|
||||
session.add(Order(total=Decimal("99.99"), created_at=datetime.now()))
|
||||
session.commit() # MonthlySales is refreshed automatically!
|
||||
```
|
||||
|
||||
!!! tip "Custom Session Class"
|
||||
For isolated testing or specific workflows, create a custom Session subclass:
|
||||
|
||||
```python
|
||||
class AnalyticsSession(Session):
|
||||
pass
|
||||
|
||||
MonthlySales.auto_refresh_on(AnalyticsSession, Order.__table__)
|
||||
```
|
||||
|
||||
## Auto-Refresh (Core)
|
||||
|
||||
For SQLAlchemy Core (without ORM), use `AutoRefreshContext`:
|
||||
|
||||
```python
|
||||
from sqlalchemy_pgview import AutoRefreshContext
|
||||
|
||||
# Get the underlying view and table objects
|
||||
mview = MonthlySales.as_view()
|
||||
orders_table = Order.__table__
|
||||
|
||||
with engine.begin() as conn:
|
||||
with AutoRefreshContext(conn, mview, orders_table):
|
||||
conn.execute(insert(orders_table).values(total=100, created_at=datetime.now()))
|
||||
conn.execute(insert(orders_table).values(total=200, created_at=datetime.now()))
|
||||
# View is refreshed when exiting the context
|
||||
|
||||
# View now shows updated data
|
||||
result = conn.execute(select(MonthlySales.as_table())).fetchone()
|
||||
```
|
||||
|
||||
!!! warning "Auto-Refresh Considerations"
|
||||
- **Performance**: Each commit triggers a refresh. For high-frequency writes, use scheduled refresh instead.
|
||||
- **Exceptions**: `AutoRefreshContext` only refreshes on successful exit (no exception).
|
||||
- **ORM-only**: `auto_refresh_on()` only catches changes made through the ORM Session, not raw SQL.
|
||||
|
||||
## Refresh Strategies
|
||||
|
||||
| Data Freshness Requirement | Strategy |
|
||||
|---------------------------|----------|
|
||||
| Real-time (careful!) | Auto-refresh on commit |
|
||||
| Minutes | Frequent scheduled refresh |
|
||||
| Hours | Hourly cron job |
|
||||
| Daily | Nightly refresh |
|
||||
| On-demand | Manual refresh before queries |
|
||||
|
||||
## Imperative API (Alternative)
|
||||
|
||||
For SQLAlchemy Core or when declarative style isn't suitable:
|
||||
|
||||
```python
|
||||
from sqlalchemy import MetaData, Table, Column, Integer, Numeric, DateTime, select, func
|
||||
from sqlalchemy_pgview import MaterializedView, CreateMaterializedView, RefreshMaterializedView
|
||||
|
||||
metadata = MetaData()
|
||||
|
||||
orders = Table(
|
||||
"orders", metadata,
|
||||
Column("id", Integer, primary_key=True),
|
||||
Column("total", Numeric(10, 2)),
|
||||
Column("created_at", DateTime),
|
||||
)
|
||||
|
||||
# Define materialized view
|
||||
monthly_sales = MaterializedView(
|
||||
"monthly_sales",
|
||||
select(
|
||||
func.date_trunc('month', orders.c.created_at).label("month"),
|
||||
func.count(orders.c.id).label("order_count"),
|
||||
func.sum(orders.c.total).label("revenue"),
|
||||
).group_by(func.date_trunc('month', orders.c.created_at)),
|
||||
metadata=metadata,
|
||||
with_data=True,
|
||||
)
|
||||
|
||||
# Create tables and views
|
||||
metadata.create_all(engine)
|
||||
|
||||
# Refresh
|
||||
with engine.begin() as conn:
|
||||
monthly_sales.refresh(conn)
|
||||
# Or using DDL directly
|
||||
conn.execute(RefreshMaterializedView(monthly_sales, concurrently=True))
|
||||
```
|
||||
|
||||
## Dropping Materialized Views
|
||||
|
||||
Materialized views are automatically dropped when using `metadata.drop_all()`. For manual control:
|
||||
|
||||
```python
|
||||
from sqlalchemy_pgview import DropMaterializedView
|
||||
|
||||
with engine.begin() as conn:
|
||||
# Drop using view object
|
||||
conn.execute(DropMaterializedView(MonthlySales.as_view()))
|
||||
|
||||
# Drop by name with options
|
||||
conn.execute(DropMaterializedView("old_mv", if_exists=True))
|
||||
|
||||
# Drop with cascade (drops dependent objects)
|
||||
conn.execute(DropMaterializedView("base_mv", cascade=True))
|
||||
```
|
||||
|
||||
## List Registered Views
|
||||
|
||||
```python
|
||||
from sqlalchemy_pgview import get_views
|
||||
|
||||
# Get all registered views from metadata (includes materialized views)
|
||||
views = get_views(Base.metadata)
|
||||
print(views) # {'monthly_sales': <MaterializedView 'monthly_sales'>}
|
||||
```
|
||||
|
||||
## Complete Example
|
||||
|
||||
```python
|
||||
from decimal import Decimal
|
||||
from datetime import datetime
|
||||
from sqlalchemy import create_engine, select, func, String, Numeric, Integer, DateTime
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, Session
|
||||
from sqlalchemy_pgview import MaterializedViewBase
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
pass
|
||||
|
||||
class Product(Base):
|
||||
__tablename__ = "products"
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
name: Mapped[str] = mapped_column(String(100))
|
||||
category: Mapped[str] = mapped_column(String(50))
|
||||
|
||||
class Sale(Base):
|
||||
__tablename__ = "sales"
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
product_id: Mapped[int] = mapped_column(Integer)
|
||||
quantity: Mapped[int] = mapped_column(Integer)
|
||||
amount: Mapped[Decimal] = mapped_column(Numeric(10, 2))
|
||||
sold_at: Mapped[datetime] = mapped_column(DateTime)
|
||||
|
||||
class CategorySales(MaterializedViewBase, Base):
|
||||
__tablename__ = "category_sales"
|
||||
__select__ = select(
|
||||
Product.category,
|
||||
func.count(Sale.id).label("sale_count"),
|
||||
func.sum(Sale.quantity).label("total_quantity"),
|
||||
func.sum(Sale.amount).label("total_revenue"),
|
||||
).select_from(
|
||||
Sale.__table__.join(Product.__table__, Sale.product_id == Product.id)
|
||||
).group_by(Product.category)
|
||||
|
||||
# Setup
|
||||
engine = create_engine("postgresql://...")
|
||||
Base.metadata.create_all(engine)
|
||||
|
||||
# Enable auto-refresh
|
||||
CategorySales.auto_refresh_on(Session, Sale.__table__)
|
||||
|
||||
# Add data - view refreshes automatically on commit
|
||||
with Session(engine) as session:
|
||||
session.add(Sale(product_id=1, quantity=5, amount=Decimal("49.95"), sold_at=datetime.now()))
|
||||
session.commit()
|
||||
|
||||
# Query fresh data
|
||||
with engine.connect() as conn:
|
||||
results = conn.execute(
|
||||
select(CategorySales.as_table()).order_by(CategorySales.c.total_revenue.desc())
|
||||
).fetchall()
|
||||
|
||||
for row in results:
|
||||
print(f"{row.category}: {row.sale_count} sales, ${row.total_revenue}")
|
||||
```
|
||||
Reference in New Issue
Block a user