mirror of
https://github.com/d3vyce/sqlalchemy-pgview.git
synced 2026-03-01 23:00:46 +01:00
308 lines
9.6 KiB
Markdown
308 lines
9.6 KiB
Markdown
# Materialized Views
|
|
|
|
Materialized views store the query results physically, making them faster to query but requiring explicit refresh to update.
|
|
|
|
## When to Use Materialized Views
|
|
|
|
| Use Case | Regular View | Materialized View |
|
|
|----------|--------------|-------------------|
|
|
| Complex aggregations queried frequently | Slow | **Fast** |
|
|
| Data that changes frequently | **Current** | Stale |
|
|
| Reports and dashboards | Slow | **Fast** |
|
|
| Real-time data requirements | **Yes** | No |
|
|
| Large dataset summaries | Slow | **Fast** |
|
|
|
|
## Creating Materialized Views (Declarative)
|
|
|
|
The recommended way to create materialized views is using the declarative pattern with multiple inheritance:
|
|
|
|
```python
|
|
from decimal import Decimal
|
|
from sqlalchemy import select, func, String, Numeric, Integer, DateTime
|
|
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
from sqlalchemy_pgview import MaterializedViewBase
|
|
|
|
class Base(DeclarativeBase):
|
|
pass
|
|
|
|
class Order(Base):
|
|
__tablename__ = "orders"
|
|
id: Mapped[int] = mapped_column(primary_key=True)
|
|
total: Mapped[Decimal] = mapped_column(Numeric(10, 2))
|
|
created_at: Mapped[DateTime] = mapped_column(DateTime)
|
|
|
|
# Define materialized view with data populated immediately
|
|
class MonthlySales(MaterializedViewBase, Base):
|
|
__tablename__ = "monthly_sales"
|
|
__select__ = select(
|
|
func.date_trunc('month', Order.created_at).label("month"),
|
|
func.count(Order.id).label("order_count"),
|
|
func.sum(Order.total).label("revenue"),
|
|
).group_by(func.date_trunc('month', Order.created_at))
|
|
__with_data__ = True # Default: populate on creation
|
|
|
|
# Create tables and views together
|
|
engine = create_engine("postgresql://user:pass@localhost/mydb")
|
|
Base.metadata.create_all(engine)
|
|
```
|
|
|
|
### MaterializedViewBase Attributes
|
|
|
|
| Attribute | Required | Description |
|
|
|-----------|----------|-------------|
|
|
| `__tablename__` | Yes | Name of the materialized view in the database |
|
|
| `__select__` | Yes | SELECT statement that defines the view |
|
|
| `__schema__` | No | Database schema (default: None/public) |
|
|
| `__with_data__` | No | Populate data on creation (default: True) |
|
|
|
|
## Querying Materialized Views
|
|
|
|
```python
|
|
from sqlalchemy import select
|
|
|
|
with engine.connect() as conn:
|
|
# Query all rows
|
|
result = conn.execute(select(MonthlySales.as_table())).fetchall()
|
|
|
|
# Query with filtering using .c accessor
|
|
result = conn.execute(
|
|
select(MonthlySales.as_table())
|
|
.where(MonthlySales.c.order_count > 10)
|
|
.order_by(MonthlySales.c.revenue.desc())
|
|
).fetchall()
|
|
|
|
for row in result:
|
|
print(f"{row.month}: {row.order_count} orders, ${row.revenue}")
|
|
```
|
|
|
|
## Refreshing Materialized Views
|
|
|
|
Materialized views become stale when underlying data changes. Refresh to update:
|
|
|
|
```python
|
|
with engine.begin() as conn:
|
|
# Using the class method (declarative)
|
|
MonthlySales.refresh(conn)
|
|
|
|
# Or using the view object
|
|
MonthlySales.as_view().refresh(conn)
|
|
|
|
# Concurrent refresh (requires a unique index, doesn't block reads)
|
|
MonthlySales.refresh(conn, concurrently=True)
|
|
|
|
# Refresh without data (empties the view)
|
|
MonthlySales.refresh(conn, with_data=False)
|
|
```
|
|
|
|
!!! warning "Concurrent Refresh Requirements"
|
|
`REFRESH MATERIALIZED VIEW CONCURRENTLY` requires:
|
|
|
|
- A unique index on the materialized view
|
|
- The view must already contain data (can't be empty)
|
|
|
|
## Stale Data Behavior
|
|
|
|
!!! warning "Materialized Views Don't Auto-Update"
|
|
Unlike regular views, materialized views show **stale data** until refreshed.
|
|
|
|
```python
|
|
with engine.begin() as conn:
|
|
# Insert new data into underlying table
|
|
conn.execute(insert(Order.__table__).values(...))
|
|
|
|
# Materialized view still shows old data!
|
|
# Refresh to see new data
|
|
MonthlySales.refresh(conn)
|
|
|
|
# Now the view reflects the changes
|
|
result = conn.execute(select(MonthlySales.as_table())).fetchall()
|
|
```
|
|
|
|
## Auto-Refresh (ORM)
|
|
|
|
SQLAlchemy-PGView can automatically refresh materialized views when watched tables are modified via ORM:
|
|
|
|
```python
|
|
from sqlalchemy.orm import Session
|
|
|
|
# Enable auto-refresh when Order table changes
|
|
MonthlySales.auto_refresh_on(Session, Order.__table__)
|
|
|
|
# Now ORM commits automatically refresh the view
|
|
with Session(engine) as session:
|
|
session.add(Order(total=Decimal("99.99"), created_at=datetime.now()))
|
|
session.commit() # MonthlySales is refreshed automatically!
|
|
```
|
|
|
|
!!! tip "Custom Session Class"
|
|
For isolated testing or specific workflows, create a custom Session subclass:
|
|
|
|
```python
|
|
class AnalyticsSession(Session):
|
|
pass
|
|
|
|
MonthlySales.auto_refresh_on(AnalyticsSession, Order.__table__)
|
|
```
|
|
|
|
## Auto-Refresh (Core)
|
|
|
|
For SQLAlchemy Core (without ORM), use `AutoRefreshContext`:
|
|
|
|
```python
|
|
from sqlalchemy_pgview import AutoRefreshContext
|
|
|
|
# Get the underlying view and table objects
|
|
mview = MonthlySales.as_view()
|
|
orders_table = Order.__table__
|
|
|
|
with engine.begin() as conn:
|
|
with AutoRefreshContext(conn, mview, orders_table):
|
|
conn.execute(insert(orders_table).values(total=100, created_at=datetime.now()))
|
|
conn.execute(insert(orders_table).values(total=200, created_at=datetime.now()))
|
|
# View is refreshed when exiting the context
|
|
|
|
# View now shows updated data
|
|
result = conn.execute(select(MonthlySales.as_table())).fetchone()
|
|
```
|
|
|
|
!!! warning "Auto-Refresh Considerations"
|
|
- **Performance**: Each commit triggers a refresh. For high-frequency writes, use scheduled refresh instead.
|
|
- **Exceptions**: `AutoRefreshContext` only refreshes on successful exit (no exception).
|
|
- **ORM-only**: `auto_refresh_on()` only catches changes made through the ORM Session, not raw SQL.
|
|
|
|
## Refresh Strategies
|
|
|
|
| Data Freshness Requirement | Strategy |
|
|
|---------------------------|----------|
|
|
| Real-time (careful!) | Auto-refresh on commit |
|
|
| Minutes | Frequent scheduled refresh |
|
|
| Hours | Hourly cron job |
|
|
| Daily | Nightly refresh |
|
|
| On-demand | Manual refresh before queries |
|
|
|
|
## Imperative API (Alternative)
|
|
|
|
For SQLAlchemy Core or when declarative style isn't suitable:
|
|
|
|
```python
|
|
from sqlalchemy import MetaData, Table, Column, Integer, Numeric, DateTime, select, func
|
|
from sqlalchemy_pgview import MaterializedView, CreateMaterializedView, RefreshMaterializedView
|
|
|
|
metadata = MetaData()
|
|
|
|
orders = Table(
|
|
"orders", metadata,
|
|
Column("id", Integer, primary_key=True),
|
|
Column("total", Numeric(10, 2)),
|
|
Column("created_at", DateTime),
|
|
)
|
|
|
|
# Define materialized view
|
|
monthly_sales = MaterializedView(
|
|
"monthly_sales",
|
|
select(
|
|
func.date_trunc('month', orders.c.created_at).label("month"),
|
|
func.count(orders.c.id).label("order_count"),
|
|
func.sum(orders.c.total).label("revenue"),
|
|
).group_by(func.date_trunc('month', orders.c.created_at)),
|
|
metadata=metadata,
|
|
with_data=True,
|
|
)
|
|
|
|
# Create tables and views
|
|
metadata.create_all(engine)
|
|
|
|
# Refresh
|
|
with engine.begin() as conn:
|
|
monthly_sales.refresh(conn)
|
|
# Or using DDL directly
|
|
conn.execute(RefreshMaterializedView(monthly_sales, concurrently=True))
|
|
```
|
|
|
|
## Dropping Materialized Views
|
|
|
|
Materialized views are automatically dropped when using `metadata.drop_all()`. For manual control:
|
|
|
|
```python
|
|
from sqlalchemy_pgview import DropMaterializedView
|
|
|
|
with engine.begin() as conn:
|
|
# Drop using view object
|
|
conn.execute(DropMaterializedView(MonthlySales.as_view()))
|
|
|
|
# Drop by name with options
|
|
conn.execute(DropMaterializedView("old_mv", if_exists=True))
|
|
|
|
# Drop with cascade (drops dependent objects)
|
|
conn.execute(DropMaterializedView("base_mv", cascade=True))
|
|
```
|
|
|
|
## List Registered Views
|
|
|
|
```python
|
|
from sqlalchemy_pgview import get_views
|
|
|
|
# Get all registered views from metadata (includes materialized views)
|
|
views = get_views(Base.metadata)
|
|
print(views) # {'monthly_sales': <MaterializedView 'monthly_sales'>}
|
|
```
|
|
|
|
## Complete Example
|
|
|
|
```python
|
|
from decimal import Decimal
|
|
from datetime import datetime
|
|
from sqlalchemy import create_engine, select, func, String, Numeric, Integer, DateTime
|
|
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, Session
|
|
from sqlalchemy_pgview import MaterializedViewBase
|
|
|
|
class Base(DeclarativeBase):
|
|
pass
|
|
|
|
class Product(Base):
|
|
__tablename__ = "products"
|
|
id: Mapped[int] = mapped_column(primary_key=True)
|
|
name: Mapped[str] = mapped_column(String(100))
|
|
category: Mapped[str] = mapped_column(String(50))
|
|
|
|
class Sale(Base):
|
|
__tablename__ = "sales"
|
|
id: Mapped[int] = mapped_column(primary_key=True)
|
|
product_id: Mapped[int] = mapped_column(Integer)
|
|
quantity: Mapped[int] = mapped_column(Integer)
|
|
amount: Mapped[Decimal] = mapped_column(Numeric(10, 2))
|
|
sold_at: Mapped[datetime] = mapped_column(DateTime)
|
|
|
|
class CategorySales(MaterializedViewBase, Base):
|
|
__tablename__ = "category_sales"
|
|
__select__ = select(
|
|
Product.category,
|
|
func.count(Sale.id).label("sale_count"),
|
|
func.sum(Sale.quantity).label("total_quantity"),
|
|
func.sum(Sale.amount).label("total_revenue"),
|
|
).select_from(
|
|
Sale.__table__.join(Product.__table__, Sale.product_id == Product.id)
|
|
).group_by(Product.category)
|
|
|
|
# Setup
|
|
engine = create_engine("postgresql://...")
|
|
Base.metadata.create_all(engine)
|
|
|
|
# Enable auto-refresh
|
|
CategorySales.auto_refresh_on(Session, Sale.__table__)
|
|
|
|
# Add data - view refreshes automatically on commit
|
|
with Session(engine) as session:
|
|
session.add(Sale(product_id=1, quantity=5, amount=Decimal("49.95"), sold_at=datetime.now()))
|
|
session.commit()
|
|
|
|
# Query fresh data
|
|
with engine.connect() as conn:
|
|
results = conn.execute(
|
|
select(CategorySales.as_table()).order_by(CategorySales.c.total_revenue.desc())
|
|
).fetchall()
|
|
|
|
for row in results:
|
|
print(f"{row.category}: {row.sale_count} sales, ${row.total_revenue}")
|
|
```
|