"""
|
|
Product endpoints demonstrating streaming and memory-efficient operations
|
|
"""
from fastapi import APIRouter, Query, HTTPException, BackgroundTasks
from fastapi.responses import StreamingResponse
from typing import Optional, List
import csv
import hashlib
import io
from datetime import datetime

from sqrtspace_spacetime import external_sort
from sqrtspace_spacetime.checkpoint import CheckpointManager

from ..models import BulkUpdateRequest
from ..services.product_service import ProductService

router = APIRouter()
product_service = ProductService()
checkpoint_manager = CheckpointManager()
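
# How the two sqrtspace_spacetime pieces are used in this module:
# - external_sort (see /search): sorts result sets too large for RAM by
#   spilling sorted runs to disk instead of holding everything in memory.
# - CheckpointManager (see /bulk-update): persists job progress so an
#   interrupted bulk job can be resumed.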


@router.get("/")
async def list_products(
    skip: int = Query(0, ge=0),
    limit: int = Query(100, ge=1, le=1000),
    category: Optional[str] = None,
    min_price: Optional[float] = None,
    max_price: Optional[float] = None
):
    """Get paginated list of products"""
    filters = {}
    if category:
        filters['category'] = category
    if min_price is not None:
        filters['min_price'] = min_price
    if max_price is not None:
        filters['max_price'] = max_price

    return await product_service.get_products(skip, limit, filters)
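
# Example: GET /products/?skip=20&limit=20&category=widgets returns the
# second page of 20 products in the (illustrative) "widgets" category;
# min_price/max_price bound the price range.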


@router.get("/stream")
async def stream_products(
    category: Optional[str] = None,
    format: str = Query("ndjson", regex="^(ndjson|json)$")
):
    """
    Stream all products as NDJSON or a JSON array.
    Memory-efficient streaming for large datasets.
    """

    async def generate_ndjson():
        async for product in product_service.stream_products(category):
            # product.json() handles datetime fields that json.dumps() on
            # product.dict() would reject
            yield product.json() + "\n"

    async def generate_json():
        yield "["
        first = True
        async for product in product_service.stream_products(category):
            if not first:
                yield ","
            yield product.json()
            first = False
        yield "]"

    if format == "ndjson":
        generator, media_type = generate_ndjson(), "application/x-ndjson"
    else:
        generator, media_type = generate_json(), "application/json"

    return StreamingResponse(
        generator,
        media_type=media_type,
        # "X-Accel-Buffering: no" stops reverse proxies such as nginx from
        # buffering the streamed response
        headers={"X-Accel-Buffering": "no"}
    )
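
# NDJSON lets a client handle one product per line without buffering the
# whole response. A minimal consumer sketch using httpx (assumed available;
# URL is illustrative):
#
#   # import httpx, json
#   # async with httpx.AsyncClient() as client:
#   #     async with client.stream("GET", "http://localhost:8000/products/stream") as r:
#   #         async for line in r.aiter_lines():
#   #             product = json.loads(line)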


@router.get("/export/csv")
async def export_csv(
    category: Optional[str] = None,
    columns: Optional[List[str]] = Query(None)
):
    """Export products as CSV with streaming"""

    if not columns:
        columns = ["id", "name", "sku", "category", "price", "stock", "created_at"]

    async def generate():
        # One StringIO buffer is reused for every chunk: write, yield the
        # contents, then reset, so peak memory is bounded by a single batch.
        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=columns)

        # Write header
        writer.writeheader()
        yield output.getvalue()
        output.seek(0)
        output.truncate()

        # Stream products in batches
        async for batch in product_service.stream_products_batched(category, batch_size=100):
            for product in batch:
                writer.writerow({col: getattr(product, col) for col in columns})
            yield output.getvalue()
            output.seek(0)
            output.truncate()

    filename = f"products_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

    return StreamingResponse(
        generate(),
        media_type="text/csv",
        headers={
            "Content-Disposition": f"attachment; filename={filename}",
            "X-Accel-Buffering": "no"
        }
    )
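
# Example: GET /products/export/csv?columns=id&columns=name&columns=price
# FastAPI collects repeated query parameters into the List[str] `columns`,
# so clients pass the parameter once per requested column.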


@router.get("/search")
async def search_products(
    q: str = Query(..., min_length=2),
    sort_by: str = Query("relevance", regex="^(relevance|price_asc|price_desc|name)$"),
    limit: int = Query(100, ge=1, le=1000)
):
    """
    Search products with memory-efficient sorting.
    Uses external sort for large result sets.
    """
    results = await product_service.search_products(q, sort_by, limit)

    # Use external sort if the candidate set is large. Note this branch only
    # fires if the service returns more candidates than `limit` (e.g. for
    # re-ranking); with `limit` capped at 1000 it is otherwise unreachable.
    if len(results) > 1000:
        sort_key = {
            'price_asc': lambda x: x['price'],
            'price_desc': lambda x: -x['price'],
            'name': lambda x: x['name'],
            'relevance': lambda x: -x['relevance_score']
        }[sort_by]

        results = external_sort(results, key_func=sort_key)

    return {"results": results[:limit], "total": len(results)}


@router.post("/bulk-update")
async def bulk_update_prices(
    request: BulkUpdateRequest,
    background_tasks: BackgroundTasks
):
    """
    Bulk update product prices with checkpointing.
    Can be resumed if interrupted.
    """
    # Derive the job id from the request payload: retrying the same request
    # yields the same id, so an interrupted job's checkpoint can be found.
    # (A timestamp-based id would never match an existing checkpoint, making
    # the resume path below unreachable.)
    job_id = f"bulk_update_{hashlib.sha1(request.json().encode()).hexdigest()[:16]}"

    # Check for existing checkpoint
    checkpoint = checkpoint_manager.restore(job_id)
    if checkpoint:
        return {
            "message": "Resuming previous job",
            "job_id": job_id,
            "progress": checkpoint.get("progress", 0)
        }

    # Start background task
    background_tasks.add_task(
        product_service.bulk_update_prices,
        request,
        job_id
    )

    return {
        "message": "Bulk update started",
        "job_id": job_id,
        "status_url": f"/products/bulk-update/{job_id}/status"
    }
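
# For the resume path above to work, the background task must persist its
# progress under the same job_id. A minimal sketch, assuming CheckpointManager
# exposes a save() counterpart to the restore() used here (hypothetical API):
#
#   # inside ProductService.bulk_update_prices(request, job_id):
#   # for i, update in enumerate(request.updates):       # field name assumed
#   #     apply_one(update)                               # hypothetical helper
#   #     if i % 100 == 0:
#   #         checkpoint_manager.save(job_id, {"status": "running", "progress": i})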


@router.get("/bulk-update/{job_id}/status")
async def bulk_update_status(job_id: str):
    """Check status of bulk update job"""
    checkpoint = checkpoint_manager.restore(job_id)

    if not checkpoint:
        raise HTTPException(status_code=404, detail="Job not found")

    return {
        "job_id": job_id,
        "status": checkpoint.get("status", "running"),
        "progress": checkpoint.get("progress", 0),
        "total": checkpoint.get("total", 0),
        "updated": checkpoint.get("updated", 0),
        "errors": checkpoint.get("errors", [])
    }


@router.post("/import/csv")
async def import_csv(
    file_url: str,
    background_tasks: BackgroundTasks
):
    """Import products from CSV file"""
    import_id = f"import_{datetime.now().timestamp()}"

    background_tasks.add_task(
        product_service.import_from_csv,
        file_url,
        import_id
    )

    return {
        "message": "Import started",
        "import_id": import_id,
        "status_url": f"/products/import/{import_id}/status"
    }
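
# The import runs in the background and clients poll status_url. Unlike
# /bulk-update, a timestamp-based import_id is fine here: there is no
# resume-from-checkpoint path that needs to rediscover the same id.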


@router.get("/import/{import_id}/status")
async def import_status(import_id: str):
    """Check status of import job"""
    status = await product_service.get_import_status(import_id)

    if not status:
        raise HTTPException(status_code=404, detail="Import job not found")

    return status


@router.get("/statistics")
async def product_statistics():
    """
    Get product statistics using memory-efficient aggregations.
    Uses external grouping for large datasets.
    """
    stats = await product_service.calculate_statistics()

    return {
        "total_products": stats["total_products"],
        "total_value": stats["total_value"],
        "by_category": stats["by_category"],
        "price_distribution": stats["price_distribution"],
        "stock_alerts": stats["stock_alerts"],
        "processing_info": {
            "memory_used_mb": stats["memory_used_mb"],
            "external_operations": stats["external_operations"]
        }
    }
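
# "External grouping" above means aggregation that spills to disk when the
# dataset exceeds RAM. A hypothetical sketch of the idea (not the service's
# actual implementation):
#
#   # 1. partition rows by hash(category) into on-disk buckets
#   # 2. aggregate each bucket independently
#   # => peak memory ~ one bucket instead of the full dataset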