// sqrtspace-dotnet/samples/SampleWebApi/Services/OrderAnalyticsService.cs
// 2025-07-20 03:41:39 -04:00
//
// 473 lines
// 18 KiB
// C#

using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Options;
using SqrtSpace.SpaceTime.Core;
using SqrtSpace.SpaceTime.EntityFramework;
using SqrtSpace.SpaceTime.Linq;
using SampleWebApi.Data;
using SampleWebApi.Models;
using System.Diagnostics;
namespace SampleWebApi.Services;
/// <summary>
/// Analytics queries and reports over the sample order data.
/// </summary>
public interface IOrderAnalyticsService
{
    /// <summary>
    /// Returns revenue figures per product category, optionally restricted to orders dated
    /// on or after <paramref name="startDate"/> and on or before <paramref name="endDate"/>.
    /// </summary>
    Task<IEnumerable<CategoryRevenue>> GetRevenueByCategoryAsync(DateTime? startDate, DateTime? endDate);

    /// <summary>
    /// Returns up to <paramref name="top"/> customers ranked by total amount spent,
    /// optionally considering only orders dated on or after <paramref name="since"/>.
    /// </summary>
    Task<IEnumerable<CustomerSummary>> GetTopCustomersAsync(int top, DateTime? since);

    /// <summary>
    /// Produces a stream of analytics snapshots until <paramref name="cancellationToken"/> is cancelled.
    /// </summary>
    IAsyncEnumerable<RealTimeAnalytics> StreamRealTimeAnalyticsAsync(CancellationToken cancellationToken);

    /// <summary>
    /// Generates a multi-step report identified by <paramref name="reportId"/>.
    /// </summary>
    /// <param name="request">Date range and other report parameters.</param>
    /// <param name="reportId">Identifier recorded on the report and its state.</param>
    /// <param name="previousState">State from an earlier run to continue from, or <c>null</c> to start fresh.</param>
    /// <param name="checkpoint">Optional checkpoint manager used to persist intermediate state.</param>
    Task<ReportResult> GenerateComplexReportAsync(
        ReportRequest request,
        string reportId,
        ReportState? previousState,
        CheckpointManager? checkpoint);

    /// <summary>
    /// Analyzes recent orders for order, customer-segment and seasonal patterns as selected by
    /// <paramref name="request"/>.
    /// </summary>
    Task<PatternAnalysisResult> AnalyzeOrderPatternsAsync(PatternAnalysisRequest request);

    /// <summary>
    /// Returns the service's memory statistics.
    /// </summary>
    MemoryStatistics GetMemoryStatistics();
}
public class OrderAnalyticsService : IOrderAnalyticsService
{
// Database context used for every order, order-item and customer query in this service.
private readonly SampleDbContext _context;
// Logger for progress and error messages emitted by the analytics methods.
private readonly ILogger<OrderAnalyticsService> _logger;
// Options value captured at construction; MaxMemoryMB is read when classifying memory pressure.
private readonly MemoryOptions _memoryOptions;
// Static, so the counters and memory readings are shared by all instances of the service.
// NOTE(review): writes to this object are not synchronized anywhere in this class —
// confirm whether concurrent requests can update it at the same time.
private static readonly MemoryStatistics _memoryStats = new();
/// <summary>
/// Creates the service from its injected dependencies.
/// </summary>
/// <param name="context">Database context used for all queries.</param>
/// <param name="logger">Logger for diagnostic output.</param>
/// <param name="memoryOptions">Options wrapper; its <c>Value</c> is read once and stored.</param>
public OrderAnalyticsService(
    SampleDbContext context,
    ILogger<OrderAnalyticsService> logger,
    IOptions<MemoryOptions> memoryOptions)
{
    // Single tuple assignment: the options wrapper is unwrapped here so later code
    // reads the MemoryOptions value directly.
    (_context, _logger, _memoryOptions) = (context, logger, memoryOptions.Value);
}
/// <summary>
/// Computes revenue per product category over order items, optionally restricted to orders
/// dated on or after <paramref name="startDate"/> and on or before <paramref name="endDate"/>.
/// </summary>
/// <remarks>
/// Up to 50,000 matching items are grouped by the database in a single query. Above that,
/// items are streamed with <c>BatchBySqrtNAsync</c> and accumulated per category in memory.
/// Both paths report the same figures: <c>OrderCount</c> is the number of distinct orders in
/// the category and <c>AverageOrderValue</c> is the mean <c>TotalPrice</c> per order item.
/// </remarks>
/// <param name="startDate">Inclusive lower bound on the order date, or <c>null</c> for none.</param>
/// <param name="endDate">Inclusive upper bound on the order date, or <c>null</c> for none.</param>
/// <returns>A materialized list of category figures, highest total revenue first.</returns>
public async Task<IEnumerable<CategoryRevenue>> GetRevenueByCategoryAsync(DateTime? startDate, DateTime? endDate)
{
    // Read-only analytics: without AsNoTracking every streamed entity would stay referenced
    // by the context's change tracker, so batching would not bound memory.
    var query = _context.OrderItems
        .AsNoTracking()
        .Include(oi => oi.Product)
        .Include(oi => oi.Order)
        .AsQueryable();

    if (startDate.HasValue)
    {
        query = query.Where(oi => oi.Order.OrderDate >= startDate.Value);
    }

    if (endDate.HasValue)
    {
        query = query.Where(oi => oi.Order.OrderDate <= endDate.Value);
    }

    var itemCount = await query.CountAsync();
    _logger.LogInformation("Processing revenue for {count} order items", itemCount);

    if (itemCount <= 50000)
    {
        // Small enough to let the database group and aggregate in one round trip.
        return await query
            .GroupBy(oi => oi.Product.Category)
            .Select(g => new CategoryRevenue
            {
                Category = g.Key,
                TotalRevenue = g.Sum(oi => oi.TotalPrice),
                OrderCount = g.Select(oi => oi.OrderId).Distinct().Count(),
                AverageOrderValue = g.Average(oi => oi.TotalPrice)
            })
            .OrderByDescending(c => c.TotalRevenue)
            .ToListAsync();
    }

    // Use external grouping for large datasets
    _logger.LogInformation("Using external grouping for revenue calculation");
    _memoryStats.ExternalSortOperations++;

    var totals = new Dictionary<string, (decimal Revenue, int Items)>();

    // Process in memory-efficient batches
    await foreach (var batch in query.BatchBySqrtNAsync())
    {
        foreach (var item in batch)
        {
            var category = item.Product.Category;

            // A missing key leaves 'running' at (0, 0), so one lookup covers both cases.
            totals.TryGetValue(category, out var running);
            totals[category] = (running.Revenue + item.TotalPrice, running.Items + 1);
        }
    }

    // The batch loop counts order items, not orders. Fetch the distinct-order count per
    // category with one aggregate query (one row per category) so OrderCount means the same
    // thing as in the small-dataset path, without holding every order id in memory.
    var ordersByCategory = await query
        .GroupBy(oi => oi.Product.Category)
        .Select(g => new
        {
            Category = g.Key,
            Orders = g.Select(oi => oi.OrderId).Distinct().Count()
        })
        .ToDictionaryAsync(x => x.Category, x => x.Orders);

    // Materialize so callers that enumerate more than once do not re-run the projection and sort.
    return totals
        .Select(kvp => new CategoryRevenue
        {
            Category = kvp.Key,
            TotalRevenue = kvp.Value.Revenue,
            // Falls back to the item count only if the category is absent from the aggregate
            // query (e.g. rows changed between the two reads).
            OrderCount = ordersByCategory.GetValueOrDefault(kvp.Key, kvp.Value.Items),
            // Items is at least 1 for every key present in the dictionary.
            AverageOrderValue = kvp.Value.Revenue / kvp.Value.Items
        })
        .OrderByDescending(c => c.TotalRevenue)
        .ToList();
}
/// <summary>
/// Returns up to <paramref name="top"/> customers ranked by total amount spent, optionally
/// considering only orders dated on or after <paramref name="since"/>.
/// </summary>
/// <remarks>
/// Up to 100,000 matching orders are grouped by the database. Above that, orders are streamed
/// with <c>BatchBySqrtNAsync</c> and aggregated per customer in memory. In both cases customer
/// names are looked up only for the customers that made the final ranking; customers without a
/// matching row are reported as "Unknown".
/// </remarks>
/// <param name="top">Maximum number of customers to return.</param>
/// <param name="since">Inclusive lower bound on the order date, or <c>null</c> for none.</param>
/// <returns>A materialized list of customer summaries, highest total spent first.</returns>
public async Task<IEnumerable<CustomerSummary>> GetTopCustomersAsync(int top, DateTime? since)
{
    // Read-only analytics: avoid retaining every streamed order in the change tracker.
    var query = _context.Orders.AsNoTracking().AsQueryable();

    if (since.HasValue)
    {
        query = query.Where(o => o.OrderDate >= since.Value);
    }

    var orderCount = await query.CountAsync();
    _logger.LogInformation("Finding top {top} customers from {count} orders", top, orderCount);

    List<(string CustomerId, decimal Total, int Count, DateTime First, DateTime Last)> ranked;

    if (orderCount > 100000)
    {
        // For large datasets, use external sorting
        _logger.LogInformation("Using external sorting for top customers");
        _memoryStats.ExternalSortOperations++;

        var perCustomer = new Dictionary<string, (decimal Total, int Count, DateTime First, DateTime Last)>();

        // Aggregate customer data in batches
        await foreach (var batch in query.BatchBySqrtNAsync())
        {
            foreach (var order in batch)
            {
                // Single lookup: either extend the running aggregate or start one from this order.
                perCustomer[order.CustomerId] = perCustomer.TryGetValue(order.CustomerId, out var seen)
                    ? (seen.Total + order.TotalAmount,
                       seen.Count + 1,
                       order.OrderDate < seen.First ? order.OrderDate : seen.First,
                       order.OrderDate > seen.Last ? order.OrderDate : seen.Last)
                    : (order.TotalAmount, 1, order.OrderDate, order.OrderDate);
            }
        }

        // Rank first, so the name lookup below is limited to the top N customers instead of
        // sending every aggregated customer id to the database.
        ranked = perCustomer
            .OrderByDescending(kvp => kvp.Value.Total)
            .Take(top)
            .Select(kvp => (
                CustomerId: kvp.Key,
                Total: kvp.Value.Total,
                Count: kvp.Value.Count,
                First: kvp.Value.First,
                Last: kvp.Value.Last))
            .ToList();
    }
    else
    {
        // Smaller datasets: let the database group, sort and trim.
        var grouped = await query
            .GroupBy(o => o.CustomerId)
            .Select(g => new
            {
                CustomerId = g.Key,
                TotalSpent = g.Sum(o => o.TotalAmount),
                OrderCount = g.Count(),
                FirstOrder = g.Min(o => o.OrderDate),
                LastOrder = g.Max(o => o.OrderDate)
            })
            .OrderByDescending(c => c.TotalSpent)
            .Take(top)
            .ToListAsync();

        ranked = grouped
            .Select(c => (
                CustomerId: c.CustomerId,
                Total: c.TotalSpent,
                Count: c.OrderCount,
                First: c.FirstOrder,
                Last: c.LastOrder))
            .ToList();
    }

    // Get customer details for the ranked customers only.
    var rankedIds = ranked.Select(c => c.CustomerId).ToList();
    var names = await _context.Customers
        .AsNoTracking()
        .Where(c => rankedIds.Contains(c.Id))
        .ToDictionaryAsync(c => c.Id, c => c.Name);

    // Materialize so repeated enumeration by the caller does not redo the projection.
    return ranked
        .Select(c => new CustomerSummary
        {
            CustomerId = c.CustomerId,
            CustomerName = names.GetValueOrDefault(c.CustomerId, "Unknown"),
            TotalOrders = c.Count,
            TotalSpent = c.Total,
            // Count is at least 1: every ranked entry comes from at least one order.
            AverageOrderValue = c.Total / c.Count,
            FirstOrderDate = c.First,
            LastOrderDate = c.Last
        })
        .ToList();
}
/// <summary>
/// Yields an analytics snapshot of the last hour's orders, then waits one second and repeats,
/// until <paramref name="cancellationToken"/> is cancelled.
/// </summary>
/// <remarks>
/// After each snapshot is consumed, the shared memory statistics (current and peak working
/// set in MB) are refreshed. The token is passed to the query and to the delay, so cancelling
/// while either is in flight surfaces as a cancellation exception from the enumerator.
/// </remarks>
/// <param name="cancellationToken">Token that stops the stream.</param>
/// <returns>An asynchronous sequence of snapshots, one per polling cycle.</returns>
public async IAsyncEnumerable<RealTimeAnalytics> StreamRealTimeAnalyticsAsync(
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
{
    while (!cancellationToken.IsCancellationRequested)
    {
        var now = DateTime.UtcNow;
        var hourAgo = now.AddHours(-1);

        // Get orders from last hour. No-tracking matters in this loop: a tracking query would
        // keep every polled entity attached to the context for the life of the stream and
        // would hand back already-tracked instances instead of freshly read values.
        var recentOrders = await _context.Orders
            .AsNoTracking()
            .Where(o => o.OrderDate >= hourAgo)
            .Include(o => o.Items)
            .ThenInclude(oi => oi.Product)
            .ToListAsync(cancellationToken);

        // Calculate analytics
        var analytics = new RealTimeAnalytics
        {
            Timestamp = now,
            OrdersLastHour = recentOrders.Count,
            RevenueLastHour = recentOrders.Sum(o => o.TotalAmount),
            ActiveCustomers = recentOrders.Select(o => o.CustomerId).Distinct().Count(),
            OrdersPerMinute = recentOrders.Count / 60.0
        };

        // Get top products: sum each product's quantity once, then rank on that value.
        analytics.TopProductsLastHour = recentOrders
            .SelectMany(o => o.Items)
            .GroupBy(oi => oi.Product.Name)
            .Select(g => (Name: g.Key, Units: g.Sum(oi => oi.Quantity)))
            .OrderByDescending(p => p.Units)
            .Take(5)
            .ToDictionary(p => p.Name, p => p.Units);

        yield return analytics;

        // Update memory stats. Process is IDisposable; release it each cycle rather than
        // leaving one undisposed instance per tick, and do so before the delay starts.
        using (var process = Process.GetCurrentProcess())
        {
            _memoryStats.CurrentMemoryUsageMB = process.WorkingSet64 / (1024 * 1024);
        }

        _memoryStats.PeakMemoryUsageMB = Math.Max(_memoryStats.PeakMemoryUsageMB, _memoryStats.CurrentMemoryUsageMB);

        await Task.Delay(1000, cancellationToken); // Wait before next update
    }
}
/// <summary>
/// Builds a four-step report (total revenue, category breakdown, top customers, product
/// performance), optionally resuming from <paramref name="previousState"/> and checkpointing
/// between steps.
/// </summary>
/// <remarks>
/// Each step runs only if the state's progress is below that step's threshold (25, 50, 75,
/// 100), so a resumed run skips steps already recorded as done. After each of the first three
/// steps a checkpoint is written when the manager reports one is due. If a step throws, the
/// error is logged, a checkpoint is attempted, and the original exception is rethrown.
/// </remarks>
/// <param name="request">Supplies the start and end dates used by every step.</param>
/// <param name="reportId">Identifier placed on the result and on a freshly created state.</param>
/// <param name="previousState">State to resume from, or <c>null</c> to start a new report.</param>
/// <param name="checkpoint">Checkpoint manager, or <c>null</c> to run without checkpoints.</param>
/// <returns>The completed report with metrics, timing and memory figures.</returns>
public async Task<ReportResult> GenerateComplexReportAsync(
    ReportRequest request,
    string reportId,
    ReportState? previousState,
    CheckpointManager? checkpoint)
{
    var stopwatch = Stopwatch.StartNew();
    var state = previousState ?? new ReportState { ReportId = reportId };

    // NOTE(review): for a fresh state this relies on ReportState initializing PartialResults
    // to a non-null collection — confirm against the model.
    var result = new ReportResult
    {
        ReportId = reportId,
        GeneratedAt = DateTime.UtcNow,
        Metrics = state.PartialResults
    };

    try
    {
        // Step 1: Calculate total revenue (0-25%)
        if (state.ProgressPercent < 25)
        {
            result.Metrics["totalRevenue"] = await CalculateTotalRevenueAsync(request.StartDate, request.EndDate);
            state.ProgressPercent = 25;
            await SaveCheckpointIfDueAsync(checkpoint, state, result);
        }

        // Step 2: Calculate category breakdown (25-50%)
        if (state.ProgressPercent < 50)
        {
            var categoryRevenue = await GetRevenueByCategoryAsync(request.StartDate, request.EndDate);

            // Snapshot into a list so the metrics (and any checkpoint of them) hold concrete
            // data rather than a possibly deferred LINQ pipeline.
            result.Metrics["categoryBreakdown"] = categoryRevenue.ToList();
            state.ProgressPercent = 50;
            await SaveCheckpointIfDueAsync(checkpoint, state, result);
        }

        // Step 3: Customer analytics (50-75%)
        if (state.ProgressPercent < 75)
        {
            // Only the start date applies here: GetTopCustomersAsync takes a lower bound only.
            var topCustomers = await GetTopCustomersAsync(100, request.StartDate);
            result.Metrics["topCustomers"] = topCustomers.ToList();
            state.ProgressPercent = 75;
            await SaveCheckpointIfDueAsync(checkpoint, state, result);
        }

        // Step 4: Product performance (75-100%)
        if (state.ProgressPercent < 100)
        {
            result.Metrics["productPerformance"] = await CalculateProductPerformanceAsync(request.StartDate, request.EndDate);
            state.ProgressPercent = 100;
        }

        result.Completed = true;
        result.ProgressPercent = 100;
        result.ProcessingTimeMs = stopwatch.ElapsedMilliseconds;
        result.MemoryUsedMB = _memoryStats.CurrentMemoryUsageMB;

        _logger.LogInformation("Report {reportId} completed in {time}ms", reportId, result.ProcessingTimeMs);
        return result;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error generating report {reportId}", reportId);

        // Save checkpoint on error. This is best-effort: if the save itself fails, log it and
        // still rethrow the original failure instead of letting the checkpoint error replace it.
        if (checkpoint != null)
        {
            try
            {
                state.PartialResults = result.Metrics;
                await checkpoint.CreateCheckpointAsync(state);
                _memoryStats.CheckpointsSaved++;
            }
            catch (Exception checkpointEx)
            {
                _logger.LogError(checkpointEx, "Failed to save checkpoint for report {reportId}", reportId);
            }
        }

        throw;
    }
}

// Writes a checkpoint of the current partial metrics when the manager says one is due,
// and counts it in the shared statistics. Does nothing when no manager was supplied.
private async Task SaveCheckpointIfDueAsync(CheckpointManager? checkpoint, ReportState state, ReportResult result)
{
    if (checkpoint?.ShouldCheckpoint() == true)
    {
        state.PartialResults = result.Metrics;
        await checkpoint.CreateCheckpointAsync(state);
        _memoryStats.CheckpointsSaved++;
    }
}
/// <summary>
/// Analyzes the most recent orders (at most <c>request.MaxOrdersToAnalyze</c>) and reports
/// overall order patterns plus, when requested, customer segments and seasonal patterns.
/// </summary>
/// <remarks>
/// Averages over an empty set are reported as 0 rather than throwing, so the analysis also
/// succeeds when there are no orders or when a segment has no customers. Segments are
/// "High Value" (total spent above 1000) and "Regular" (100 to 1000 inclusive); customers
/// below 100 are not placed in any segment.
/// </remarks>
/// <param name="request">Scope limit and flags selecting the optional analyses.</param>
/// <returns>The analysis result, including elapsed time and the last recorded memory usage.</returns>
public async Task<PatternAnalysisResult> AnalyzeOrderPatternsAsync(PatternAnalysisRequest request)
{
    var stopwatch = Stopwatch.StartNew();
    var result = new PatternAnalysisResult();

    // Enumerable.Average throws on an empty sequence; analytics over "no data" should be 0.
    static double AverageOrZero<T>(IReadOnlyCollection<T> items, Func<T, double> selector) =>
        items.Count == 0 ? 0 : items.Average(selector);

    // Limit the analysis scope. The orders are only read, so skip change tracking.
    // NOTE(review): Items are loaded but never read in this method — consider dropping the
    // Include if nothing on the order depends on them.
    var orders = await _context.Orders
        .AsNoTracking()
        .OrderByDescending(o => o.OrderDate)
        .Take(request.MaxOrdersToAnalyze)
        .Include(o => o.Items)
        .ToListAsync();

    result.RecordsProcessed = orders.Count;

    // Analyze order patterns
    result.OrderPatterns["averageOrderValue"] = AverageOrZero(orders, o => (double)o.TotalAmount);
    result.OrderPatterns["ordersPerDay"] = AverageOrZero(
        orders.GroupBy(o => o.OrderDate.Date).ToList(),
        day => day.Count());

    // Customer segmentation
    if (request.IncludeCustomerSegmentation)
    {
        var customerGroups = orders
            .GroupBy(o => o.CustomerId)
            .Select(g => new
            {
                CustomerId = g.Key,
                OrderCount = g.Count(),
                TotalSpent = g.Sum(o => o.TotalAmount),
                AverageOrder = g.Average(o => o.TotalAmount)
            })
            .ToList();

        // Simple segmentation based on spending; filter each segment once and reuse it.
        var highValue = customerGroups.Where(c => c.TotalSpent > 1000).ToList();
        var regular = customerGroups.Where(c => c.TotalSpent >= 100 && c.TotalSpent <= 1000).ToList();

        result.CustomerSegments = new List<CustomerSegment>
        {
            new CustomerSegment
            {
                SegmentName = "High Value",
                CustomerCount = highValue.Count,
                Characteristics = new Dictionary<string, double>
                {
                    ["averageOrderValue"] = AverageOrZero(highValue, c => (double)c.AverageOrder),
                    ["ordersPerCustomer"] = AverageOrZero(highValue, c => c.OrderCount)
                }
            },
            new CustomerSegment
            {
                SegmentName = "Regular",
                CustomerCount = regular.Count,
                Characteristics = new Dictionary<string, double>
                {
                    ["averageOrderValue"] = AverageOrZero(regular, c => (double)c.AverageOrder),
                    ["ordersPerCustomer"] = AverageOrZero(regular, c => c.OrderCount)
                }
            }
        };
    }

    // Seasonal analysis
    if (request.IncludeSeasonalAnalysis)
    {
        result.SeasonalAnalysis = new SeasonalAnalysis
        {
            // Keyed by month number, so the same month in different years is combined.
            MonthlySalesPattern = orders
                .GroupBy(o => o.OrderDate.Month)
                .ToDictionary(g => g.Key.ToString(), g => (double)g.Sum(o => o.TotalAmount)),
            WeeklySalesPattern = orders
                .GroupBy(o => o.OrderDate.DayOfWeek)
                .ToDictionary(g => g.Key.ToString(), g => (double)g.Sum(o => o.TotalAmount)),
            // The five calendar days with the highest total sales.
            PeakPeriods = orders
                .GroupBy(o => o.OrderDate.Date)
                .OrderByDescending(g => g.Sum(o => o.TotalAmount))
                .Take(5)
                .Select(g => g.Key.ToString("yyyy-MM-dd"))
                .ToList()
        };
    }

    result.AnalysisTimeMs = stopwatch.ElapsedMilliseconds;
    result.MemoryUsedMB = _memoryStats.CurrentMemoryUsageMB;
    return result;
}
/// <summary>
/// Refreshes the shared memory statistics from the current process and returns them.
/// </summary>
/// <remarks>
/// Current usage is the process working set in whole megabytes; peak usage is raised when the
/// new reading exceeds it. Pressure is "Low" below 50% of <c>MaxMemoryMB</c>, "Medium" below
/// 80%, otherwise "High".
/// </remarks>
/// <returns>The shared statistics object (the same instance on every call).</returns>
public MemoryStatistics GetMemoryStatistics()
{
    // Process is IDisposable; release it as soon as the reading is taken.
    using (var process = Process.GetCurrentProcess())
    {
        _memoryStats.CurrentMemoryUsageMB = process.WorkingSet64 / (1024 * 1024);
    }

    // Keep the peak in step with the reading, as the streaming method does.
    _memoryStats.PeakMemoryUsageMB = Math.Max(_memoryStats.PeakMemoryUsageMB, _memoryStats.CurrentMemoryUsageMB);

    // Determine memory pressure
    if (_memoryOptions.MaxMemoryMB <= 0)
    {
        // A non-positive budget would divide by zero below. Any usage exceeds such a budget,
        // so report the conservative level.
        // NOTE(review): confirm whether 0 is meant to signal "no limit" in MemoryOptions.
        _memoryStats.CurrentMemoryPressure = "High";
        return _memoryStats;
    }

    var usagePercent = (_memoryStats.CurrentMemoryUsageMB * 100) / _memoryOptions.MaxMemoryMB;
    _memoryStats.CurrentMemoryPressure = usagePercent switch
    {
        < 50 => "Low",
        < 80 => "Medium",
        _ => "High"
    };

    return _memoryStats;
}
/// <summary>
/// Sums <c>TotalAmount</c> over all orders dated from <paramref name="startDate"/> through
/// <paramref name="endDate"/>, both inclusive.
/// </summary>
/// <param name="startDate">Inclusive lower bound on the order date.</param>
/// <param name="endDate">Inclusive upper bound on the order date.</param>
/// <returns>The summed order amount for the window.</returns>
private async Task<decimal> CalculateTotalRevenueAsync(DateTime startDate, DateTime endDate)
{
    // Narrow to the date window first; the sum itself is evaluated by the query provider.
    var ordersInWindow = _context.Orders
        .Where(o => o.OrderDate >= startDate && o.OrderDate <= endDate);

    return await ordersInWindow.SumAsync(o => o.TotalAmount);
}
/// <summary>
/// Returns the 50 products with the highest revenue among order items whose order is dated
/// from <paramref name="startDate"/> through <paramref name="endDate"/>, both inclusive.
/// </summary>
/// <param name="startDate">Inclusive lower bound on the order date.</param>
/// <param name="endDate">Inclusive upper bound on the order date.</param>
/// <returns>
/// A list of anonymous objects (ProductId, ProductName, UnitsSold, Revenue, OrderCount),
/// ordered by revenue descending and boxed as <see cref="object"/>.
/// </returns>
private async Task<object> CalculateProductPerformanceAsync(DateTime startDate, DateTime endDate)
{
    // Order items in the requested window, with their product and order attached.
    var itemsInWindow = _context.OrderItems
        .Include(oi => oi.Product)
        .Include(oi => oi.Order)
        .Where(oi => oi.Order.OrderDate >= startDate && oi.Order.OrderDate <= endDate);

    // One row per product: units, revenue and the number of distinct orders it appears in.
    var perProduct = itemsInWindow
        .GroupBy(oi => new { oi.ProductId, oi.Product.Name })
        .Select(g => new
        {
            ProductId = g.Key.ProductId,
            ProductName = g.Key.Name,
            UnitsSold = g.Sum(oi => oi.Quantity),
            Revenue = g.Sum(oi => oi.TotalPrice),
            OrderCount = g.Select(oi => oi.OrderId).Distinct().Count()
        });

    return await perProduct
        .OrderByDescending(p => p.Revenue)
        .Take(50)
        .ToListAsync();
}
}