#!/usr/bin/env python3
"""
Example code to demonstrate SpaceTime Compiler optimizations

This file contains various patterns that can be optimized.
"""

import numpy as np
from typing import List, Dict, Tuple

def process_large_dataset(data: List[float], threshold: float) -> Dict[str, List[float]]:
    """Process large dataset with multiple optimization opportunities"""
    # Opportunity 1: Large list accumulation
    filtered_data = []
    for value in data:
        if value > threshold:
            filtered_data.append(value * 2.0)

    # Opportunity 2: Sorting large data
    sorted_data = sorted(filtered_data)

    # Opportunity 3: Accumulation in loop
    total = 0.0
    count = 0
    for value in sorted_data:
        total += value
        count += 1

    mean = total / count if count > 0 else 0.0

    # Opportunity 4: Large comprehension
    squared_deviations = [(x - mean) ** 2 for x in sorted_data]

    # Opportunity 5: Grouping with accumulation
    groups = {}
    for i, value in enumerate(sorted_data):
        group_key = f"group_{int(value // 100)}"
        if group_key not in groups:
            groups[group_key] = []
        groups[group_key].append(value)

    return groups

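# Hedged example (not part of the original demo): one way the accumulation and
# grouping in process_large_dataset could be rewritten by hand as a single pass
# that avoids the intermediate filtered/sorted lists. The function name is
# illustrative only and this is a sketch, not SpaceTime Compiler output; the
# unused mean/deviation statistics are dropped because the original discards them.
def _process_large_dataset_single_pass(data: List[float], threshold: float) -> Dict[str, List[float]]:
    """Single-pass sketch: filter, scale, and group without the big intermediates."""
    groups: Dict[str, List[float]] = {}
    for value in data:
        if value > threshold:
            scaled = value * 2.0
            groups.setdefault(f"group_{int(scaled // 100)}", []).append(scaled)
    # Sorting each (smaller) group reproduces the order the original obtains
    # by sorting the whole filtered dataset up front.
    for bucket in groups.values():
        bucket.sort()
    return groups
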
def matrix_computation(A: np.ndarray, B: np.ndarray, C: np.ndarray) -> np.ndarray:
    """Matrix operations that can benefit from cache blocking"""
    # Opportunity: Matrix multiplication
    result1 = np.dot(A, B)

    # Opportunity: Another matrix multiplication
    result2 = np.dot(result1, C)

    # Opportunity: Element-wise operations in loop
    n_rows, n_cols = result2.shape
    for i in range(n_rows):
        for j in range(n_cols):
            result2[i, j] = np.sqrt(result2[i, j]) if result2[i, j] > 0 else 0

    return result2

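# Hedged example (not part of the original demo): the per-element Python loop in
# matrix_computation can usually be replaced with a vectorized NumPy expression.
# Manual sketch only, not compiler output.
def _matrix_computation_vectorized(A: np.ndarray, B: np.ndarray, C: np.ndarray) -> np.ndarray:
    """Same values as matrix_computation, with the sqrt loop vectorized."""
    result = A @ B @ C
    # sqrt of positive entries, 0 elsewhere (mirrors the per-element branch);
    # np.maximum avoids taking sqrt of negative values.
    return np.where(result > 0, np.sqrt(np.maximum(result, 0)), 0.0)
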
def analyze_log_files(log_paths: List[str]) -> Dict[str, int]:
    """Analyze multiple log files - external memory opportunity"""
    # Opportunity: Large accumulation
    all_entries = []
    for path in log_paths:
        with open(path, 'r') as f:
            entries = f.readlines()
            all_entries.extend(entries)

    # Opportunity: Processing large list
    error_counts = {}
    for entry in all_entries:
        if 'ERROR' in entry:
            error_type = extract_error_type(entry)
            if error_type not in error_counts:
                error_counts[error_type] = 0
            error_counts[error_type] += 1

    return error_counts

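# Hedged example (not part of the original demo): a streaming variant that never
# holds every log line in memory, counting errors as each line is read.
# Illustrative only.
def _analyze_log_files_streaming(log_paths: List[str]) -> Dict[str, int]:
    """Same counts as analyze_log_files, processing one line at a time."""
    from collections import Counter
    error_counts = Counter()
    for path in log_paths:
        with open(path, 'r') as f:
            for entry in f:
                if 'ERROR' in entry:
                    error_counts[extract_error_type(entry)] += 1
    return dict(error_counts)
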
def extract_error_type(log_entry: str) -> str:
    """Helper function to extract error type"""
    # Simplified error extraction
    if 'FileNotFound' in log_entry:
        return 'FileNotFound'
    elif 'ValueError' in log_entry:
        return 'ValueError'
    elif 'KeyError' in log_entry:
        return 'KeyError'
    else:
        return 'Unknown'

def simulate_particles(n_particles: int, n_steps: int) -> List[np.ndarray]:
    """Particle simulation with checkpointing opportunity"""
    # Initialize particles
    positions = np.random.rand(n_particles, 3)
    velocities = np.random.rand(n_particles, 3) - 0.5

    # Opportunity: Large trajectory accumulation
    trajectory = []

    # Opportunity: Large loop with accumulation
    for step in range(n_steps):
        # Update positions
        positions += velocities * 0.01  # dt = 0.01

        # Apply boundary conditions
        positions = np.clip(positions, 0, 1)

        # Store position (checkpoint opportunity)
        trajectory.append(positions.copy())

        # Apply some forces
        velocities *= 0.99  # Damping

    return trajectory

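# Hedged example (not part of the original demo): instead of growing a Python list
# of per-step copies, the trajectory can be preallocated as one ndarray (or, for
# very long runs, backed by np.memmap so it lives on disk). A minimal
# preallocation sketch assuming the same dt and damping as above:
def _simulate_particles_preallocated(n_particles: int, n_steps: int) -> np.ndarray:
    """Same dynamics as simulate_particles, storing the trajectory in one array."""
    positions = np.random.rand(n_particles, 3)
    velocities = np.random.rand(n_particles, 3) - 0.5
    trajectory = np.empty((n_steps, n_particles, 3))
    for step in range(n_steps):
        positions = np.clip(positions + velocities * 0.01, 0, 1)
        trajectory[step] = positions  # one write per step, no list growth
        velocities *= 0.99
    return trajectory
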
def build_index(documents: List[str]) -> Dict[str, List[int]]:
    """Build inverted index - memory optimization opportunity"""
    # Opportunity: Large dictionary with lists
    index = {}

    # Opportunity: Nested loops with accumulation
    for doc_id, document in enumerate(documents):
        words = document.lower().split()

        for word in words:
            if word not in index:
                index[word] = []
            index[word].append(doc_id)

    # Opportunity: Sorting index values
    for word in index:
        index[word] = sorted(set(index[word]))

    return index

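# Hedged example (not part of the original demo): keeping a set of doc ids per
# word from the start avoids storing duplicates and the later sorted(set(...))
# pass over every posting list. Illustrative only.
def _build_index_with_sets(documents: List[str]) -> Dict[str, List[int]]:
    """Same index as build_index, deduplicating doc ids as they are added."""
    from collections import defaultdict
    postings = defaultdict(set)
    for doc_id, document in enumerate(documents):
        for word in document.lower().split():
            postings[word].add(doc_id)
    return {word: sorted(ids) for word, ids in postings.items()}
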
def process_stream(data_stream) -> Tuple[float, float]:
    """Process streaming data - generator opportunity"""
    # Opportunity: Could use generator instead of list
    values = [float(x) for x in data_stream]

    # Calculate statistics
    mean = sum(values) / len(values)
    variance = sum((x - mean) ** 2 for x in values) / len(values)

    return mean, variance

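# Hedged example (not part of the original demo): the same statistics can be
# computed in a single pass over the stream (Welford's online algorithm) without
# building the values list. Illustrative only; returning (0.0, 0.0) for an empty
# stream is a choice made here, whereas the original would divide by zero.
def _process_stream_online(data_stream) -> Tuple[float, float]:
    """Population mean and variance computed incrementally."""
    count = 0
    mean = 0.0
    m2 = 0.0  # running sum of squared deviations from the current mean
    for x in data_stream:
        value = float(x)
        count += 1
        delta = value - mean
        mean += delta / count
        m2 += delta * (value - mean)
    if count == 0:
        return 0.0, 0.0
    return mean, m2 / count
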
def graph_analysis(adjacency_list: Dict[int, List[int]], start_node: int) -> List[int]:
    """Graph traversal - memory-bounded opportunity"""
    visited = set()
    # Opportunity: Queue could be memory-bounded
    queue = [start_node]
    traversal_order = []

    while queue:
        node = queue.pop(0)
        if node not in visited:
            visited.add(node)
            traversal_order.append(node)

            # Add all neighbors
            for neighbor in adjacency_list.get(node, []):
                if neighbor not in visited:
                    queue.append(neighbor)

    return traversal_order

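# Hedged example (not part of the original demo): list.pop(0) is O(n) per pop; a
# collections.deque gives O(1) pops and the same breadth-first order.
# Illustrative only.
def _graph_analysis_deque(adjacency_list: Dict[int, List[int]], start_node: int) -> List[int]:
    """Same traversal order as graph_analysis, using a deque as the queue."""
    from collections import deque
    visited = set()
    queue = deque([start_node])
    traversal_order = []
    while queue:
        node = queue.popleft()
        if node not in visited:
            visited.add(node)
            traversal_order.append(node)
            for neighbor in adjacency_list.get(node, []):
                if neighbor not in visited:
                    queue.append(neighbor)
    return traversal_order
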
if __name__ == "__main__":
    # Example usage
    print("This file demonstrates various optimization opportunities")
    print("Run the SpaceTime Compiler on this file to see optimizations")

    # Small examples
    data = list(range(10000))
    result = process_large_dataset(data, 5000)
    print(f"Processed {len(data)} items into {len(result)} groups")

    # Matrix example
    A = np.random.rand(100, 100)
    B = np.random.rand(100, 100)
    C = np.random.rand(100, 100)
    result_matrix = matrix_computation(A, B, C)
    print(f"Matrix computation result shape: {result_matrix.shape}")