#!/usr/bin/env python3
"""
Example code to demonstrate SpaceTime Compiler optimizations

This file contains various patterns that can be optimized.
"""

import numpy as np
from typing import List, Dict, Tuple

def process_large_dataset(data: List[float], threshold: float) -> Dict[str, List[float]]:
    """Process large dataset with multiple optimization opportunities"""
    # Opportunity 1: Large list accumulation
    filtered_data = []
    for value in data:
        if value > threshold:
            filtered_data.append(value * 2.0)

    # Opportunity 2: Sorting large data
    sorted_data = sorted(filtered_data)

    # Opportunity 3: Accumulation in loop
    total = 0.0
    count = 0
    for value in sorted_data:
        total += value
        count += 1

    mean = total / count if count > 0 else 0.0

    # Opportunity 4: Large comprehension
    squared_deviations = [(x - mean) ** 2 for x in sorted_data]

    # Opportunity 5: Grouping with accumulation
    groups = {}
    for i, value in enumerate(sorted_data):
        group_key = f"group_{int(value // 100)}"
        if group_key not in groups:
            groups[group_key] = []
        groups[group_key].append(value)

    return groups

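# Hedged example (not part of the original demo): one way the accumulation and
# grouping in process_large_dataset could be rewritten by hand as a single pass
# that avoids the intermediate filtered/sorted lists. The function name is
# illustrative only and this is a sketch, not SpaceTime Compiler output; the
# unused mean/deviation statistics are dropped because the original discards them.
def _process_large_dataset_single_pass(data: List[float], threshold: float) -> Dict[str, List[float]]:
    """Single-pass sketch: filter, scale, and group without the big intermediates."""
    groups: Dict[str, List[float]] = {}
    for value in data:
        if value > threshold:
            scaled = value * 2.0
            groups.setdefault(f"group_{int(scaled // 100)}", []).append(scaled)
    # Sorting each (smaller) group reproduces the order the original obtains
    # by sorting the whole filtered dataset up front.
    for bucket in groups.values():
        bucket.sort()
    return groups
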
def matrix_computation(A: np.ndarray, B: np.ndarray, C: np.ndarray) -> np.ndarray:
    """Matrix operations that can benefit from cache blocking"""
    # Opportunity: Matrix multiplication
    result1 = np.dot(A, B)

    # Opportunity: Another matrix multiplication
    result2 = np.dot(result1, C)

    # Opportunity: Element-wise operations in loop
    n_rows, n_cols = result2.shape
    for i in range(n_rows):
        for j in range(n_cols):
            result2[i, j] = np.sqrt(result2[i, j]) if result2[i, j] > 0 else 0

    return result2

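# Hedged example (not part of the original demo): the per-element Python loop in
# matrix_computation can usually be replaced with a vectorized NumPy expression.
# Manual sketch only, not compiler output.
def _matrix_computation_vectorized(A: np.ndarray, B: np.ndarray, C: np.ndarray) -> np.ndarray:
    """Same values as matrix_computation, with the sqrt loop vectorized."""
    result = A @ B @ C
    # sqrt of positive entries, 0 elsewhere (mirrors the per-element branch);
    # np.maximum avoids taking sqrt of negative values.
    return np.where(result > 0, np.sqrt(np.maximum(result, 0)), 0.0)
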
def analyze_log_files(log_paths: List[str]) -> Dict[str, int]:
    """Analyze multiple log files - external memory opportunity"""
    # Opportunity: Large accumulation
    all_entries = []
    for path in log_paths:
        with open(path, 'r') as f:
            entries = f.readlines()
            all_entries.extend(entries)

    # Opportunity: Processing large list
    error_counts = {}
    for entry in all_entries:
        if 'ERROR' in entry:
            error_type = extract_error_type(entry)
            if error_type not in error_counts:
                error_counts[error_type] = 0
            error_counts[error_type] += 1

    return error_counts

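# Hedged example (not part of the original demo): a streaming variant that never
# holds every log line in memory, counting errors as each line is read.
# Illustrative only.
def _analyze_log_files_streaming(log_paths: List[str]) -> Dict[str, int]:
    """Same counts as analyze_log_files, processing one line at a time."""
    from collections import Counter
    error_counts = Counter()
    for path in log_paths:
        with open(path, 'r') as f:
            for entry in f:
                if 'ERROR' in entry:
                    error_counts[extract_error_type(entry)] += 1
    return dict(error_counts)
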
def extract_error_type(log_entry: str) -> str:
    """Helper function to extract error type"""
    # Simplified error extraction
    if 'FileNotFound' in log_entry:
        return 'FileNotFound'
    elif 'ValueError' in log_entry:
        return 'ValueError'
    elif 'KeyError' in log_entry:
        return 'KeyError'
    else:
        return 'Unknown'

def simulate_particles(n_particles: int, n_steps: int) -> List[np.ndarray]:
    """Particle simulation with checkpointing opportunity"""
    # Initialize particles
    positions = np.random.rand(n_particles, 3)
    velocities = np.random.rand(n_particles, 3) - 0.5

    # Opportunity: Large trajectory accumulation
    trajectory = []

    # Opportunity: Large loop with accumulation
    for step in range(n_steps):
        # Update positions
        positions += velocities * 0.01  # dt = 0.01

        # Apply boundary conditions
        positions = np.clip(positions, 0, 1)

        # Store position (checkpoint opportunity)
        trajectory.append(positions.copy())

        # Apply some forces
        velocities *= 0.99  # Damping

    return trajectory

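# Hedged example (not part of the original demo): instead of growing a Python list
# of per-step copies, the trajectory can be preallocated as one ndarray (or, for
# very long runs, backed by np.memmap so it lives on disk). A minimal
# preallocation sketch assuming the same dt and damping as above:
def _simulate_particles_preallocated(n_particles: int, n_steps: int) -> np.ndarray:
    """Same dynamics as simulate_particles, storing the trajectory in one array."""
    positions = np.random.rand(n_particles, 3)
    velocities = np.random.rand(n_particles, 3) - 0.5
    trajectory = np.empty((n_steps, n_particles, 3))
    for step in range(n_steps):
        positions = np.clip(positions + velocities * 0.01, 0, 1)
        trajectory[step] = positions  # one write per step, no list growth
        velocities *= 0.99
    return trajectory
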
def build_index(documents: List[str]) -> Dict[str, List[int]]:
    """Build inverted index - memory optimization opportunity"""
    # Opportunity: Large dictionary with lists
    index = {}

    # Opportunity: Nested loops with accumulation
    for doc_id, document in enumerate(documents):
        words = document.lower().split()

        for word in words:
            if word not in index:
                index[word] = []
            index[word].append(doc_id)

    # Opportunity: Sorting index values
    for word in index:
        index[word] = sorted(set(index[word]))

    return index

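# Hedged example (not part of the original demo): keeping a set of doc ids per
# word from the start avoids storing duplicates and the later sorted(set(...))
# pass over every posting list. Illustrative only.
def _build_index_with_sets(documents: List[str]) -> Dict[str, List[int]]:
    """Same index as build_index, deduplicating doc ids as they are added."""
    from collections import defaultdict
    postings = defaultdict(set)
    for doc_id, document in enumerate(documents):
        for word in document.lower().split():
            postings[word].add(doc_id)
    return {word: sorted(ids) for word, ids in postings.items()}
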
def process_stream(data_stream) -> Tuple[float, float]:
    """Process streaming data - generator opportunity"""
    # Opportunity: Could use generator instead of list
    values = [float(x) for x in data_stream]

    # Calculate statistics
    mean = sum(values) / len(values)
    variance = sum((x - mean) ** 2 for x in values) / len(values)

    return mean, variance

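# Hedged example (not part of the original demo): the same statistics can be
# computed in a single pass over the stream (Welford's online algorithm) without
# building the values list. Illustrative only; returning (0.0, 0.0) for an empty
# stream is a choice made here, whereas the original would divide by zero.
def _process_stream_online(data_stream) -> Tuple[float, float]:
    """Population mean and variance computed incrementally."""
    count = 0
    mean = 0.0
    m2 = 0.0  # running sum of squared deviations from the current mean
    for x in data_stream:
        value = float(x)
        count += 1
        delta = value - mean
        mean += delta / count
        m2 += delta * (value - mean)
    if count == 0:
        return 0.0, 0.0
    return mean, m2 / count
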
def graph_analysis(adjacency_list: Dict[int, List[int]], start_node: int) -> List[int]:
    """Graph traversal - memory-bounded opportunity"""
    visited = set()
    # Opportunity: Queue could be memory-bounded
    queue = [start_node]
    traversal_order = []

    while queue:
        node = queue.pop(0)
        if node not in visited:
            visited.add(node)
            traversal_order.append(node)

            # Add all neighbors
            for neighbor in adjacency_list.get(node, []):
                if neighbor not in visited:
                    queue.append(neighbor)

    return traversal_order

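# Hedged example (not part of the original demo): list.pop(0) is O(n) per pop; a
# collections.deque gives O(1) pops and the same breadth-first order.
# Illustrative only.
def _graph_analysis_deque(adjacency_list: Dict[int, List[int]], start_node: int) -> List[int]:
    """Same traversal order as graph_analysis, using a deque as the queue."""
    from collections import deque
    visited = set()
    queue = deque([start_node])
    traversal_order = []
    while queue:
        node = queue.popleft()
        if node not in visited:
            visited.add(node)
            traversal_order.append(node)
            for neighbor in adjacency_list.get(node, []):
                if neighbor not in visited:
                    queue.append(neighbor)
    return traversal_order
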
if __name__ == "__main__":
    # Example usage
    print("This file demonstrates various optimization opportunities")
    print("Run the SpaceTime Compiler on this file to see optimizations")

    # Small examples
    data = list(range(10000))
    result = process_large_dataset(data, 5000)
    print(f"Processed {len(data)} items into {len(result)} groups")

    # Matrix example
    A = np.random.rand(100, 100)
    B = np.random.rand(100, 100)
    C = np.random.rand(100, 100)
    result_matrix = matrix_computation(A, B, C)
    print(f"Matrix computation result shape: {result_matrix.shape}")