#!/usr/bin/env python3
"""Visualize Ollama experiment results."""

import json

import matplotlib.pyplot as plt
import numpy as np


def create_visualizations():
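    """Render the experiment summary figures.

    Assumes ollama_experiment_results.json has the layout implied by the
    keys read below: a top-level "model" string and an "experiments" object
    with "context_chunking", "streaming", and "checkpointing" results.
    """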
    # Load results
    with open("ollama_experiment_results.json", "r") as f:
        results = json.load(f)

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    fig.suptitle(f"LLM Space-Time Tradeoffs with {results['model']}", fontsize=16)

    # 1. Context chunking performance
    ax1 = axes[0, 0]
    context = results["experiments"]["context_chunking"]
    methods = ["Full Context\n(O(n) memory)", "Chunked √n\n(O(√n) memory)"]
    times = [context["full_context"]["time"], context["chunked_context"]["time"]]
    memory = [context["full_context"]["memory_delta"], context["chunked_context"]["memory_delta"]]

    x = np.arange(len(methods))
    width = 0.35

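    # Seconds and megabytes need separate scales, so the memory bars go on a
    # twin y-axis; offsetting each pair by half a bar width keeps them side by side.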
    ax1_mem = ax1.twinx()
    bars1 = ax1.bar(x - width / 2, times, width, label='Time (s)', color='skyblue')
    bars2 = ax1_mem.bar(x + width / 2, memory, width, label='Memory (MB)', color='lightcoral')

    ax1.set_ylabel('Time (seconds)', color='skyblue')
    ax1_mem.set_ylabel('Memory Delta (MB)', color='lightcoral')
    ax1.set_title('Context Processing: Time vs Memory')
    ax1.set_xticks(x)
    ax1.set_xticklabels(methods)

    # Add value labels above each bar
    for bar in bars1:
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width() / 2., height,
                 f'{height:.1f}s', ha='center', va='bottom')
    for bar in bars2:
        height = bar.get_height()
        ax1_mem.text(bar.get_x() + bar.get_width() / 2., height,
                     f'{height:.2f}MB', ha='center', va='bottom')

    # 2. Streaming performance
    ax2 = axes[0, 1]
    streaming = results["experiments"]["streaming"]
    methods = ["Full Generation", "Streaming"]
    times = [streaming["full_generation"]["time"], streaming["streaming_generation"]["time"]]
    tokens = [streaming["full_generation"]["estimated_tokens"],
              streaming["streaming_generation"]["estimated_tokens"]]

    ax2.bar(methods, times, color=['#ff9999', '#66b3ff'])
    ax2.set_ylabel('Time (seconds)')
    ax2.set_title('Streaming vs Full Generation')

    for i, (t, tok) in enumerate(zip(times, tokens)):
        ax2.text(i, t, f'{t:.2f}s\n({tok} tokens)', ha='center', va='bottom')

    # 3. Checkpointing overhead
    ax3 = axes[1, 0]
    checkpoint = results["experiments"]["checkpointing"]
    methods = ["No Checkpoint",
               f"Checkpoint every {checkpoint['with_checkpoint']['checkpoint_interval']}"]
    times = [checkpoint["no_checkpoint"]["time"], checkpoint["with_checkpoint"]["time"]]

    bars = ax3.bar(methods, times, color=['#90ee90', '#ffd700'])
    ax3.set_ylabel('Time (seconds)')
    ax3.set_title('Checkpointing Time Overhead')

    # Relative slowdown from checkpointing, as a percentage
    overhead = (times[1] / times[0] - 1) * 100
    # transform=ax3.transAxes interprets coordinates as axes fractions (0-1),
    # so the y position must be a fraction, not a data value like max(times) * 0.9.
    ax3.text(0.5, 0.9, f'Overhead: {overhead:.1f}%',
             ha='center', transform=ax3.transAxes, fontsize=12,
             bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    for bar, t in zip(bars, times):
        ax3.text(bar.get_x() + bar.get_width() / 2., bar.get_height(),
                 f'{t:.1f}s', ha='center', va='bottom')

    # 4. Summary statistics
    ax4 = axes[1, 1]
    ax4.axis('off')

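    # Compute the chunking slowdown once; it feeds both the bullet list and
    # the conclusion below (the original run reported ~18x).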
    slowdown = context['chunked_context']['time'] / context['full_context']['time']
    summary_text = f"""
Key Findings:

1. Context Chunking (√n chunks):
   • Slowdown: {slowdown:.1f}x
   • Chunks processed: {context['chunked_context']['num_chunks']}
   • Chunk size: {context['chunked_context']['chunk_size']} chars

2. Streaming vs Full:
   • Time difference: {abs(streaming['streaming_generation']['time'] - streaming['full_generation']['time']):.2f}s
   • Tokens generated: ~{streaming['full_generation']['estimated_tokens']}

3. Checkpointing:
   • Time overhead: {overhead:.1f}%
   • Checkpoints created: {checkpoint['with_checkpoint']['num_checkpoints']}
   • Interval: Every {checkpoint['with_checkpoint']['checkpoint_interval']} prompts

Conclusion: Real LLM inference shows significant
time overhead ({slowdown:.0f}x here) for √n memory reduction,
validating theoretical space-time tradeoffs.
"""

    ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
             fontsize=11, verticalalignment='top', family='monospace',
             bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.3))

    # Adjust layout to prevent overlapping
    plt.subplots_adjust(hspace=0.3, wspace=0.3)
    plt.savefig('ollama_spacetime_results.png', dpi=150, bbox_inches='tight')
    plt.close()  # Close the figure to free memory
    print("Visualization saved to: ollama_spacetime_results.png")

    # Create a second figure for detailed chunk analysis
    fig2, ax = plt.subplots(1, 1, figsize=(10, 6))

    # Show the √n relationship
    n_values = np.logspace(2, 6, 50)  # 100 to 1M characters
    sqrt_n = np.sqrt(n_values)

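    # On log-log axes both complexity curves plot as straight lines, which
    # makes the widening gap between O(n) and O(√n) memory easy to read off.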
    ax.loglog(n_values, n_values, 'b-', label='O(n) - Full context', linewidth=2)
    ax.loglog(n_values, sqrt_n, 'r--', label='O(√n) - Chunked', linewidth=2)

    # Add our experimental point. With √n chunking, num_chunks ≈ chunk_size ≈ √n,
    # so the measured point should land near the O(√n) curve.
    text_size = 14750  # Total context length (characters) from the experiment
    chunked = results["experiments"]["context_chunking"]["chunked_context"]
    chunk_count = chunked["num_chunks"]
    chunk_size = chunked["chunk_size"]
    ax.scatter([text_size], [chunk_count], color='green', s=100, zorder=5,
               label=f'Our experiment: {chunk_count} chunks of {chunk_size} chars')

    ax.set_xlabel('Context Size (characters)')
    ax.set_ylabel('Memory/Processing Units')
    ax.set_title('Space Complexity: Full vs Chunked Processing')
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('ollama_sqrt_n_relationship.png', dpi=150, bbox_inches='tight')
    plt.close()  # Close the figure to free memory
    print("√n relationship saved to: ollama_sqrt_n_relationship.png")


if __name__ == "__main__":
    create_visualizations()