#!/usr/bin/env python3
"""Visualize Ollama experiment results."""
import json

import matplotlib.pyplot as plt
import numpy as np


def create_visualizations():
    # Load results produced by the experiment script
    with open("ollama_experiment_results.json", "r") as f:
        results = json.load(f)

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    fig.suptitle(f"LLM Space-Time Tradeoffs with {results['model']}", fontsize=16)

    # 1. Context chunking performance: time and memory on twin y-axes
    ax1 = axes[0, 0]
    context = results["experiments"]["context_chunking"]
    methods = ["Full Context\n(O(n) memory)", "Chunked √n\n(O(√n) memory)"]
    times = [context["full_context"]["time"], context["chunked_context"]["time"]]
    memory = [context["full_context"]["memory_delta"],
              context["chunked_context"]["memory_delta"]]

    x = np.arange(len(methods))
    width = 0.35

    ax1_mem = ax1.twinx()
    bars1 = ax1.bar(x - width / 2, times, width, label='Time (s)', color='skyblue')
    bars2 = ax1_mem.bar(x + width / 2, memory, width, label='Memory (MB)',
                        color='lightcoral')

    ax1.set_ylabel('Time (seconds)', color='skyblue')
    ax1_mem.set_ylabel('Memory Delta (MB)', color='lightcoral')
    ax1.set_title('Context Processing: Time vs Memory')
    ax1.set_xticks(x)
    ax1.set_xticklabels(methods)
    # Twin axes each build their own legend, so combine the handles manually
    ax1.legend([bars1[0], bars2[0]], ['Time (s)', 'Memory (MB)'], loc='upper left')

    # Add value labels above each bar
    for bar in bars1:
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width() / 2, height, f'{height:.1f}s',
                 ha='center', va='bottom')
    for bar in bars2:
        height = bar.get_height()
        ax1_mem.text(bar.get_x() + bar.get_width() / 2, height, f'{height:.2f}MB',
                     ha='center', va='bottom')

    # 2. Streaming vs full generation
    ax2 = axes[0, 1]
    streaming = results["experiments"]["streaming"]
    methods = ["Full Generation", "Streaming"]
    times = [streaming["full_generation"]["time"],
             streaming["streaming_generation"]["time"]]
    tokens = [streaming["full_generation"]["estimated_tokens"],
              streaming["streaming_generation"]["estimated_tokens"]]

    ax2.bar(methods, times, color=['#ff9999', '#66b3ff'])
    ax2.set_ylabel('Time (seconds)')
    ax2.set_title('Streaming vs Full Generation')
    for i, (t, tok) in enumerate(zip(times, tokens)):
        ax2.text(i, t, f'{t:.2f}s\n({tok} tokens)', ha='center', va='bottom')

    # 3. Checkpointing overhead
    ax3 = axes[1, 0]
    checkpoint = results["experiments"]["checkpointing"]
    interval = checkpoint["with_checkpoint"]["checkpoint_interval"]
    methods = ["No Checkpoint", f"Checkpoint every {interval}"]
    times = [checkpoint["no_checkpoint"]["time"], checkpoint["with_checkpoint"]["time"]]

    bars = ax3.bar(methods, times, color=['#90ee90', '#ffd700'])
    ax3.set_ylabel('Time (seconds)')
    ax3.set_title('Checkpointing Time Overhead')

    # Annotate the relative overhead; transform=ax3.transAxes expects
    # axes-fraction coordinates in [0, 1], not data coordinates
    overhead = (times[1] / times[0] - 1) * 100
    ax3.text(0.5, 0.9, f'Overhead: {overhead:.1f}%',
             ha='center', transform=ax3.transAxes, fontsize=12,
             bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    for bar, t in zip(bars, times):
        ax3.text(bar.get_x() + bar.get_width() / 2, bar.get_height(), f'{t:.1f}s',
                 ha='center', va='bottom')

    # 4. Summary statistics as a text panel
    ax4 = axes[1, 1]
    ax4.axis('off')

    slowdown = context['chunked_context']['time'] / context['full_context']['time']
    time_diff = abs(streaming['streaming_generation']['time']
                    - streaming['full_generation']['time'])
    summary_text = f"""
Key Findings:

1. Context Chunking (√n chunks):
   • Slowdown: {slowdown:.1f}x
   • Chunks processed: {context['chunked_context']['num_chunks']}
   • Chunk size: {context['chunked_context']['chunk_size']} chars

2. Streaming vs Full:
   • Time difference: {time_diff:.2f}s
   • Tokens generated: ~{streaming['full_generation']['estimated_tokens']}

3. Checkpointing:
   • Time overhead: {overhead:.1f}%
   • Checkpoints created: {checkpoint['with_checkpoint']['num_checkpoints']}
   • Interval: Every {interval} prompts

Conclusion: Real LLM inference shows significant
time overhead ({slowdown:.0f}x here) for √n memory
reduction, validating theoretical space-time tradeoffs.
"""
    ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes, fontsize=11,
             verticalalignment='top', family='monospace',
             bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.3))

    # Adjust spacing to prevent subplot overlap
    plt.subplots_adjust(hspace=0.3, wspace=0.3)
    plt.savefig('ollama_spacetime_results.png', dpi=150, bbox_inches='tight')
    plt.close()  # Close the figure to free memory
    print("Visualization saved to: ollama_spacetime_results.png")

    # Second figure: the √n relationship on a log-log plot
    fig2, ax = plt.subplots(1, 1, figsize=(10, 6))

    n_values = np.logspace(2, 6, 50)  # context sizes from 1e2 to 1e6 characters
    sqrt_n = np.sqrt(n_values)

    ax.loglog(n_values, n_values, 'b-', label='O(n) - Full context', linewidth=2)
    ax.loglog(n_values, sqrt_n, 'r--', label='O(√n) - Chunked', linewidth=2)

    # Overlay our experimental data point
    text_size = 14750  # Total context length from experiment (chars)
    chunked = results["experiments"]["context_chunking"]["chunked_context"]
    chunk_count = chunked["num_chunks"]
    chunk_size = chunked["chunk_size"]
    ax.scatter([text_size], [chunk_count], color='green', s=100, zorder=5,
               label=f'Our experiment: {chunk_count} chunks of {chunk_size} chars')

    ax.set_xlabel('Context Size (characters)')
    ax.set_ylabel('Memory/Processing Units')
    ax.set_title('Space Complexity: Full vs Chunked Processing')
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('ollama_sqrt_n_relationship.png', dpi=150, bbox_inches='tight')
    plt.close()  # Close the figure
    print("√n relationship saved to: ollama_sqrt_n_relationship.png")


if __name__ == "__main__":
    create_visualizations()
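
# ---------------------------------------------------------------------------
# Expected shape of ollama_experiment_results.json — a sketch inferred from
# the keys this script reads. The file is produced by the companion experiment
# script, so treat this as an assumption, not a specification:
#
# {
#   "model": str,
#   "experiments": {
#     "context_chunking": {
#       "full_context":    {"time": float, "memory_delta": float},
#       "chunked_context": {"time": float, "memory_delta": float,
#                           "num_chunks": int, "chunk_size": int}
#     },
#     "streaming": {
#       "full_generation":      {"time": float, "estimated_tokens": int},
#       "streaming_generation": {"time": float, "estimated_tokens": int}
#     },
#     "checkpointing": {
#       "no_checkpoint":   {"time": float},
#       "with_checkpoint": {"time": float, "checkpoint_interval": int,
#                           "num_checkpoints": int}
#     }
#   }
# }
# ---------------------------------------------------------------------------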