#!/usr/bin/env python3
"""Visualize Ollama experiment results"""
import json
import matplotlib.pyplot as plt
import numpy as np
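
# The script expects ollama_experiment_results.json in the current directory.
# Assumed structure, inferred from the keys accessed below ("..." values are
# placeholders, not real measurements):
# {
#   "model": "...",
#   "experiments": {
#     "context_chunking": {
#       "full_context":    {"time": ..., "memory_delta": ...},
#       "chunked_context": {"time": ..., "memory_delta": ...,
#                           "num_chunks": ..., "chunk_size": ...}
#     },
#     "streaming": {
#       "full_generation":      {"time": ..., "estimated_tokens": ...},
#       "streaming_generation": {"time": ..., "estimated_tokens": ...}
#     },
#     "checkpointing": {
#       "no_checkpoint":   {"time": ...},
#       "with_checkpoint": {"time": ..., "checkpoint_interval": ...,
#                           "num_checkpoints": ...}
#     }
#   }
# }
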
def create_visualizations():
    # Load results
    with open("ollama_experiment_results.json", "r") as f:
        results = json.load(f)

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    fig.suptitle(f"LLM Space-Time Tradeoffs with {results['model']}", fontsize=16)

    # 1. Context Chunking Performance
    ax1 = axes[0, 0]
    context = results["experiments"]["context_chunking"]
    methods = ["Full Context\n(O(n) memory)", "Chunked √n\n(O(√n) memory)"]
    times = [context["full_context"]["time"], context["chunked_context"]["time"]]
    memory = [context["full_context"]["memory_delta"], context["chunked_context"]["memory_delta"]]
    x = np.arange(len(methods))
    width = 0.35
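
    # Twin y-axes: time (left) and memory delta (right) share one categorical
    # x-axis; the two quantities have different units, so they need separate scales.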
    ax1_mem = ax1.twinx()
    bars1 = ax1.bar(x - width/2, times, width, label='Time (s)', color='skyblue')
    bars2 = ax1_mem.bar(x + width/2, memory, width, label='Memory (MB)', color='lightcoral')
    ax1.set_ylabel('Time (seconds)', color='skyblue')
    ax1_mem.set_ylabel('Memory Delta (MB)', color='lightcoral')
    ax1.set_title('Context Processing: Time vs Memory')
    ax1.set_xticks(x)
    ax1.set_xticklabels(methods)

    # Add value labels
    for bar in bars1:
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2., height,
                 f'{height:.1f}s', ha='center', va='bottom')
    for bar in bars2:
        height = bar.get_height()
        ax1_mem.text(bar.get_x() + bar.get_width()/2., height,
                     f'{height:.2f}MB', ha='center', va='bottom')
    # 2. Streaming Performance
    ax2 = axes[0, 1]
    streaming = results["experiments"]["streaming"]
    methods = ["Full Generation", "Streaming"]
    times = [streaming["full_generation"]["time"], streaming["streaming_generation"]["time"]]
    tokens = [streaming["full_generation"]["estimated_tokens"],
              streaming["streaming_generation"]["estimated_tokens"]]
    ax2.bar(methods, times, color=['#ff9999', '#66b3ff'])
    ax2.set_ylabel('Time (seconds)')
    ax2.set_title('Streaming vs Full Generation')
    for i, (t, tok) in enumerate(zip(times, tokens)):
        ax2.text(i, t, f'{t:.2f}s\n({tok} tokens)', ha='center', va='bottom')
    # 3. Checkpointing Overhead
    ax3 = axes[1, 0]
    checkpoint = results["experiments"]["checkpointing"]
    methods = ["No Checkpoint", f"Checkpoint every {checkpoint['with_checkpoint']['checkpoint_interval']}"]
    times = [checkpoint["no_checkpoint"]["time"], checkpoint["with_checkpoint"]["time"]]
    bars = ax3.bar(methods, times, color=['#90ee90', '#ffd700'])
    ax3.set_ylabel('Time (seconds)')
    ax3.set_title('Checkpointing Time Overhead')

    # Calculate overhead: relative slowdown of the checkpointed run, in percent
    overhead = (times[1] / times[0] - 1) * 100
    # With transform=ax3.transAxes, coordinates are axes fractions in [0, 1],
    # so the y position must be a fraction rather than a data value.
    ax3.text(0.5, 0.9, f'Overhead: {overhead:.1f}%',
             ha='center', transform=ax3.transAxes, fontsize=12,
             bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    for bar, t in zip(bars, times):
        ax3.text(bar.get_x() + bar.get_width()/2., bar.get_height(),
                 f'{t:.1f}s', ha='center', va='bottom')
    # 4. Summary Statistics
    ax4 = axes[1, 1]
    ax4.axis('off')
    summary_text = f"""
Key Findings:

1. Context Chunking (√n chunks):
   • Slowdown: {context['chunked_context']['time'] / context['full_context']['time']:.1f}x
   • Chunks processed: {context['chunked_context']['num_chunks']}
   • Chunk size: {context['chunked_context']['chunk_size']} chars

2. Streaming vs Full:
   • Time difference: {abs(streaming['streaming_generation']['time'] - streaming['full_generation']['time']):.2f}s
   • Tokens generated: ~{streaming['full_generation']['estimated_tokens']}

3. Checkpointing:
   • Time overhead: {overhead:.1f}%
   • Checkpoints created: {checkpoint['with_checkpoint']['num_checkpoints']}
   • Interval: Every {checkpoint['with_checkpoint']['checkpoint_interval']} prompts

Conclusion: Real LLM inference shows significant
time overhead (18x) for √n memory reduction,
validating theoretical space-time tradeoffs.
"""
    ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
             fontsize=11, verticalalignment='top', family='monospace',
             bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.3))

    # Adjust layout to prevent overlapping
    plt.subplots_adjust(hspace=0.3, wspace=0.3)
    plt.savefig('ollama_spacetime_results.png', dpi=150, bbox_inches='tight')
    plt.close()  # Close the figure to free memory
    print("Visualization saved to: ollama_spacetime_results.png")
    # Create a second figure for detailed chunk analysis
    fig2, ax = plt.subplots(1, 1, figsize=(10, 6))

    # Show the √n relationship
    n_values = np.logspace(2, 6, 50)  # 100 to 1M
    sqrt_n = np.sqrt(n_values)
    ax.loglog(n_values, n_values, 'b-', label='O(n) - Full context', linewidth=2)
    ax.loglog(n_values, sqrt_n, 'r--', label='O(√n) - Chunked', linewidth=2)
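
    # With chunk size ≈ √n characters, the number of chunks is also ≈ √n, so the
    # measured chunk count can be plotted directly against the O(√n) reference line.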
    # Add our experimental point
    text_size = 14750  # Total context length from experiment
    chunk_count = results["experiments"]["context_chunking"]["chunked_context"]["num_chunks"]
    chunk_size = results["experiments"]["context_chunking"]["chunked_context"]["chunk_size"]
    ax.scatter([text_size], [chunk_count], color='green', s=100, zorder=5,
               label=f'Our experiment: {chunk_count} chunks of {chunk_size} chars')
    ax.set_xlabel('Context Size (characters)')
    ax.set_ylabel('Memory/Processing Units')
    ax.set_title('Space Complexity: Full vs Chunked Processing')
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('ollama_sqrt_n_relationship.png', dpi=150, bbox_inches='tight')
    plt.close()  # Close the figure
    print("√n relationship saved to: ollama_sqrt_n_relationship.png")
if __name__ == "__main__":
    create_visualizations()
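
# Usage: run after the experiment script has written
# ollama_experiment_results.json into the current directory, e.g.
#   python3 visualize_results.py   # this filename is illustrative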