""" Interactive Dashboard for Space-Time Tradeoffs Visualizes Williams' theoretical result and practical manifestations """ import streamlit as st import numpy as np import pandas as pd import plotly.graph_objects as go import plotly.express as px from plotly.subplots import make_subplots import json from pathlib import Path # Page configuration st.set_page_config( page_title="Space-Time Tradeoffs Dashboard", page_icon="šŸ“Š", layout="wide" ) # Custom CSS st.markdown(""" """, unsafe_allow_html=True) # Title and introduction st.title("šŸ”„ The Ubiquity of Space-Time Tradeoffs") st.markdown(""" This dashboard demonstrates **Ryan Williams' 2025 result**: TIME[t] āŠ† SPACE[√(t log t)] Explore how this theoretical bound manifests in real computing systems. """) # Sidebar navigation page = st.sidebar.selectbox( "Choose a visualization", ["Overview", "Theoretical Explorer", "Experimental Results", "Real-World Systems", "Tradeoff Calculator", "Interactive Demos"] ) # Helper functions def create_space_time_curve(n_points=100): """Generate theoretical space-time tradeoff curve""" t = np.logspace(1, 6, n_points) s_williams = np.sqrt(t * np.log(t)) s_naive = t s_minimal = np.log(t) return t, s_williams, s_naive, s_minimal def create_3d_tradeoff_surface(): """Create 3D visualization of space-time-quality tradeoffs""" space = np.logspace(0, 3, 50) time = np.logspace(0, 3, 50) S, T = np.meshgrid(space, time) # Quality as function of space and time Q = 1 / (1 + np.exp(-(np.log(S) + np.log(T) - 4))) return S, T, Q # Page: Overview if page == "Overview": st.header("Key Concepts") col1, col2, col3 = st.columns(3) with col1: st.metric("Theoretical Bound", "√(t log t)", "Space for time t") st.info("Any computation taking time t can be done with √(t log t) memory") with col2: st.metric("Practical Factor", "100-10,000Ɨ", "Constant overhead") st.warning("Real systems have I/O, cache hierarchies, coordination costs") with col3: st.metric("Ubiquity", "Everywhere", "In modern systems") st.success("Databases, ML, distributed systems all use these tradeoffs") # Main visualization st.subheader("The Fundamental Tradeoff") t, s_williams, s_naive, s_minimal = create_space_time_curve() fig = go.Figure() fig.add_trace(go.Scatter( x=t, y=s_naive, mode='lines', name='Naive (Space = Time)', line=dict(color='red', dash='dash') )) fig.add_trace(go.Scatter( x=t, y=s_williams, mode='lines', name='Williams\' Bound: √(t log t)', line=dict(color='blue', width=3) )) fig.add_trace(go.Scatter( x=t, y=s_minimal, mode='lines', name='Minimal Space: log(t)', line=dict(color='green', dash='dot') )) fig.update_xaxes(type="log", title="Time (t)") fig.update_yaxes(type="log", title="Space (s)") fig.update_layout( title="Theoretical Space-Time Bounds", height=500, hovermode='x', template="plotly_dark" ) st.plotly_chart(fig, use_container_width=True) # Page: Theoretical Explorer elif page == "Theoretical Explorer": st.header("Interactive Theoretical Explorer") col1, col2 = st.columns([1, 2]) with col1: st.subheader("Parameters") time_complexity = st.slider( "Time Complexity (log scale)", min_value=1.0, max_value=6.0, value=3.0, step=0.1 ) show_practical = st.checkbox("Show practical bounds", value=True) constant_factor = st.slider( "Constant factor", min_value=1, max_value=1000, value=100, disabled=not show_practical ) t_value = 10 ** time_complexity s_theory = np.sqrt(t_value * np.log(t_value)) s_practical = s_theory * constant_factor if show_practical else s_theory st.metric("Time (t)", f"{t_value:,.0f}") st.metric("Space (theory)", f"{s_theory:,.0f}") if show_practical: st.metric("Space (practical)", f"{s_practical:,.0f}") with col2: # Create visualization t_range = np.logspace(1, 6, 100) s_range_theory = np.sqrt(t_range * np.log(t_range)) s_range_practical = s_range_theory * constant_factor fig = go.Figure() fig.add_trace(go.Scatter( x=t_range, y=s_range_theory, mode='lines', name='Theoretical Bound', line=dict(color='blue', width=2) )) if show_practical: fig.add_trace(go.Scatter( x=t_range, y=s_range_practical, mode='lines', name=f'Practical ({constant_factor}Ɨ overhead)', line=dict(color='orange', width=2) )) # Add current point fig.add_trace(go.Scatter( x=[t_value], y=[s_theory], mode='markers', name='Current Selection', marker=dict(size=15, color='red', symbol='star') )) fig.update_xaxes(type="log", title="Time") fig.update_yaxes(type="log", title="Space") fig.update_layout( title="Space Requirements for Time-Bounded Computation", height=500, template="plotly_dark" ) st.plotly_chart(fig, use_container_width=True) # Page: Experimental Results elif page == "Experimental Results": st.header("Experimental Validation") tabs = st.tabs(["Maze Solver", "Sorting", "Streaming", "Summary"]) with tabs[0]: st.subheader("Maze Solving Algorithms") # Simulated data (in practice, load from experiment results) maze_data = pd.DataFrame({ 'Size': [20, 30, 40, 50], 'BFS_Time': [0.001, 0.003, 0.008, 0.015], 'BFS_Memory': [1600, 3600, 6400, 10000], 'Limited_Time': [0.01, 0.05, 0.15, 0.35], 'Limited_Memory': [80, 120, 160, 200] }) fig = make_subplots( rows=1, cols=2, subplot_titles=("Time Complexity", "Memory Usage") ) fig.add_trace( go.Scatter(x=maze_data['Size'], y=maze_data['BFS_Time'], name='BFS', mode='lines+markers'), row=1, col=1 ) fig.add_trace( go.Scatter(x=maze_data['Size'], y=maze_data['Limited_Time'], name='Memory-Limited', mode='lines+markers'), row=1, col=1 ) fig.add_trace( go.Scatter(x=maze_data['Size'], y=maze_data['BFS_Memory'], name='BFS', mode='lines+markers', showlegend=False), row=1, col=2 ) fig.add_trace( go.Scatter(x=maze_data['Size'], y=maze_data['Limited_Memory'], name='Memory-Limited', mode='lines+markers', showlegend=False), row=1, col=2 ) fig.update_xaxes(title_text="Maze Size", row=1, col=1) fig.update_xaxes(title_text="Maze Size", row=1, col=2) fig.update_yaxes(title_text="Time (s)", row=1, col=1) fig.update_yaxes(title_text="Memory (cells)", row=1, col=2) fig.update_layout(height=400, template="plotly_dark") st.plotly_chart(fig, use_container_width=True) st.info("Memory-limited DFS uses √n memory but requires ~n√n time due to recomputation") with tabs[1]: st.subheader("Sorting with Checkpoints") sort_times = { 'Size': [1000, 5000, 10000, 20000], 'In_Memory': [0.00001, 0.0001, 0.0003, 0.0008], 'Checkpointed': [0.268, 2.5, 8.2, 25.3], 'Ratio': [26800, 25000, 27333, 31625] } df = pd.DataFrame(sort_times) fig = px.bar(df, x='Size', y=['In_Memory', 'Checkpointed'], title="Sorting Time: In-Memory vs Checkpointed", labels={'value': 'Time (seconds)', 'variable': 'Method'}, log_y=True, barmode='group', template="plotly_dark") st.plotly_chart(fig, use_container_width=True) st.warning("Checkpointed sorting shows massive overhead (>1000Ɨ) due to disk I/O") with tabs[2]: st.subheader("Stream Processing") stream_data = { 'Window_Size': [10, 50, 100, 500, 1000], 'Full_Storage_Time': [0.005, 0.025, 0.05, 0.25, 0.5], 'Sliding_Window_Time': [0.001, 0.001, 0.001, 0.002, 0.003], 'Memory_Ratio': [100, 100, 100, 100, 100] } df = pd.DataFrame(stream_data) fig = go.Figure() fig.add_trace(go.Scatter( x=df['Window_Size'], y=df['Full_Storage_Time'], mode='lines+markers', name='Full Storage', line=dict(color='red') )) fig.add_trace(go.Scatter( x=df['Window_Size'], y=df['Sliding_Window_Time'], mode='lines+markers', name='Sliding Window', line=dict(color='green') )) fig.update_xaxes(title="Window Size") fig.update_yaxes(title="Time (seconds)", type="log") fig.update_layout( title="Stream Processing: Less Memory = Faster!", template="plotly_dark", height=400 ) st.plotly_chart(fig, use_container_width=True) st.success("Sliding window (O(w) space) is faster due to cache locality!") with tabs[3]: st.subheader("Summary of Findings") findings = pd.DataFrame({ 'Experiment': ['Maze Solver', 'Sorting', 'Streaming'], 'Space Reduction': ['n → √n', 'n → √n', 'n → w'], 'Time Increase': ['√nƗ', '>1000Ɨ', '0.1Ɨ (faster!)'], 'Bottleneck': ['Recomputation', 'Disk I/O', 'Cache Locality'] }) st.table(findings) # Page: Real-World Systems elif page == "Real-World Systems": st.header("Space-Time Tradeoffs in Production") system = st.selectbox( "Choose a system", ["Databases", "Large Language Models", "Distributed Computing"] ) if system == "Databases": st.subheader("Database Query Processing") col1, col2 = st.columns(2) with col1: st.markdown("### Hash Join vs Nested Loop") memory_limit = st.slider("work_mem (MB)", 1, 1024, 64) table_size = st.slider("Table size (GB)", 1, 100, 10) # Simulate query planner decision if memory_limit > table_size * 10: join_type = "Hash Join" time_estimate = table_size * 0.1 memory_use = min(memory_limit, table_size * 50) else: join_type = "Nested Loop" time_estimate = table_size ** 2 * 0.01 memory_use = 1 st.metric("Selected Algorithm", join_type) st.metric("Estimated Time", f"{time_estimate:.1f} seconds") st.metric("Memory Usage", f"{memory_use} MB") with col2: # Visualization mem_range = np.logspace(0, 3, 100) hash_time = np.ones_like(mem_range) * table_size * 0.1 nested_time = np.ones_like(mem_range) * table_size ** 2 * 0.01 # Hash join only works with enough memory hash_time[mem_range < table_size * 10] = np.inf fig = go.Figure() fig.add_trace(go.Scatter( x=mem_range, y=hash_time, mode='lines', name='Hash Join', line=dict(color='blue') )) fig.add_trace(go.Scatter( x=mem_range, y=nested_time, mode='lines', name='Nested Loop', line=dict(color='red') )) fig.add_vline(x=memory_limit, line_dash="dash", line_color="green", annotation_text="Current work_mem") fig.update_xaxes(type="log", title="Memory Available (MB)") fig.update_yaxes(type="log", title="Query Time (seconds)") fig.update_layout( title="Join Algorithm Selection", template="plotly_dark", height=400 ) st.plotly_chart(fig, use_container_width=True) elif system == "Large Language Models": st.subheader("LLM Memory Optimizations") col1, col2 = st.columns([1, 2]) with col1: model_size = st.selectbox("Model Size", ["7B", "13B", "70B", "175B"]) optimization = st.multiselect( "Optimizations", ["Quantization (INT8)", "Flash Attention", "Multi-Query Attention"], default=[] ) # Calculate memory requirements base_memory = {"7B": 28, "13B": 52, "70B": 280, "175B": 700}[model_size] memory = base_memory speedup = 1.0 if "Quantization (INT8)" in optimization: memory /= 4 speedup *= 0.8 if "Flash Attention" in optimization: memory *= 0.7 speedup *= 0.9 if "Multi-Query Attention" in optimization: memory *= 0.6 speedup *= 0.95 st.metric("Memory Required", f"{memory:.0f} GB") st.metric("Relative Speed", f"{speedup:.2f}Ɨ") st.metric("Context Length", f"{int(100000 / (memory / base_memory))} tokens") with col2: # Create optimization impact chart categories = ['Memory', 'Speed', 'Context Length', 'Quality'] fig = go.Figure() # Baseline fig.add_trace(go.Scatterpolar( r=[100, 100, 100, 100], theta=categories, fill='toself', name='Baseline', line=dict(color='red') )) # With optimizations memory_score = (base_memory / memory) * 100 speed_score = speedup * 100 context_score = (memory_score) * 100 / 100 quality_score = 95 if optimization else 100 fig.add_trace(go.Scatterpolar( r=[memory_score, speed_score, context_score, quality_score], theta=categories, fill='toself', name='With Optimizations', line=dict(color='green') )) fig.update_layout( polar=dict( radialaxis=dict( visible=True, range=[0, 200] )), showlegend=True, template="plotly_dark", title="Optimization Impact" ) st.plotly_chart(fig, use_container_width=True) elif system == "Distributed Computing": st.subheader("MapReduce Shuffle Memory") # Interactive shuffle buffer sizing cluster_size = st.slider("Cluster Size (nodes)", 10, 1000, 100) data_size = st.slider("Data Size (TB)", 1, 100, 10) # Calculate optimal buffer size data_per_node = data_size * 1024 / cluster_size # GB per node optimal_buffer = np.sqrt(data_per_node * 1024) # MB col1, col2, col3 = st.columns(3) with col1: st.metric("Data per Node", f"{data_per_node:.1f} GB") with col2: st.metric("Optimal Buffer Size", f"{optimal_buffer:.0f} MB") with col3: st.metric("Buffer/Data Ratio", f"1:{int(data_per_node * 1024 / optimal_buffer)}") # Visualization of shuffle performance buffer_sizes = np.logspace(1, 4, 100) # Performance model io_time = data_per_node * 1024 / buffer_sizes * 10 # More I/O with small buffers cpu_time = buffer_sizes / 100 # More CPU with large buffers total_time = io_time + cpu_time fig = go.Figure() fig.add_trace(go.Scatter( x=buffer_sizes, y=io_time, mode='lines', name='I/O Time', line=dict(color='red') )) fig.add_trace(go.Scatter( x=buffer_sizes, y=cpu_time, mode='lines', name='CPU Time', line=dict(color='blue') )) fig.add_trace(go.Scatter( x=buffer_sizes, y=total_time, mode='lines', name='Total Time', line=dict(color='green', width=3) )) fig.add_vline(x=optimal_buffer, line_dash="dash", line_color="white", annotation_text="√n Optimal") fig.update_xaxes(type="log", title="Shuffle Buffer Size (MB)") fig.update_yaxes(type="log", title="Time (seconds)") fig.update_layout( title="Shuffle Performance vs Buffer Size", template="plotly_dark", height=400 ) st.plotly_chart(fig, use_container_width=True) st.info("The optimal buffer size follows the √n pattern predicted by theory!") # Page: Tradeoff Calculator elif page == "Tradeoff Calculator": st.header("Space-Time Tradeoff Calculator") st.markdown("Calculate optimal configurations for your system") col1, col2 = st.columns(2) with col1: st.subheader("System Parameters") total_data = st.number_input("Total Data Size (GB)", min_value=1, value=100) available_memory = st.number_input("Available Memory (GB)", min_value=1, value=16) io_speed = st.slider("I/O Speed (MB/s)", 50, 5000, 500) cpu_speed = st.slider("CPU Speed (GFLOPS)", 10, 1000, 100) workload_type = st.selectbox( "Workload Type", ["Batch Processing", "Stream Processing", "Interactive Query", "ML Training"] ) with col2: st.subheader("Recommendations") # Calculate recommendations based on workload memory_ratio = available_memory / total_data if memory_ratio > 1: st.success("āœ… Everything fits in memory!") strategy = "In-memory processing" chunk_size = total_data elif memory_ratio > 0.1: st.info("šŸ“Š Use hybrid approach") strategy = "Partial caching with smart eviction" chunk_size = np.sqrt(total_data * available_memory) else: st.warning("āš ļø Heavy space constraints") strategy = "Streaming with checkpoints" chunk_size = available_memory / 10 st.metric("Recommended Strategy", strategy) st.metric("Optimal Chunk Size", f"{chunk_size:.1f} GB") # Time estimates if workload_type == "Batch Processing": time_memory = total_data / cpu_speed time_disk = total_data / io_speed * 1000 + total_data / cpu_speed * 2 time_optimal = total_data / np.sqrt(available_memory) * 10 else: time_memory = 1 time_disk = 100 time_optimal = 10 # Comparison chart fig = go.Figure(data=[ go.Bar(name='All in Memory', x=['Time'], y=[time_memory]), go.Bar(name='All on Disk', x=['Time'], y=[time_disk]), go.Bar(name='Optimal √n', x=['Time'], y=[time_optimal]) ]) fig.update_layout( title="Processing Time Comparison", yaxis_title="Time (seconds)", template="plotly_dark", height=300 ) st.plotly_chart(fig, use_container_width=True) # Page: Interactive Demos elif page == "Interactive Demos": st.header("Interactive Demonstrations") demo = st.selectbox( "Choose a demo", ["Sorting Visualizer", "Cache Simulator", "Attention Mechanism"] ) if demo == "Sorting Visualizer": st.subheader("Watch Space-Time Tradeoffs in Action") size = st.slider("Array Size", 10, 100, 50) algorithm = st.radio("Algorithm", ["In-Memory Sort", "External Sort with √n Memory"]) if st.button("Run Sorting"): # Simulate sorting progress = st.progress(0) status = st.empty() if algorithm == "In-Memory Sort": steps = size * np.log2(size) for i in range(int(steps)): progress.progress(i / steps) status.text(f"Comparing elements... Step {i}/{int(steps)}") st.success(f"Completed in {steps:.0f} operations using {size} memory units") else: chunks = int(np.sqrt(size)) total_steps = size * np.log2(size) * chunks for i in range(int(total_steps)): progress.progress(i / total_steps) if i % size == 0: status.text(f"Writing checkpoint {i//size}/{chunks}...") else: status.text(f"Processing... Step {i}/{int(total_steps)}") st.warning(f"Completed in {total_steps:.0f} operations using {chunks} memory units") elif demo == "Cache Simulator": st.subheader("Memory Hierarchy Simulation") # Create memory hierarchy visualization levels = { 'L1 Cache': {'size': 32, 'latency': 1}, 'L2 Cache': {'size': 256, 'latency': 10}, 'L3 Cache': {'size': 8192, 'latency': 50}, 'RAM': {'size': 32768, 'latency': 100}, 'SSD': {'size': 512000, 'latency': 10000} } access_pattern = st.selectbox( "Access Pattern", ["Sequential", "Random", "Strided"] ) working_set = st.slider("Working Set Size (KB)", 1, 100000, 1000, step=10) # Determine which level serves the request for level, specs in levels.items(): if working_set <= specs['size']: serving_level = level latency = specs['latency'] break col1, col2 = st.columns(2) with col1: st.metric("Data Served From", serving_level) st.metric("Average Latency", f"{latency} ns") st.metric("Throughput", f"{1000/latency:.1f} GB/s") with col2: # Visualization fig = go.Figure() sizes = [specs['size'] for specs in levels.values()] latencies = [specs['latency'] for specs in levels.values()] names = list(levels.keys()) fig.add_trace(go.Scatter( x=sizes, y=latencies, mode='markers+text', text=names, textposition="top center", marker=dict(size=20) )) fig.add_vline(x=working_set, line_dash="dash", line_color="red", annotation_text="Working Set") fig.update_xaxes(type="log", title="Capacity (KB)") fig.update_yaxes(type="log", title="Latency (ns)") fig.update_layout( title="Memory Hierarchy", template="plotly_dark", height=400 ) st.plotly_chart(fig, use_container_width=True) # Footer st.markdown("---") st.markdown("""

Created for the Ubiquity Project | Based on Ryan Williams' 2025 STOC paper

TIME[t] āŠ† SPACE[√(t log t)] - A fundamental limit of computation

""", unsafe_allow_html=True)