sqrtspace-experiments/dashboard/app.py

"""
Interactive Dashboard for Space-Time Tradeoffs
Visualizes Williams' theoretical result and practical manifestations
"""

import streamlit as st
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import json
from pathlib import Path

# Page configuration
st.set_page_config(
    page_title="Space-Time Tradeoffs Dashboard",
    page_icon="📊",
    layout="wide"
)

# Custom CSS
st.markdown("""
<style>
    .main {padding-top: 1rem;}
    .stPlotlyChart {background-color: #0e1117;}
    div[data-testid="metric-container"] {
        background-color: #262730;
        border: 1px solid #333;
        padding: 5px 10px;
        border-radius: 5px;
        margin: 5px 0;
    }
</style>
""", unsafe_allow_html=True)

# Title and introduction
st.title("🔄 The Ubiquity of Space-Time Tradeoffs")
st.markdown("""
This dashboard demonstrates **Ryan Williams' 2025 result**: TIME[t] ⊆ SPACE[√(t log t)]

Explore how this theoretical bound manifests in real computing systems.
""")

# Sidebar navigation
page = st.sidebar.selectbox(
    "Choose a visualization",
    ["Overview", "Theoretical Explorer", "Experimental Results",
     "Real-World Systems", "Tradeoff Calculator", "Interactive Demos"]
)

# Helper functions
def create_space_time_curve(n_points=100):
    """Generate theoretical space-time tradeoff curve"""
    t = np.logspace(1, 6, n_points)
    s_williams = np.sqrt(t * np.log(t))
    s_naive = t
    s_minimal = np.log(t)

    return t, s_williams, s_naive, s_minimal

def create_3d_tradeoff_surface():
    """Create 3D visualization of space-time-quality tradeoffs"""
    space = np.logspace(0, 3, 50)
    time = np.logspace(0, 3, 50)
    S, T = np.meshgrid(space, time)

    # Quality as function of space and time
    Q = 1 / (1 + np.exp(-(np.log(S) + np.log(T) - 4)))

    return S, T, Q

# Page: Overview
if page == "Overview":
    st.header("Key Concepts")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Theoretical Bound", "√(t log t)", "Space for time t")
        st.info("Any computation taking time t can be done with √(t log t) memory")

    with col2:
        st.metric("Practical Factor", "100-10,000×", "Constant overhead")
        st.warning("Real systems have I/O, cache hierarchies, coordination costs")

    with col3:
        st.metric("Ubiquity", "Everywhere", "In modern systems")
        st.success("Databases, ML, distributed systems all use these tradeoffs")

    # Main visualization
    st.subheader("The Fundamental Tradeoff")

    t, s_williams, s_naive, s_minimal = create_space_time_curve()

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=t, y=s_naive,
        mode='lines',
        name='Naive (Space = Time)',
        line=dict(color='red', dash='dash')
    ))

    fig.add_trace(go.Scatter(
        x=t, y=s_williams,
        mode='lines',
        name='Williams\' Bound: √(t log t)',
        line=dict(color='blue', width=3)
    ))

    fig.add_trace(go.Scatter(
        x=t, y=s_minimal,
        mode='lines',
        name='Minimal Space: log(t)',
        line=dict(color='green', dash='dot')
    ))

    fig.update_xaxes(type="log", title="Time (t)")
    fig.update_yaxes(type="log", title="Space (s)")
    fig.update_layout(
        title="Theoretical Space-Time Bounds",
        height=500,
        hovermode='x',
        template="plotly_dark"
    )

    st.plotly_chart(fig, use_container_width=True)

# Page: Theoretical Explorer
elif page == "Theoretical Explorer":
    st.header("Interactive Theoretical Explorer")

    col1, col2 = st.columns([1, 2])

    with col1:
        st.subheader("Parameters")

        time_complexity = st.slider(
            "Time Complexity (log scale)",
            min_value=1.0,
            max_value=6.0,
            value=3.0,
            step=0.1
        )

        show_practical = st.checkbox("Show practical bounds", value=True)
        constant_factor = st.slider(
            "Constant factor",
            min_value=1,
            max_value=1000,
            value=100,
            disabled=not show_practical
        )

        t_value = 10 ** time_complexity
        s_theory = np.sqrt(t_value * np.log(t_value))
        s_practical = s_theory * constant_factor if show_practical else s_theory

        st.metric("Time (t)", f"{t_value:,.0f}")
        st.metric("Space (theory)", f"{s_theory:,.0f}")
        if show_practical:
            st.metric("Space (practical)", f"{s_practical:,.0f}")

    with col2:
        # Create visualization
        t_range = np.logspace(1, 6, 100)
        s_range_theory = np.sqrt(t_range * np.log(t_range))
        s_range_practical = s_range_theory * constant_factor

        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=t_range, y=s_range_theory,
            mode='lines',
            name='Theoretical Bound',
            line=dict(color='blue', width=2)
        ))

        if show_practical:
            fig.add_trace(go.Scatter(
                x=t_range, y=s_range_practical,
                mode='lines',
                name=f'Practical ({constant_factor}× overhead)',
                line=dict(color='orange', width=2)
            ))

        # Add current point
        fig.add_trace(go.Scatter(
            x=[t_value], y=[s_theory],
            mode='markers',
            name='Current Selection',
            marker=dict(size=15, color='red', symbol='star')
        ))

        fig.update_xaxes(type="log", title="Time")
        fig.update_yaxes(type="log", title="Space")
        fig.update_layout(
            title="Space Requirements for Time-Bounded Computation",
            height=500,
            template="plotly_dark"
        )

        st.plotly_chart(fig, use_container_width=True)

# Page: Experimental Results
elif page == "Experimental Results":
    st.header("Experimental Validation")

    tabs = st.tabs(["Maze Solver", "Sorting", "Streaming", "Summary"])

    with tabs[0]:
        st.subheader("Maze Solving Algorithms")

        # Simulated data (in practice, load from experiment results)
        maze_data = pd.DataFrame({
            'Size': [20, 30, 40, 50],
            'BFS_Time': [0.001, 0.003, 0.008, 0.015],
            'BFS_Memory': [1600, 3600, 6400, 10000],
            'Limited_Time': [0.01, 0.05, 0.15, 0.35],
            'Limited_Memory': [80, 120, 160, 200]
        })

        fig = make_subplots(
            rows=1, cols=2,
            subplot_titles=("Time Complexity", "Memory Usage")
        )

        fig.add_trace(
            go.Scatter(x=maze_data['Size'], y=maze_data['BFS_Time'],
                      name='BFS', mode='lines+markers'),
            row=1, col=1
        )

        fig.add_trace(
            go.Scatter(x=maze_data['Size'], y=maze_data['Limited_Time'],
                      name='Memory-Limited', mode='lines+markers'),
            row=1, col=1
        )

        fig.add_trace(
            go.Scatter(x=maze_data['Size'], y=maze_data['BFS_Memory'],
                      name='BFS', mode='lines+markers', showlegend=False),
            row=1, col=2
        )

        fig.add_trace(
            go.Scatter(x=maze_data['Size'], y=maze_data['Limited_Memory'],
                      name='Memory-Limited', mode='lines+markers', showlegend=False),
            row=1, col=2
        )

        fig.update_xaxes(title_text="Maze Size", row=1, col=1)
        fig.update_xaxes(title_text="Maze Size", row=1, col=2)
        fig.update_yaxes(title_text="Time (s)", row=1, col=1)
        fig.update_yaxes(title_text="Memory (cells)", row=1, col=2)

        fig.update_layout(height=400, template="plotly_dark")
        st.plotly_chart(fig, use_container_width=True)

        st.info("Memory-limited DFS uses √n memory but requires ~n√n time due to recomputation")

    with tabs[1]:
        st.subheader("Sorting with Checkpoints")

        sort_times = {
            'Size': [1000, 5000, 10000, 20000],
            'In_Memory': [0.00001, 0.0001, 0.0003, 0.0008],
            'Checkpointed': [0.268, 2.5, 8.2, 25.3],
            'Ratio': [26800, 25000, 27333, 31625]
        }

        df = pd.DataFrame(sort_times)

        fig = px.bar(df, x='Size', y=['In_Memory', 'Checkpointed'],
                     title="Sorting Time: In-Memory vs Checkpointed",
                     labels={'value': 'Time (seconds)', 'variable': 'Method'},
                     log_y=True,
                     barmode='group',
                     template="plotly_dark")

        st.plotly_chart(fig, use_container_width=True)

        st.warning("Checkpointed sorting shows massive overhead (>1000×) due to disk I/O")

    with tabs[2]:
        st.subheader("Stream Processing")

        stream_data = {
            'Window_Size': [10, 50, 100, 500, 1000],
            'Full_Storage_Time': [0.005, 0.025, 0.05, 0.25, 0.5],
            'Sliding_Window_Time': [0.001, 0.001, 0.001, 0.002, 0.003],
            'Memory_Ratio': [100, 100, 100, 100, 100]
        }

        df = pd.DataFrame(stream_data)

        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=df['Window_Size'], y=df['Full_Storage_Time'],
            mode='lines+markers',
            name='Full Storage',
            line=dict(color='red')
        ))

        fig.add_trace(go.Scatter(
            x=df['Window_Size'], y=df['Sliding_Window_Time'],
            mode='lines+markers',
            name='Sliding Window',
            line=dict(color='green')
        ))

        fig.update_xaxes(title="Window Size")
        fig.update_yaxes(title="Time (seconds)", type="log")
        fig.update_layout(
            title="Stream Processing: Less Memory = Faster!",
            template="plotly_dark",
            height=400
        )

        st.plotly_chart(fig, use_container_width=True)

        st.success("Sliding window (O(w) space) is faster due to cache locality!")

    with tabs[3]:
        st.subheader("Summary of Findings")

        findings = pd.DataFrame({
            'Experiment': ['Maze Solver', 'Sorting', 'Streaming'],
            'Space Reduction': ['n → √n', 'n → √n', 'n → w'],
            'Time Increase': ['√n×', '>1000×', '0.1× (faster!)'],
            'Bottleneck': ['Recomputation', 'Disk I/O', 'Cache Locality']
        })

        st.table(findings)

# Page: Real-World Systems
elif page == "Real-World Systems":
    st.header("Space-Time Tradeoffs in Production")

    system = st.selectbox(
        "Choose a system",
        ["Databases", "Large Language Models", "Distributed Computing"]
    )

    if system == "Databases":
        st.subheader("Database Query Processing")

        col1, col2 = st.columns(2)

        with col1:
            st.markdown("### Hash Join vs Nested Loop")

            memory_limit = st.slider("work_mem (MB)", 1, 1024, 64)
            table_size = st.slider("Table size (GB)", 1, 100, 10)

            # Simulate query planner decision
            if memory_limit > table_size * 10:
                join_type = "Hash Join"
                time_estimate = table_size * 0.1
                memory_use = min(memory_limit, table_size * 50)
            else:
                join_type = "Nested Loop"
                time_estimate = table_size ** 2 * 0.01
                memory_use = 1

            st.metric("Selected Algorithm", join_type)
            st.metric("Estimated Time", f"{time_estimate:.1f} seconds")
            st.metric("Memory Usage", f"{memory_use} MB")

        with col2:
            # Visualization
            mem_range = np.logspace(0, 3, 100)
            hash_time = np.ones_like(mem_range) * table_size * 0.1
            nested_time = np.ones_like(mem_range) * table_size ** 2 * 0.01

            # Hash join only works with enough memory
            hash_time[mem_range < table_size * 10] = np.inf

            fig = go.Figure()

            fig.add_trace(go.Scatter(
                x=mem_range, y=hash_time,
                mode='lines',
                name='Hash Join',
                line=dict(color='blue')
            ))

            fig.add_trace(go.Scatter(
                x=mem_range, y=nested_time,
                mode='lines',
                name='Nested Loop',
                line=dict(color='red')
            ))

            fig.add_vline(x=memory_limit, line_dash="dash", line_color="green",
                         annotation_text="Current work_mem")

            fig.update_xaxes(type="log", title="Memory Available (MB)")
            fig.update_yaxes(type="log", title="Query Time (seconds)")
            fig.update_layout(
                title="Join Algorithm Selection",
                template="plotly_dark",
                height=400
            )

            st.plotly_chart(fig, use_container_width=True)

    elif system == "Large Language Models":
        st.subheader("LLM Memory Optimizations")

        col1, col2 = st.columns([1, 2])

        with col1:
            model_size = st.selectbox("Model Size", ["7B", "13B", "70B", "175B"])
            optimization = st.multiselect(
                "Optimizations",
                ["Quantization (INT8)", "Flash Attention", "Multi-Query Attention"],
                default=[]
            )

            # Calculate memory requirements
            base_memory = {"7B": 28, "13B": 52, "70B": 280, "175B": 700}[model_size]
            memory = base_memory
            speedup = 1.0

            if "Quantization (INT8)" in optimization:
                memory /= 4
                speedup *= 0.8

            if "Flash Attention" in optimization:
                memory *= 0.7
                speedup *= 0.9

            if "Multi-Query Attention" in optimization:
                memory *= 0.6
                speedup *= 0.95

            st.metric("Memory Required", f"{memory:.0f} GB")
            st.metric("Relative Speed", f"{speedup:.2f}×")
            st.metric("Context Length", f"{int(100000 / (memory / base_memory))} tokens")

        with col2:
            # Create optimization impact chart
            categories = ['Memory', 'Speed', 'Context Length', 'Quality']

            fig = go.Figure()

            # Baseline
            fig.add_trace(go.Scatterpolar(
                r=[100, 100, 100, 100],
                theta=categories,
                fill='toself',
                name='Baseline',
                line=dict(color='red')
            ))

            # With optimizations
            memory_score = (base_memory / memory) * 100
            speed_score = speedup * 100
            context_score = (memory_score) * 100 / 100
            quality_score = 95 if optimization else 100

            fig.add_trace(go.Scatterpolar(
                r=[memory_score, speed_score, context_score, quality_score],
                theta=categories,
                fill='toself',
                name='With Optimizations',
                line=dict(color='green')
            ))

            fig.update_layout(
                polar=dict(
                    radialaxis=dict(
                        visible=True,
                        range=[0, 200]
                    )),
                showlegend=True,
                template="plotly_dark",
                title="Optimization Impact"
            )

            st.plotly_chart(fig, use_container_width=True)

    elif system == "Distributed Computing":
        st.subheader("MapReduce Shuffle Memory")

        # Interactive shuffle buffer sizing
        cluster_size = st.slider("Cluster Size (nodes)", 10, 1000, 100)
        data_size = st.slider("Data Size (TB)", 1, 100, 10)

        # Calculate optimal buffer size
        data_per_node = data_size * 1024 / cluster_size  # GB per node
        optimal_buffer = np.sqrt(data_per_node * 1024)  # MB

        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric("Data per Node", f"{data_per_node:.1f} GB")
        with col2:
            st.metric("Optimal Buffer Size", f"{optimal_buffer:.0f} MB")
        with col3:
            st.metric("Buffer/Data Ratio", f"1:{int(data_per_node * 1024 / optimal_buffer)}")

        # Visualization of shuffle performance
        buffer_sizes = np.logspace(1, 4, 100)

        # Performance model
        io_time = data_per_node * 1024 / buffer_sizes * 10  # More I/O with small buffers
        cpu_time = buffer_sizes / 100  # More CPU with large buffers
        total_time = io_time + cpu_time

        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=buffer_sizes, y=io_time,
            mode='lines',
            name='I/O Time',
            line=dict(color='red')
        ))

        fig.add_trace(go.Scatter(
            x=buffer_sizes, y=cpu_time,
            mode='lines',
            name='CPU Time',
            line=dict(color='blue')
        ))

        fig.add_trace(go.Scatter(
            x=buffer_sizes, y=total_time,
            mode='lines',
            name='Total Time',
            line=dict(color='green', width=3)
        ))

        fig.add_vline(x=optimal_buffer, line_dash="dash", line_color="white",
                     annotation_text="√n Optimal")

        fig.update_xaxes(type="log", title="Shuffle Buffer Size (MB)")
        fig.update_yaxes(type="log", title="Time (seconds)")
        fig.update_layout(
            title="Shuffle Performance vs Buffer Size",
            template="plotly_dark",
            height=400
        )

        st.plotly_chart(fig, use_container_width=True)

        st.info("The optimal buffer size follows the √n pattern predicted by theory!")

# Page: Tradeoff Calculator
elif page == "Tradeoff Calculator":
    st.header("Space-Time Tradeoff Calculator")

    st.markdown("Calculate optimal configurations for your system")

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("System Parameters")

        total_data = st.number_input("Total Data Size (GB)", min_value=1, value=100)
        available_memory = st.number_input("Available Memory (GB)", min_value=1, value=16)

        io_speed = st.slider("I/O Speed (MB/s)", 50, 5000, 500)
        cpu_speed = st.slider("CPU Speed (GFLOPS)", 10, 1000, 100)

        workload_type = st.selectbox(
            "Workload Type",
            ["Batch Processing", "Stream Processing", "Interactive Query", "ML Training"]
        )

    with col2:
        st.subheader("Recommendations")

        # Calculate recommendations based on workload
        memory_ratio = available_memory / total_data

        if memory_ratio > 1:
            st.success("✅ Everything fits in memory!")
            strategy = "In-memory processing"
            chunk_size = total_data
        elif memory_ratio > 0.1:
            st.info("📊 Use hybrid approach")
            strategy = "Partial caching with smart eviction"
            chunk_size = np.sqrt(total_data * available_memory)
        else:
            st.warning("⚠️ Heavy space constraints")
            strategy = "Streaming with checkpoints"
            chunk_size = available_memory / 10

        st.metric("Recommended Strategy", strategy)
        st.metric("Optimal Chunk Size", f"{chunk_size:.1f} GB")

        # Time estimates
        if workload_type == "Batch Processing":
            time_memory = total_data / cpu_speed
            time_disk = total_data / io_speed * 1000 + total_data / cpu_speed * 2
            time_optimal = total_data / np.sqrt(available_memory) * 10
        else:
            time_memory = 1
            time_disk = 100
            time_optimal = 10

        # Comparison chart
        fig = go.Figure(data=[
            go.Bar(name='All in Memory', x=['Time'], y=[time_memory]),
            go.Bar(name='All on Disk', x=['Time'], y=[time_disk]),
            go.Bar(name='Optimal √n', x=['Time'], y=[time_optimal])
        ])

        fig.update_layout(
            title="Processing Time Comparison",
            yaxis_title="Time (seconds)",
            template="plotly_dark",
            height=300
        )

        st.plotly_chart(fig, use_container_width=True)

# Page: Interactive Demos
elif page == "Interactive Demos":
    st.header("Interactive Demonstrations")

    demo = st.selectbox(
        "Choose a demo",
        ["Sorting Visualizer", "Cache Simulator", "Attention Mechanism"]
    )

    if demo == "Sorting Visualizer":
        st.subheader("Watch Space-Time Tradeoffs in Action")

        size = st.slider("Array Size", 10, 100, 50)
        algorithm = st.radio("Algorithm", ["In-Memory Sort", "External Sort with √n Memory"])

        if st.button("Run Sorting"):
            # Simulate sorting
            progress = st.progress(0)
            status = st.empty()

            if algorithm == "In-Memory Sort":
                steps = size * np.log2(size)
                for i in range(int(steps)):
                    progress.progress(i / steps)
                    status.text(f"Comparing elements... Step {i}/{int(steps)}")
                st.success(f"Completed in {steps:.0f} operations using {size} memory units")
            else:
                chunks = int(np.sqrt(size))
                total_steps = size * np.log2(size) * chunks
                for i in range(int(total_steps)):
                    progress.progress(i / total_steps)
                    if i % size == 0:
                        status.text(f"Writing checkpoint {i//size}/{chunks}...")
                    else:
                        status.text(f"Processing... Step {i}/{int(total_steps)}")
                st.warning(f"Completed in {total_steps:.0f} operations using {chunks} memory units")

    elif demo == "Cache Simulator":
        st.subheader("Memory Hierarchy Simulation")

        # Create memory hierarchy visualization
        levels = {
            'L1 Cache': {'size': 32, 'latency': 1},
            'L2 Cache': {'size': 256, 'latency': 10},
            'L3 Cache': {'size': 8192, 'latency': 50},
            'RAM': {'size': 32768, 'latency': 100},
            'SSD': {'size': 512000, 'latency': 10000}
        }

        access_pattern = st.selectbox(
            "Access Pattern",
            ["Sequential", "Random", "Strided"]
        )

        working_set = st.slider("Working Set Size (KB)", 1, 100000, 1000, step=10)

        # Determine which level serves the request
        for level, specs in levels.items():
            if working_set <= specs['size']:
                serving_level = level
                latency = specs['latency']
                break

        col1, col2 = st.columns(2)

        with col1:
            st.metric("Data Served From", serving_level)
            st.metric("Average Latency", f"{latency} ns")
            st.metric("Throughput", f"{1000/latency:.1f} GB/s")

        with col2:
            # Visualization
            fig = go.Figure()

            sizes = [specs['size'] for specs in levels.values()]
            latencies = [specs['latency'] for specs in levels.values()]
            names = list(levels.keys())

            fig.add_trace(go.Scatter(
                x=sizes, y=latencies,
                mode='markers+text',
                text=names,
                textposition="top center",
                marker=dict(size=20)
            ))

            fig.add_vline(x=working_set, line_dash="dash", line_color="red",
                         annotation_text="Working Set")

            fig.update_xaxes(type="log", title="Capacity (KB)")
            fig.update_yaxes(type="log", title="Latency (ns)")
            fig.update_layout(
                title="Memory Hierarchy",
                template="plotly_dark",
                height=400
            )

            st.plotly_chart(fig, use_container_width=True)

# Footer
st.markdown("---")
st.markdown("""
<div style='text-align: center'>
    <p>Created for the Ubiquity Project | Based on Ryan Williams' 2025 STOC paper</p>
    <p>TIME[t] ⊆ SPACE[√(t log t)] - A fundamental limit of computation</p>
</div>
""", unsafe_allow_html=True)