""" Run final sorting experiment with parameters balanced for: - Statistical significance (10 trials) - Reasonable runtime (smaller sizes) - Demonstrating scaling behavior """ from rigorous_experiment import * import time def run_final_experiment(): """Run experiment with balanced parameters""" print("="*60) print("FINAL SORTING EXPERIMENT") print("Space-Time Tradeoffs in External Sorting") print("="*60) start_time = time.time() # Log environment env = ExperimentEnvironment.get_environment() print("\nExperimental Environment:") print(f" Platform: {env['platform']}") print(f" Python: {env['python_version']}") print(f" CPUs: {env['cpu_count']} physical, {env['cpu_count_logical']} logical") print(f" Memory: {env['memory_total'] / 1e9:.1f} GB total") if 'l3_cache' in env: print(f" L3 Cache: {env['l3_cache'] / 1e6:.1f} MB") # Save environment with open('experiment_environment.json', 'w') as f: json.dump(env, f, indent=2) # Run experiments - balanced for paper sizes = [1000, 2000, 5000, 10000, 20000] num_trials = 10 # Enough for statistical significance all_results = [] for size in sizes: print(f"\n{'='*40}") print(f"Testing n = {size:,}") print(f"{'='*40}") result = run_single_experiment(size, num_trials=num_trials) all_results.append(result) # Print detailed results print(f"\nSummary for n={size:,}:") print(f" Algorithm | Mean Time | Std Dev | Memory (peak)") print(f" -------------------|--------------|--------------|---------------") print(f" In-memory O(n) | {result['in_memory_mean']:10.6f}s | ±{result['in_memory_std']:.6f}s | {result['in_memory_memory_mean']/1024:.1f} KB") print(f" Checkpoint O(√n) | {result['checkpoint_mean']:10.6f}s | ±{result['checkpoint_std']:.6f}s | {result['checkpoint_memory_mean']/1024:.1f} KB") if 'checkpoint_ramdisk_mean' in result: print(f" Checkpoint (RAM) | {result['checkpoint_ramdisk_mean']:10.6f}s | N/A | {result['checkpoint_ramdisk_memory']/1024:.1f} KB") print(f"\n Slowdown (with I/O): {result['slowdown_disk']:.1f}x") print(f" Slowdown (RAM disk): {result['slowdown_ramdisk']:.1f}x") print(f" Pure I/O overhead: {result['io_overhead_factor']:.1f}x") else: print(f"\n Slowdown: {result['slowdown_disk']:.1f}x") print(f" Memory reduction: {result['in_memory_memory_mean'] / result['checkpoint_memory_mean']:.1f}x") # Save detailed results with open('final_experiment_results.json', 'w') as f: json.dump({ 'environment': env, 'parameters': { 'sizes': sizes, 'num_trials': num_trials }, 'results': all_results }, f, indent=2) # Create comprehensive plots create_comprehensive_plots(all_results) # Also create a simple summary plot for the paper create_paper_figure(all_results) elapsed = time.time() - start_time print(f"\n{'='*60}") print(f"EXPERIMENT COMPLETE in {elapsed:.1f} seconds") print("\nGenerated files:") print(" - experiment_environment.json") print(" - final_experiment_results.json") print(" - rigorous_sorting_analysis.png") print(" - memory_usage_analysis.png") print(" - paper_sorting_figure.png") print(f"{'='*60}") return all_results def create_paper_figure(all_results: List[Dict]): """Create a clean figure for the paper""" sizes = [r['size'] for r in all_results] fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)) # Left plot: Time complexity in_memory_means = [r['in_memory_mean'] for r in all_results] checkpoint_means = [r['checkpoint_mean'] for r in all_results] ax1.loglog(sizes, in_memory_means, 'o-', label='In-memory O(n)', color='blue', linewidth=2, markersize=8) ax1.loglog(sizes, checkpoint_means, 's-', label='Checkpointed O(√n)', color='red', 
def create_paper_figure(all_results: List[Dict]) -> None:
    """Create a clean two-panel figure for the paper."""
    sizes = [r['size'] for r in all_results]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: measured time vs. input size on log-log axes.
    in_memory_means = [r['in_memory_mean'] for r in all_results]
    checkpoint_means = [r['checkpoint_mean'] for r in all_results]

    ax1.loglog(sizes, in_memory_means, 'o-', label='In-memory O(n)',
               color='blue', linewidth=2, markersize=8)
    ax1.loglog(sizes, checkpoint_means, 's-', label='Checkpointed O(√n)',
               color='red', linewidth=2, markersize=8)

    # Overlay power-law trend lines fitted to the measured data.
    sizes_smooth = np.logspace(np.log10(sizes[0]), np.log10(sizes[-1]), 100)

    def power_law(x, a, b):
        return a * x ** b

    # Initial guesses keep the fit stable when the prefactor `a` is tiny.
    popt1, _ = curve_fit(power_law, sizes, in_memory_means, p0=(1e-6, 1.0))
    popt2, _ = curve_fit(power_law, sizes, checkpoint_means, p0=(1e-6, 1.0))

    ax1.loglog(sizes_smooth, power_law(sizes_smooth, *popt1), 'b--',
               alpha=0.5, label=f'Fit: n^{{{popt1[1]:.2f}}}')
    ax1.loglog(sizes_smooth, power_law(sizes_smooth, *popt2), 'r--',
               alpha=0.5, label=f'Fit: n^{{{popt2[1]:.2f}}}')

    ax1.set_xlabel('Input Size (n)', fontsize=14)
    ax1.set_ylabel('Time (seconds)', fontsize=14)
    ax1.set_title('(a) Time Complexity', fontsize=16)
    ax1.legend(fontsize=12)
    ax1.grid(True, alpha=0.3)

    # Right panel: observed slowdown factor vs. the theoretical √n curve.
    slowdowns = [r['slowdown_disk'] for r in all_results]

    ax2.loglog(sizes, slowdowns, 'go-', linewidth=2, markersize=8,
               label='Observed')

    # Theoretical √n curve, anchored at the first observed point.
    theory = slowdowns[0] * np.sqrt(sizes_smooth / sizes[0])
    ax2.loglog(sizes_smooth, theory, 'k--', alpha=0.5, label='Theoretical √n')

    ax2.set_xlabel('Input Size (n)', fontsize=14)
    ax2.set_ylabel('Slowdown Factor', fontsize=14)
    ax2.set_title('(b) Cost of Space Reduction', fontsize=16)
    ax2.legend(fontsize=12)
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('paper_sorting_figure.png', dpi=300, bbox_inches='tight')
    plt.close(fig)


if __name__ == "__main__":
    results = run_final_experiment()
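
# Minimal smoke test for the plotting code (a sketch, not part of the
# experiment): exercises create_paper_figure on synthetic results that follow
# the key contract run_single_experiment is assumed to satisfy ('size',
# 'in_memory_mean', 'checkpoint_mean', 'slowdown_disk'). The constants are
# illustrative, not measured data, and the helper name is ours.
def _smoke_test_paper_figure():
    fake_results = [
        {'size': n,
         'in_memory_mean': 1e-6 * n,                     # pretend linear time
         'checkpoint_mean': 1e-6 * n * (n ** 0.5) / 30,  # pretend O(n·√n) time
         'slowdown_disk': (n ** 0.5) / 30}               # ratio of the two
        for n in (1000, 2000, 5000, 10000, 20000)
    ]
    create_paper_figure(fake_results)  # writes paper_sorting_figure.png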