sqrtspace-experiments/experiments/checkpointed_sorting/test_quick.py
2025-07-20 03:56:21 -04:00

115 lines
3.6 KiB
Python

"""
Quick test to verify sorting experiment works with smaller parameters
"""
import os
import time
import tempfile
import numpy as np
import shutil
from scipy import stats
import sys
class SortingExperiment:
    """Compare in-memory sorting against checkpointed external-merge sorting.

    Generates a random float32 array of the requested size and provides two
    sort implementations with different memory profiles.
    """

    def __init__(self, data_size: int):
        self.data_size = data_size
        # float32 halves the footprint of the default float64 input.
        self.data = np.random.rand(data_size).astype(np.float32)
        # Scratch directory for chunk checkpoints; removed by cleanup().
        self.temp_dir = tempfile.mkdtemp()

    def cleanup(self) -> None:
        """Remove the temporary checkpoint directory and its contents."""
        shutil.rmtree(self.temp_dir)

    def in_memory_sort(self) -> np.ndarray:
        """Standard in-memory sort — O(n) extra space.

        Returns a sorted copy; self.data is left untouched.
        """
        return np.sort(self.data.copy())

    def checkpoint_sort(self, memory_limit: int) -> np.ndarray:
        """External merge sort with checkpointing — O(sqrt(n)) working space.

        Args:
            memory_limit: Approximate number of elements allowed in memory at
                once. A quarter of it is used per sorted chunk; the remainder
                is nominally reserved for the merge phase.

        Returns:
            A sorted float32 np.ndarray equal to np.sort(self.data).
        """
        # Guard: memory_limit < 4 would make chunk_size 0 and raise
        # ZeroDivisionError in the ceiling division below.
        chunk_size = max(1, memory_limit // 4)
        num_chunks = (self.data_size + chunk_size - 1) // chunk_size

        # Phase 1: sort each chunk in memory and checkpoint it to disk.
        chunk_files = []
        for i in range(num_chunks):
            start = i * chunk_size
            end = min((i + 1) * chunk_size, self.data_size)
            chunk = np.sort(self.data[start:end])
            filename = os.path.join(self.temp_dir, f'chunk_{i}.npy')
            np.save(filename, chunk)
            chunk_files.append(filename)
            del chunk  # release the chunk before loading the next one

        # Phase 2: simple merge (not a true k-way external merge — acceptable
        # for this quick test). np.concatenate keeps float32 and avoids the
        # slow, memory-hungry Python-list round trip of extend(chunk.tolist()).
        try:
            if chunk_files:
                parts = [np.load(f) for f in chunk_files]
                result = np.sort(np.concatenate(parts))
            else:
                # Empty input: np.concatenate([]) would raise.
                result = np.empty(0, dtype=np.float32)
        finally:
            # Always remove checkpoint files, even if the merge fails.
            for f in chunk_files:
                os.remove(f)
        return result
def run_quick_test():
    """Run a quick benchmark comparing in-memory vs. checkpointed sorting.

    Uses small input sizes and a few trials per size, and verifies on the
    first trial of each size that both algorithms produce identical output.
    """
    print("=== Quick Sorting Test ===\n")

    # Small sizes for quick verification
    sizes = [100, 500, 1000]
    num_trials = 3

    for size in sizes:
        print(f"\nTesting with {size} elements ({num_trials} trials):")
        in_memory_times = []
        checkpoint_times = []

        for trial in range(num_trials):
            exp = SortingExperiment(size)

            # perf_counter has far better resolution than time.time for
            # sub-millisecond measurements like these.
            start = time.perf_counter()
            result1 = exp.in_memory_sort()
            in_memory_times.append(time.perf_counter() - start)

            # Checkpointed sort with an O(sqrt(n)) memory budget.
            memory_limit = int(np.sqrt(size) * 4)
            start = time.perf_counter()
            result2 = exp.checkpoint_sort(memory_limit)
            checkpoint_times.append(time.perf_counter() - start)

            # Verify correctness once per size.
            if trial == 0:
                assert np.allclose(result1, result2), f"Results don't match for size {size}"
                print(f"  ✓ Correctness verified")
            exp.cleanup()

        # Calculate statistics
        in_memory_mean = np.mean(in_memory_times)
        in_memory_std = np.std(in_memory_times)
        checkpoint_mean = np.mean(checkpoint_times)
        checkpoint_std = np.std(checkpoint_times)

        print(f"  In-memory:  {in_memory_mean:.6f}s ± {in_memory_std:.6f}s")
        print(f"  Checkpoint: {checkpoint_mean:.6f}s ± {checkpoint_std:.6f}s")
        # Guard: tiny inputs can measure 0.0s, which previously caused a
        # ZeroDivisionError when computing the slowdown ratio.
        if in_memory_mean > 0:
            print(f"  Slowdown: {checkpoint_mean/in_memory_mean:.1f}x")
        else:
            print("  Slowdown: n/a (in-memory time below timer resolution)")
# Entry point: run the quick benchmark only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    run_quick_test()