115 lines
3.6 KiB
Python
115 lines
3.6 KiB
Python
"""
|
|
Quick test to verify sorting experiment works with smaller parameters
|
|
"""
|
|
|
|
import os
|
|
import time
|
|
import tempfile
|
|
import numpy as np
|
|
import shutil
|
|
from scipy import stats
|
|
import sys
|
|
|
|
class SortingExperiment:
|
|
"""Compare different sorting algorithms with varying memory constraints"""
|
|
|
|
def __init__(self, data_size: int):
|
|
self.data_size = data_size
|
|
self.data = np.random.rand(data_size).astype(np.float32)
|
|
self.temp_dir = tempfile.mkdtemp()
|
|
|
|
def cleanup(self):
|
|
"""Clean up temporary files"""
|
|
shutil.rmtree(self.temp_dir)
|
|
|
|
def in_memory_sort(self) -> np.ndarray:
|
|
"""Standard in-memory sorting - O(n) space"""
|
|
return np.sort(self.data.copy())
|
|
|
|
def checkpoint_sort(self, memory_limit: int) -> np.ndarray:
|
|
"""External merge sort with checkpointing - O(√n) space"""
|
|
chunk_size = memory_limit // 4 # Reserve memory for merging
|
|
num_chunks = (self.data_size + chunk_size - 1) // chunk_size
|
|
|
|
# Phase 1: Sort chunks and write to disk
|
|
chunk_files = []
|
|
for i in range(num_chunks):
|
|
start = i * chunk_size
|
|
end = min((i + 1) * chunk_size, self.data_size)
|
|
|
|
# Sort chunk in memory
|
|
chunk = np.sort(self.data[start:end])
|
|
|
|
# Write to disk (checkpoint)
|
|
filename = os.path.join(self.temp_dir, f'chunk_{i}.npy')
|
|
np.save(filename, chunk)
|
|
chunk_files.append(filename)
|
|
|
|
# Clear chunk from memory
|
|
del chunk
|
|
|
|
# Phase 2: Simple merge (for quick test)
|
|
result = []
|
|
for f in chunk_files:
|
|
chunk = np.load(f)
|
|
result.extend(chunk.tolist())
|
|
|
|
# Final sort (not truly external, but for quick test)
|
|
result = np.sort(np.array(result))
|
|
|
|
# Cleanup chunk files
|
|
for f in chunk_files:
|
|
os.remove(f)
|
|
|
|
return result
|
|
|
|
def run_quick_test():
|
|
"""Run a quick test with smaller sizes"""
|
|
|
|
print("=== Quick Sorting Test ===\n")
|
|
|
|
# Small sizes for quick verification
|
|
sizes = [100, 500, 1000]
|
|
num_trials = 3
|
|
|
|
for size in sizes:
|
|
print(f"\nTesting with {size} elements ({num_trials} trials):")
|
|
|
|
in_memory_times = []
|
|
checkpoint_times = []
|
|
|
|
for trial in range(num_trials):
|
|
exp = SortingExperiment(size)
|
|
|
|
# In-memory sort
|
|
start = time.time()
|
|
result1 = exp.in_memory_sort()
|
|
time1 = time.time() - start
|
|
in_memory_times.append(time1)
|
|
|
|
# Checkpointed sort
|
|
memory_limit = int(np.sqrt(size) * 4)
|
|
start = time.time()
|
|
result2 = exp.checkpoint_sort(memory_limit)
|
|
time2 = time.time() - start
|
|
checkpoint_times.append(time2)
|
|
|
|
# Verify correctness
|
|
if trial == 0:
|
|
assert np.allclose(result1, result2), f"Results don't match for size {size}"
|
|
print(f" ✓ Correctness verified")
|
|
|
|
exp.cleanup()
|
|
|
|
# Calculate statistics
|
|
in_memory_mean = np.mean(in_memory_times)
|
|
in_memory_std = np.std(in_memory_times)
|
|
checkpoint_mean = np.mean(checkpoint_times)
|
|
checkpoint_std = np.std(checkpoint_times)
|
|
|
|
print(f" In-memory: {in_memory_mean:.6f}s ± {in_memory_std:.6f}s")
|
|
print(f" Checkpoint: {checkpoint_mean:.6f}s ± {checkpoint_std:.6f}s")
|
|
print(f" Slowdown: {checkpoint_mean/in_memory_mean:.1f}x")
|
|
|
|
if __name__ == "__main__":
|
|
run_quick_test() |