#!/usr/bin/env python3
"""
Memory pressure tests to verify √n behavior under constrained memory.
"""
import unittest
import gc
import os
import psutil
import resource
import tempfile
import shutil
import random
import time
from sqrtspace_spacetime import (
    SpaceTimeArray, SpaceTimeDict, external_sort,
    external_groupby, SpaceTimeConfig
)
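
# NOTE: psutil reports whole-process RSS (interpreter, allocator, and test
# scaffolding included), so the absolute memory bounds asserted below are
# deliberately generous.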


class TestMemoryPressure(unittest.TestCase):
    """Test √n memory behavior under real memory constraints."""

    def setUp(self):
        """Set up test environment."""
        self.temp_dir = tempfile.mkdtemp()
        self.process = psutil.Process()
        # Configure strict memory limits
        SpaceTimeConfig.set_defaults(
            storage_path=self.temp_dir,
            memory_limit=50 * 1024 * 1024,  # 50MB limit
            chunk_strategy='sqrt_n',
            compression='gzip'
        )

    def tearDown(self):
        """Clean up test environment."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)
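
    # The tests below read private attributes (_hot_data, _cold_indices,
    # _cold_keys) to observe spillover directly; treat these as
    # implementation details of sqrtspace_spacetime, not public API.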

    def test_array_under_memory_pressure(self):
        """Test SpaceTimeArray behavior when memory is constrained."""
        print("\n=== Testing SpaceTimeArray under memory pressure ===")
        # Create large objects that will force spillover
        large_object_size = 1024  # 1KB per object
        n_objects = 100_000  # Total: ~100MB if all in memory
        array = SpaceTimeArray(threshold='auto')
        # Track metrics
        spillovers = 0
        max_memory = 0
        start_time = time.time()
        # Add objects and monitor memory
        for i in range(n_objects):
            # Create a large object
            obj = {
                'id': i,
                'data': 'x' * large_object_size,
                'timestamp': time.time()
            }
            array.append(obj)
            # Monitor every 1000 items
            if i % 1000 == 0:
                gc.collect()
                current_memory = self.process.memory_info().rss / 1024 / 1024
                max_memory = max(max_memory, current_memory)
                if i > 0:
                    hot_count = len(array._hot_data)
                    cold_count = len(array._cold_indices)
                    print(f" Items: {i:,} | Memory: {current_memory:.1f}MB | "
                          f"Hot: {hot_count} | Cold: {cold_count}")
                    # Check if spillover is happening
                    if cold_count > spillovers:
                        spillovers = cold_count
        elapsed = time.time() - start_time
        # Verify all data is accessible
        print("\nVerifying data accessibility...")
        sample_indices = random.sample(range(n_objects), min(100, n_objects))
        for idx in sample_indices:
            obj = array[idx]
            self.assertEqual(obj['id'], idx)
            self.assertEqual(len(obj['data']), large_object_size)
        # Calculate statistics
        theoretical_sqrt_n = int(n_objects ** 0.5)
        actual_hot_items = len(array._hot_data)
        print("\nResults:")
        print(f" Total items: {n_objects:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Max memory used: {max_memory:.1f} MB")
        print(f" Theoretical √n: {theoretical_sqrt_n:,}")
        print(f" Actual hot items: {actual_hot_items:,}")
        print(f" Cold items: {len(array._cold_indices):,}")
        print(f" Memory efficiency: {n_objects / max_memory:.0f} items/MB")
        # Assertions
        self.assertEqual(len(array), n_objects)
        self.assertLess(max_memory, 150)  # All-in-RAM (~100MB data + object overhead) would exceed this
        self.assertGreater(spillovers, 0)  # Should have spilled to disk
        self.assertLessEqual(actual_hot_items, theoretical_sqrt_n * 2)  # Within 2x of √n
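
    # With an explicit threshold, the hot set is pinned at that size rather
    # than tracking √n: 50,000 inserts against threshold=1000 should leave
    # roughly 49,000 entries cold.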

    def test_dict_with_memory_limit(self):
        """Test SpaceTimeDict with strict memory limit."""
        print("\n=== Testing SpaceTimeDict under memory pressure ===")
        # Create dictionary with explicit threshold
        cache = SpaceTimeDict(threshold=1000)  # Keep only 1000 items in memory
        n_items = 50_000
        value_size = 500  # 500 bytes per value
        # Track evictions
        evictions = 0
        start_time = time.time()
        # Add items
        for i in range(n_items):
            key = f"key_{i:06d}"
            value = {
                'id': i,
                'data': 'v' * value_size,
                'accessed': 0
            }
            cache[key] = value
            # Check for evictions
            if i % 1000 == 0 and i > 0:
                current_hot = len(cache._hot_data)
                current_cold = len(cache._cold_keys)
                if current_cold > evictions:
                    evictions = current_cold
                print(f" Items: {i:,} | Hot: {current_hot} | Cold: {current_cold}")
        elapsed = time.time() - start_time
        # Test access patterns (LRU behavior)
        print("\nTesting LRU behavior...")
        # Access some old items
        for i in range(0, 100, 10):
            key = f"key_{i:06d}"
            value = cache[key]
            value['accessed'] += 1
        # Add more items to trigger eviction
        for i in range(n_items, n_items + 1000):
            cache[f"key_{i:06d}"] = {'id': i, 'data': 'x' * value_size}
        # Recent items should still be hot
        stats = cache.get_stats()
        print("\nResults:")
        print(f" Total items: {len(cache):,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Hot items: {len(cache._hot_data)}")
        print(f" Cold items: {len(cache._cold_keys)}")
        print(f" Stats: {stats}")
        # Verify all items accessible
        sample_keys = random.sample([f"key_{i:06d}" for i in range(n_items)], 100)
        for key in sample_keys:
            self.assertIn(key, cache)
            value = cache[key]
            self.assertIsNotNone(value)
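
    # Why n, 4n, 9n, 16n below: if working memory is Θ(√n), quadrupling n
    # should roughly double it, since sqrt(4n) = 2 * sqrt(n). Concretely:
    #   >>> [int(n ** 0.5) for n in (10_000, 40_000, 90_000, 160_000)]
    #   [100, 200, 300, 400]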

    def test_algorithm_memory_scaling(self):
        """Test that algorithms scale with √n memory usage."""
        print("\n=== Testing algorithm memory scaling ===")
        datasets = [10_000, 40_000, 90_000, 160_000]  # n, 4n, 9n, 16n
        results = []
        for n in datasets:
            print(f"\nTesting with n = {n:,}")
            # Generate data
            data = [random.randint(1, 1_000_000) for _ in range(n)]
            # Measure memory for sorting
            gc.collect()
            mem_before = self.process.memory_info().rss / 1024 / 1024
            sorted_data = external_sort(data)
            gc.collect()
            mem_after = self.process.memory_info().rss / 1024 / 1024
            mem_used = mem_after - mem_before
            # Verify correctness
            self.assertEqual(len(sorted_data), n)
            for i in range(min(1000, len(sorted_data) - 1)):
                self.assertLessEqual(sorted_data[i], sorted_data[i + 1])
            sqrt_n = int(n ** 0.5)
            results.append({
                'n': n,
                'sqrt_n': sqrt_n,
                'memory_used': mem_used,
                'ratio': mem_used / max(sqrt_n * 8 / 1024 / 1024, 0.001)  # 8 bytes per int
            })
            print(f" √n = {sqrt_n:,}")
            print(f" Memory used: {mem_used:.2f} MB")
            print(f" Ratio to theoretical: {results[-1]['ratio']:.2f}x")
        # Verify √n scaling
        print("\nScaling Analysis:")
        print("n        | √n      | Memory (MB) | Ratio")
        print("---------|---------|-------------|-------")
        for r in results:
            print(f"{r['n']:8,} | {r['sqrt_n']:7,} | {r['memory_used']:11.2f} | {r['ratio']:6.2f}x")
        # Memory should scale roughly with √n:
        # as n increases 4x, memory should increase ~2x
        for i in range(1, len(results)):
            n_ratio = results[i]['n'] / results[i - 1]['n']
            mem_ratio = results[i]['memory_used'] / max(results[i - 1]['memory_used'], 0.1)
            expected_ratio = n_ratio ** 0.5
            print(f"\nn increased {n_ratio:.1f}x, memory increased {mem_ratio:.1f}x "
                  f"(expected ~{expected_ratio:.1f}x)")
            # Allow some variance due to overheads
            self.assertLess(mem_ratio, expected_ratio * 3,
                            f"Memory scaling worse than √n: {mem_ratio:.1f}x vs {expected_ratio:.1f}x")

    def test_concurrent_memory_pressure(self):
        """Test behavior under concurrent access with memory pressure."""
        print("\n=== Testing concurrent access under memory pressure ===")
        import threading
        import queue

        array = SpaceTimeArray(threshold=500)
        errors = queue.Queue()
        n_threads = 4
        items_per_thread = 25_000

        def worker(thread_id, start_idx):
            try:
                for i in range(items_per_thread):
                    item = {
                        'thread': thread_id,
                        'index': start_idx + i,
                        'data': f"thread_{thread_id}_item_{i}" * 50
                    }
                    array.append(item)
                    # Occasionally read random items
                    if i % 100 == 0 and len(array) > 10:
                        idx = random.randint(0, len(array) - 1)
                        _ = array[idx]
            except Exception as e:
                errors.put((thread_id, str(e)))

        # Start threads
        threads = []
        start_time = time.time()
        for i in range(n_threads):
            t = threading.Thread(
                target=worker,
                args=(i, i * items_per_thread)
            )
            threads.append(t)
            t.start()
        # Monitor memory while threads run
        max_memory = 0
        while any(t.is_alive() for t in threads):
            current_memory = self.process.memory_info().rss / 1024 / 1024
            max_memory = max(max_memory, current_memory)
            time.sleep(0.1)
        # Wait for completion
        for t in threads:
            t.join()
        elapsed = time.time() - start_time
        # Check for errors
        error_list = []
        while not errors.empty():
            error_list.append(errors.get())
        print("\nResults:")
        print(f" Threads: {n_threads}")
        print(f" Total items: {n_threads * items_per_thread:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Max memory: {max_memory:.1f} MB")
        print(f" Errors: {len(error_list)}")
        print(f" Final array size: {len(array):,}")
        # Assertions
        self.assertEqual(len(error_list), 0, f"Thread errors: {error_list}")
        self.assertEqual(len(array), n_threads * items_per_thread)
        self.assertLess(max_memory, 200)  # Should handle memory pressure
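

# Running this module directly executes the full suite; the print output
# above doubles as a rough log of memory behavior for manual inspection.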
if __name__ == "__main__":
unittest.main()