sqrtspace-python/tests/test_spacetime_array.py
2025-07-20 04:11:04 -04:00

202 lines
6.5 KiB
Python

#!/usr/bin/env python3
"""
Tests for SpaceTimeArray with memory pressure simulation.
"""
import gc
import os
import random
import shutil
import tempfile
import threading
import unittest

import psutil

from sqrtspace_spacetime import SpaceTimeArray, SpaceTimeConfig
class TestSpaceTimeArray(unittest.TestCase):
    """Test SpaceTimeArray functionality.

    Covers appending, indexing and slicing, spillover of items past the
    in-memory threshold, process memory usage while filling the array,
    reloading from an existing storage path, and concurrent readers/writers.
    Every test gets its own temporary storage directory.
    """

    def setUp(self):
        """Set up test environment.

        Creates a per-test temporary directory and points the
        SpaceTimeConfig defaults at it.
        """
        self.temp_dir = tempfile.mkdtemp()
        # Register removal immediately: unittest does not call tearDown()
        # when setUp() raises, but registered cleanups still run, so the
        # directory cannot leak if configuring the defaults fails below.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        # NOTE(review): set_defaults presumably changes process-wide
        # configuration that is never restored here -- confirm whether the
        # library offers a way to reset it between tests.
        SpaceTimeConfig.set_defaults(
            storage_path=self.temp_dir,
            memory_limit=50 * 1024 * 1024,  # 50MB for testing
            chunk_strategy='sqrt_n',
        )

    def test_basic_operations(self):
        """Test basic array operations: append, len, indexing and slicing."""
        array = SpaceTimeArray(threshold=100)

        # Test append (item count stays below the threshold of 100)
        for i in range(50):
            array.append(f"item_{i}")

        self.assertEqual(len(array), 50)
        self.assertEqual(array[0], "item_0")
        self.assertEqual(array[49], "item_49")

        # Test negative indexing
        self.assertEqual(array[-1], "item_49")
        self.assertEqual(array[-50], "item_0")

        # Test slice
        slice_result = array[10:20]
        self.assertEqual(len(slice_result), 10)
        self.assertEqual(slice_result[0], "item_10")

    def test_automatic_spillover(self):
        """Test automatic spillover to disk."""
        # Create array with small threshold
        array = SpaceTimeArray(threshold=10)

        # Add more items than threshold
        for i in range(100):
            array.append(f"value_{i}")

        # Check that spillover happened
        self.assertEqual(len(array), 100)
        self.assertGreater(len(array._cold_indices), 0)
        self.assertLessEqual(len(array._hot_data), array.threshold)

        # Verify all items are accessible
        for i in range(100):
            self.assertEqual(array[i], f"value_{i}")

    def test_memory_pressure_handling(self):
        """Test behavior under memory pressure."""
        # Create array with auto threshold
        array = SpaceTimeArray()

        # Generate large data items
        large_item = "x" * 10000  # 10KB string

        # The process handle does not change between samples, so create it
        # once instead of on every check.
        process = psutil.Process()

        # Add a fixed number of large items, sampling memory along the way
        for i in range(1000):
            array.append(f"{large_item}_{i}")

            # Check memory usage periodically
            if i % 100 == 0:
                # RSS is measured for the whole process, not just the array.
                memory_mb = process.memory_info().rss / 1024 / 1024
                # Ensure we're not using excessive memory
                self.assertLess(memory_mb, 200, f"Memory usage too high at iteration {i}")

        # Verify all items still accessible
        self.assertEqual(len(array), 1000)
        self.assertTrue(array[0].endswith("_0"))
        self.assertTrue(array[999].endswith("_999"))

    def test_large_dataset_sqrt_n_memory(self):
        """Test √n memory usage with large dataset."""
        # Configure for sqrt_n strategy
        SpaceTimeConfig.set_defaults(chunk_strategy='sqrt_n')

        n = 10000  # Total items
        sqrt_n = int(n ** 0.5)  # Expected memory items
        array = SpaceTimeArray()

        # Track initial memory
        gc.collect()
        process = psutil.Process()
        initial_memory = process.memory_info().rss

        # Add n items
        for i in range(n):
            array.append({"id": i, "data": f"item_{i}" * 10})

        # Force garbage collection
        gc.collect()

        # Check memory usage
        final_memory = process.memory_info().rss
        memory_increase_mb = (final_memory - initial_memory) / 1024 / 1024

        # Verify sqrt_n behavior
        self.assertEqual(len(array), n)
        self.assertLessEqual(len(array._hot_data), sqrt_n * 2)  # Allow some buffer
        # Same bound seen from the cold side. Non-strict so that it agrees
        # with the hot-side check at the boundary (hot == 2 * sqrt_n);
        # assumes every item is either hot or cold -- TODO confirm.
        self.assertGreaterEqual(len(array._cold_indices), n - sqrt_n * 2)

        # Memory should be much less than storing all items
        # Rough estimate: each item ~100 bytes, so n items = ~1MB
        # With sqrt_n, should use ~10KB in memory
        self.assertLess(memory_increase_mb, 10, f"Memory increase {memory_increase_mb}MB is too high")

        # Verify random access still works. A fixed seed keeps the sampled
        # indices identical on every run, so a failure can be reproduced.
        rng = random.Random(0)
        for _ in range(100):
            idx = rng.randrange(n)
            self.assertEqual(array[idx]["id"], idx)

    def test_persistence_across_sessions(self):
        """Test data persistence when array is recreated."""
        storage_path = os.path.join(self.temp_dir, "persist_test")

        # Create and populate array
        array1 = SpaceTimeArray(threshold=10, storage_path=storage_path)
        for i in range(50):
            array1.append(f"persistent_{i}")

        # Force spillover
        array1._check_and_spill()
        del array1

        # Create new array with same storage path
        array2 = SpaceTimeArray(threshold=10, storage_path=storage_path)

        # Data should be accessible
        self.assertEqual(len(array2), 50)
        for i in range(50):
            self.assertEqual(array2[i], f"persistent_{i}")

    def test_concurrent_access(self):
        """Test thread-safe access to array."""
        writer_count, items_per_writer = 5, 100
        reader_count, reads_per_reader = 3, 50
        join_timeout_s = 30

        array = SpaceTimeArray(threshold=100)
        # Exceptions raised inside worker threads are collected here, since
        # they would not otherwise reach the test's own thread.
        errors = []

        def writer(start, count):
            try:
                for i in range(start, start + count):
                    array.append(f"thread_{i}")
            except Exception as e:
                errors.append(e)

        def reader(count):
            try:
                for _ in range(count):
                    if len(array) > 0:
                        _ = array[0]  # Just access, don't verify
            except Exception as e:
                errors.append(e)

        # Create threads. Daemon threads cannot keep the interpreter alive
        # if one of them blocks forever.
        threads = [
            threading.Thread(
                target=writer,
                args=(i * items_per_writer, items_per_writer),
                daemon=True,
            )
            for i in range(writer_count)
        ]
        threads.extend(
            threading.Thread(target=reader, args=(reads_per_reader,), daemon=True)
            for _ in range(reader_count)
        )

        # Run threads
        for t in threads:
            t.start()
        # Bounded wait: a deadlock should fail this test, not hang the run.
        for t in threads:
            t.join(timeout=join_timeout_s)
        stuck = [t.name for t in threads if t.is_alive()]
        self.assertEqual(stuck, [], f"Threads did not finish: {stuck}")

        # Check for errors
        self.assertEqual(len(errors), 0, f"Thread errors: {errors}")
        self.assertEqual(len(array), writer_count * items_per_writer)
if __name__ == "__main__":
    # Run the tests in this module when the file is executed directly.
    unittest.main()