From e0ca63ebdf04e360169a798df27b64f73787dd38 Mon Sep 17 00:00:00 2001 From: Dave Friedel Date: Sun, 20 Jul 2025 04:08:08 -0400 Subject: [PATCH] Initial --- CONTRIBUTING.md | 154 +++++ LICENSE | 190 ++++++ README.md | 578 ++++++++++++++++++ config/services.yaml | 48 ++ config/spacetime.php | 110 ++++ examples/comprehensive_example.php | 206 +++++++ examples/laravel-app/README.md | 375 ++++++++++++ .../Http/Controllers/ProductController.php | 194 ++++++ .../app/Jobs/ProcessLargeDataset.php | 239 ++++++++ .../app/Services/ProductService.php | 224 +++++++ src/Algorithms/ExternalGroupBy.php | 196 ++++++ src/Algorithms/ExternalSort.php | 163 +++++ src/Batch/BatchJob.php | 122 ++++ src/Batch/BatchProcessor.php | 267 ++++++++ src/Batch/BatchResult.php | 206 +++++++ src/Checkpoint/CacheCheckpointStorage.php | 57 ++ src/Checkpoint/CheckpointManager.php | 126 ++++ src/Checkpoint/CheckpointStorage.php | 41 ++ src/Checkpoint/DatabaseCheckpointStorage.php | 94 +++ src/Checkpoint/FileCheckpointStorage.php | 106 ++++ src/Collections/SpaceTimeArray.php | 350 +++++++++++ src/Database/SpaceTimeQueryBuilder.php | 288 +++++++++ src/File/CsvExporter.php | 200 ++++++ src/File/CsvReader.php | 198 ++++++ src/File/JsonLinesProcessor.php | 198 ++++++ src/Laravel/SpaceTimeServiceProvider.php | 155 +++++ src/Memory/Handlers/CacheEvictionHandler.php | 85 +++ .../Handlers/GarbageCollectionHandler.php | 55 ++ src/Memory/Handlers/LoggingHandler.php | 59 ++ src/Memory/MemoryPressureMonitor.php | 168 +++++ src/SpaceTimeConfig.php | 196 ++++++ src/Storage/ExternalStorage.php | 187 ++++++ src/Streams/Iterators/ChunkIterator.php | 60 ++ src/Streams/Iterators/FilterIterator.php | 22 + src/Streams/Iterators/FlatMapIterator.php | 79 +++ src/Streams/Iterators/MapIterator.php | 27 + src/Streams/Iterators/SkipIterator.php | 39 ++ src/Streams/Iterators/TakeIterator.php | 46 ++ src/Streams/SpaceTimeStream.php | 523 ++++++++++++++++ src/Symfony/Command/ProcessFileCommand.php | 212 +++++++ src/Symfony/SpaceTimeBundle.php | 68 +++ tests/Algorithms/ExternalGroupByTest.php | 150 +++++ tests/Algorithms/ExternalSortTest.php | 111 ++++ tests/Batch/BatchProcessorTest.php | 132 ++++ tests/Checkpoint/CheckpointManagerTest.php | 220 +++++++ tests/Collections/SpaceTimeArrayTest.php | 135 ++++ tests/Memory/MemoryPressureMonitorTest.php | 93 +++ tests/Streams/SpaceTimeStreamTest.php | 161 +++++ 48 files changed, 7913 insertions(+) create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 config/services.yaml create mode 100644 config/spacetime.php create mode 100644 examples/comprehensive_example.php create mode 100644 examples/laravel-app/README.md create mode 100644 examples/laravel-app/app/Http/Controllers/ProductController.php create mode 100644 examples/laravel-app/app/Jobs/ProcessLargeDataset.php create mode 100644 examples/laravel-app/app/Services/ProductService.php create mode 100644 src/Algorithms/ExternalGroupBy.php create mode 100644 src/Algorithms/ExternalSort.php create mode 100644 src/Batch/BatchJob.php create mode 100644 src/Batch/BatchProcessor.php create mode 100644 src/Batch/BatchResult.php create mode 100644 src/Checkpoint/CacheCheckpointStorage.php create mode 100644 src/Checkpoint/CheckpointManager.php create mode 100644 src/Checkpoint/CheckpointStorage.php create mode 100644 src/Checkpoint/DatabaseCheckpointStorage.php create mode 100644 src/Checkpoint/FileCheckpointStorage.php create mode 100644 src/Collections/SpaceTimeArray.php create mode 100644 
src/Database/SpaceTimeQueryBuilder.php create mode 100644 src/File/CsvExporter.php create mode 100644 src/File/CsvReader.php create mode 100644 src/File/JsonLinesProcessor.php create mode 100644 src/Laravel/SpaceTimeServiceProvider.php create mode 100644 src/Memory/Handlers/CacheEvictionHandler.php create mode 100644 src/Memory/Handlers/GarbageCollectionHandler.php create mode 100644 src/Memory/Handlers/LoggingHandler.php create mode 100644 src/Memory/MemoryPressureMonitor.php create mode 100644 src/SpaceTimeConfig.php create mode 100644 src/Storage/ExternalStorage.php create mode 100644 src/Streams/Iterators/ChunkIterator.php create mode 100644 src/Streams/Iterators/FilterIterator.php create mode 100644 src/Streams/Iterators/FlatMapIterator.php create mode 100644 src/Streams/Iterators/MapIterator.php create mode 100644 src/Streams/Iterators/SkipIterator.php create mode 100644 src/Streams/Iterators/TakeIterator.php create mode 100644 src/Streams/SpaceTimeStream.php create mode 100644 src/Symfony/Command/ProcessFileCommand.php create mode 100644 src/Symfony/SpaceTimeBundle.php create mode 100644 tests/Algorithms/ExternalGroupByTest.php create mode 100644 tests/Algorithms/ExternalSortTest.php create mode 100644 tests/Batch/BatchProcessorTest.php create mode 100644 tests/Checkpoint/CheckpointManagerTest.php create mode 100644 tests/Collections/SpaceTimeArrayTest.php create mode 100644 tests/Memory/MemoryPressureMonitorTest.php create mode 100644 tests/Streams/SpaceTimeStreamTest.php diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..9dc2fb0 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,154 @@ +# Contributing to Ubiquity SpaceTime PHP + +Thank you for your interest in contributing to Ubiquity SpaceTime PHP! This document provides guidelines and instructions for contributing. + +## Code of Conduct + +By participating in this project, you agree to abide by our code of conduct: be respectful, inclusive, and considerate of others. + +## How to Contribute + +### Reporting Issues + +1. Check if the issue already exists in the [issue tracker](https://github.com/ubiquity/spacetime-php/issues) +2. If not, create a new issue with: + - Clear title and description + - Steps to reproduce (if applicable) + - Expected vs actual behavior + - PHP version and environment details + +### Submitting Pull Requests + +1. Fork the repository +2. Create a feature branch (`git checkout -b feature/my-feature`) +3. Make your changes following our coding standards +4. Add/update tests as needed +5. Update documentation if applicable +6. Commit with descriptive messages +7. Push to your fork +8. 
Submit a pull request + +## Development Setup + +```bash +# Clone your fork +git clone https://github.com/YOUR_USERNAME/ubiquity-php.git +cd ubiquity-php + +# Install dependencies +composer install + +# Run tests +vendor/bin/phpunit + +# Run tests with coverage +vendor/bin/phpunit --coverage-html coverage +``` + +## Coding Standards + +### PHP Standards + +- Follow PSR-12 coding style +- Use PHP 8.1+ features appropriately +- Add type declarations for all parameters and return types +- Use strict types (`declare(strict_types=1);`) + +### Code Style + +```php +property = $property; + } + + /** + * Method description + * + * @throws \Exception When something goes wrong + */ + public function doSomething(int $param): array + { + // Implementation + return []; + } +} +``` + +### Testing Guidelines + +- Write tests for all new features +- Maintain or improve code coverage +- Use descriptive test method names +- Follow AAA pattern (Arrange, Act, Assert) + +```php +public function testFeatureWorksCorrectly(): void +{ + // Arrange + $instance = new TestedClass(); + + // Act + $result = $instance->doSomething(); + + // Assert + $this->assertEquals('expected', $result); +} +``` + +## Documentation + +- Update README.md for new features +- Add PHPDoc blocks for all public methods +- Include usage examples for complex features +- Update CHANGELOG.md following [Keep a Changelog](https://keepachangelog.com/) + +## Performance Considerations + +Since SpaceTime focuses on memory efficiency: + +1. Always consider memory usage in your implementations +2. Benchmark memory usage and performance for new features +3. Document any trade-offs between memory and speed +4. Follow the √n principle where applicable + +## Pull Request Process + +1. Ensure all tests pass +2. Update documentation +3. Add entry to CHANGELOG.md +4. Request review from maintainers +5. Address feedback promptly +6. Squash commits if requested + +## Release Process + +Releases are managed by maintainers following semantic versioning: + +- MAJOR: Breaking changes +- MINOR: New features (backward compatible) +- PATCH: Bug fixes + +## Questions? + +Feel free to: +- Open an issue for questions +- Join our discussions +- Contact maintainers + +Thank you for contributing! \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c503fb6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2025 David H. Friedel Jr. and SqrtSpace Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..f26e539 --- /dev/null +++ b/README.md @@ -0,0 +1,578 @@ +# SqrtSpace SpaceTime for PHP + +[![Latest Stable Version](https://poser.pugx.org/sqrtspace/spacetime/v)](https://packagist.org/packages/sqrtspace/spacetime) +[![Total Downloads](https://poser.pugx.org/sqrtspace/spacetime/downloads)](https://packagist.org/packages/sqrtspace/spacetime) +[![License](https://poser.pugx.org/sqrtspace/spacetime/license)](https://packagist.org/packages/sqrtspace/spacetime) +[![PHP Version Require](https://poser.pugx.org/sqrtspace/spacetime/require/php)](https://packagist.org/packages/sqrtspace/spacetime) + +Memory-efficient algorithms and data structures for PHP using Williams' √n space-time tradeoffs. 
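To make the tradeoff concrete: instead of materializing all n items at once, SpaceTime works in chunks of roughly √n items, so peak memory grows with the square root of the input rather than linearly. Below is a minimal plain-PHP sketch of that idea; the library's `SpaceTimeConfig::calculateSqrtN()` helper (see the Configuration section) performs the same chunk-size calculation, and the data here is purely illustrative.

```php
$n = 1_000_000;                     // total items to process
$chunkSize = (int) ceil(sqrt($n));  // ≈ 1,000 items per chunk

for ($offset = 0; $offset < $n; $offset += $chunkSize) {
    // Only one √n-sized chunk is materialized at a time.
    $chunk = range($offset + 1, min($offset + $chunkSize, $n)); // stand-in for real data
    // ... process $chunk: aggregate, write out, index, etc.
}
```

The collections, streams, and query helpers documented below (`chunkBySqrtN()`, external sort and group-by, spill-to-disk arrays) apply this pattern automatically.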
+ +## Installation + +```bash +composer require sqrtspace/spacetime +``` + +## Core Concepts + +SpaceTime implements theoretical computer science results showing that many algorithms can achieve better memory usage by accepting slightly slower runtime. The key insight is using √n memory instead of n memory, where n is the input size. + +### Key Features + +- **External Sorting**: Sort large datasets that don't fit in memory +- **External Grouping**: Group and aggregate data with minimal memory usage +- **Streaming Operations**: Process files and data streams efficiently +- **Memory Pressure Handling**: Automatic response to low memory conditions +- **Checkpoint/Resume**: Save progress and resume long-running operations +- **Laravel Integration**: Deep integration with Laravel collections and queries + +## Quick Start + +```php +use SqrtSpace\SpaceTime\Collections\SpaceTimeArray; +use SqrtSpace\SpaceTime\Algorithms\ExternalSort; + +// Handle large arrays with automatic memory management +$array = new SpaceTimeArray(); +for ($i = 0; $i < 10000000; $i++) { + $array[] = random_int(1, 1000000); +} + +// Sort large datasets using only √n memory +$sorted = ExternalSort::sort($array); + +// Process in optimal chunks +foreach ($array->chunkBySqrtN() as $chunk) { + processChunk($chunk); +} +``` + +## Examples + +### Basic Examples +See [`examples/comprehensive_example.php`](examples/comprehensive_example.php) for a complete demonstration of all features including: +- Memory-efficient arrays and dictionaries +- External sorting and grouping +- Stream processing +- CSV import/export +- Batch processing with checkpoints +- Memory pressure monitoring + +### Laravel Application +Check out [`examples/laravel-app/`](examples/laravel-app/) for a complete Laravel application demonstrating: +- Streaming API endpoints +- Memory-efficient CSV exports +- Background job processing with checkpoints +- Real-time analytics with SSE +- Production-ready configurations + +See the [Laravel example README](examples/laravel-app/README.md) for setup instructions and detailed usage. + +## Features + +### 1. Memory-Efficient Collections + +```php +use SqrtSpace\SpaceTime\Collections\SpaceTimeArray; +use SqrtSpace\SpaceTime\Collections\AdaptiveDictionary; + +// Adaptive array - automatically switches between memory and disk +$array = new SpaceTimeArray(); +$array->setThreshold(10000); // Switch to external storage after 10k items + +// Adaptive dictionary with optimal memory usage +$dict = new AdaptiveDictionary(); +for ($i = 0; $i < 1000000; $i++) { + $dict["key_$i"] = "value_$i"; +} +``` + +### 2. External Algorithms + +```php +use SqrtSpace\SpaceTime\Algorithms\ExternalSort; +use SqrtSpace\SpaceTime\Algorithms\ExternalGroupBy; + +// Sort millions of records using minimal memory +$data = getData(); // Large dataset +$sorted = ExternalSort::sort($data, fn($a, $b) => $a['date'] <=> $b['date']); + +// Group by with external storage +$grouped = ExternalGroupBy::groupBy($data, fn($item) => $item['category']); +``` + +### 3. Streaming Operations + +```php +use SqrtSpace\SpaceTime\Streams\SpaceTimeStream; + +// Process large files with bounded memory +$stream = SpaceTimeStream::fromFile('large_file.csv') + ->map(fn($line) => str_getcsv($line)) + ->filter(fn($row) => $row[2] > 100) + ->chunkBySqrtN() + ->each(function($chunk) { + processBatch($chunk); + }); +``` + +### 4. 
Database Integration + +```php +use SqrtSpace\SpaceTime\Database\SpaceTimeQueryBuilder; + +// Process large result sets efficiently +$query = new SpaceTimeQueryBuilder($pdo); +$query->from('orders') + ->where('status', '=', 'pending') + ->orderByExternal('created_at', 'desc') + ->chunkBySqrtN(function($orders) { + foreach ($orders as $order) { + processOrder($order); + } + }); + +// Stream results for minimal memory usage +$stream = $query->from('logs') + ->where('level', '=', 'error') + ->stream(); + +$stream->filter(fn($log) => strpos($log['message'], 'critical') !== false) + ->each(fn($log) => alertAdmin($log)); +``` + +### 5. Laravel Integration + +```php +// In AppServiceProvider +use SqrtSpace\SpaceTime\Laravel\SpaceTimeServiceProvider; + +public function register() +{ + $this->app->register(SpaceTimeServiceProvider::class); +} + +// Collection macros +$collection = collect($largeArray); + +// Sort using external memory +$sorted = $collection->sortByExternal('price'); + +// Group by with external storage +$grouped = $collection->groupByExternal('category'); + +// Process in √n chunks +$collection->chunkBySqrtN()->each(function ($chunk) { + processBatch($chunk); +}); + +// Query builder extensions +DB::table('orders') + ->chunkBySqrtN(function ($orders) { + foreach ($orders as $order) { + processOrder($order); + } + }); +``` + +### 6. Memory Pressure Handling + +```php +use SqrtSpace\SpaceTime\Memory\MemoryPressureMonitor; +use SqrtSpace\SpaceTime\Memory\Handlers\LoggingHandler; +use SqrtSpace\SpaceTime\Memory\Handlers\CacheEvictionHandler; +use SqrtSpace\SpaceTime\Memory\Handlers\GarbageCollectionHandler; + +$monitor = new MemoryPressureMonitor('512M'); + +// Add handlers +$monitor->registerHandler(new LoggingHandler($logger)); +$monitor->registerHandler(new CacheEvictionHandler()); +$monitor->registerHandler(new GarbageCollectionHandler()); + +// Check pressure in your operations +if ($monitor->check() === MemoryPressureLevel::HIGH) { + // Switch to more aggressive memory saving + $processor->useExternalStorage(); +} +``` + +### 7. Checkpointing for Fault Tolerance + +```php +use SqrtSpace\SpaceTime\Checkpoint\CheckpointManager; + +$checkpoint = new CheckpointManager('import_job_123'); + +foreach ($largeDataset->chunkBySqrtN() as $chunk) { + processChunk($chunk); + + // Save progress every √n items + if ($checkpoint->shouldCheckpoint()) { + $checkpoint->save([ + 'processed' => $processedCount, + 'last_id' => $lastId + ]); + } +} +``` + +## Real-World Examples + +### Processing Large CSV Files + +```php +use SqrtSpace\SpaceTime\File\CsvReader; +use SqrtSpace\SpaceTime\Algorithms\ExternalGroupBy; + +$reader = new CsvReader('sales_data.csv'); + +// Get column statistics +$stats = $reader->getColumnStats('amount'); +echo "Average order: $" . $stats['avg']; + +// Process with type conversion +$totals = $reader->readWithTypes([ + 'amount' => 'float', + 'quantity' => 'int', + 'date' => 'date' +])->reduce(function ($totals, $row) { + $month = $row['date']->format('Y-m'); + $totals[$month] = ($totals[$month] ?? 
0) + $row['amount']; + return $totals; +}, []); +``` + +### Large Data Export + +```php +use SqrtSpace\SpaceTime\File\CsvExporter; +use SqrtSpace\SpaceTime\Database\SpaceTimeQueryBuilder; + +$exporter = new CsvExporter('users_export.csv'); +$query = new SpaceTimeQueryBuilder($pdo); + +// Export with headers +$exporter->writeHeaders(['ID', 'Name', 'Email', 'Created At']); + +// Stream data directly to CSV +$query->from('users') + ->orderBy('created_at', 'desc') + ->chunkBySqrtN(function($users) use ($exporter) { + $exporter->writeRows(array_map(function($user) { + return [ + $user['id'], + $user['name'], + $user['email'], + $user['created_at'] + ]; + }, $users)); + }); + +echo "Exported " . number_format($exporter->getBytesWritten()) . " bytes\n"; +``` + +### Batch Processing with Memory Limits + +```php +use SqrtSpace\SpaceTime\Batch\BatchProcessor; + +$processor = new BatchProcessor([ + 'memory_threshold' => 0.8, + 'checkpoint_enabled' => true, + 'progress_callback' => function($batch, $size, $result) { + echo "Processed batch $batch ($size items)\n"; + } +]); + +$result = $processor->process($millionItems, function($batch) { + $processed = []; + foreach ($batch as $key => $item) { + $processed[$key] = expensiveOperation($item); + } + return $processed; +}, 'job_123'); + +echo "Success: " . $result->getSuccessCount() . "\n"; +echo "Errors: " . $result->getErrorCount() . "\n"; +echo "Time: " . $result->getExecutionTime() . "s\n"; +``` + +## Configuration + +```php +use SqrtSpace\SpaceTime\SpaceTimeConfig; + +// Global configuration +SpaceTimeConfig::configure([ + 'memory_limit' => '512M', + 'external_storage_path' => '/tmp/spacetime', + 'chunk_strategy' => 'sqrt_n', // or 'memory_based', 'fixed' + 'enable_checkpointing' => true, + 'compression' => true, + 'compression_level' => 6 +]); + +// Per-operation configuration +$array = new SpaceTimeArray(10000); // threshold + +// Check configuration +echo "Chunk size for 1M items: " . SpaceTimeConfig::calculateSqrtN(1000000) . "\n"; +echo "Storage path: " . SpaceTimeConfig::getStoragePath() . "\n"; +``` + +## Advanced Usage + +### JSON Lines Processing + +```php +use SqrtSpace\SpaceTime\File\JsonLinesProcessor; + +// Process large JSONL files +JsonLinesProcessor::processInChunks('events.jsonl', function($events) { + foreach ($events as $event) { + if ($event['type'] === 'error') { + logError($event); + } + } +}); + +// Split large file +$files = JsonLinesProcessor::split('huge.jsonl', 100000, 'output/chunk'); +echo "Split into " . count($files) . 
" files\n"; + +// Merge multiple files +$count = JsonLinesProcessor::merge($files, 'merged.jsonl'); +echo "Merged $count records\n"; +``` + +### Streaming Operations + +```php +use SqrtSpace\SpaceTime\Streams\SpaceTimeStream; + +// Chain operations efficiently +SpaceTimeStream::fromCsv('sales.csv') + ->filter(fn($row) => $row['region'] === 'US') + ->map(fn($row) => [ + 'product' => $row['product'], + 'revenue' => $row['quantity'] * $row['price'] + ]) + ->chunkBySqrtN() + ->each(function($chunk) { + $total = array_sum(array_column($chunk, 'revenue')); + echo "Chunk revenue: \$$total\n"; + }); +``` + +### Custom Batch Jobs + +```php +use SqrtSpace\SpaceTime\Batch\BatchJob; + +class ImportJob extends BatchJob +{ + private string $filename; + + public function __construct(string $filename) + { + parent::__construct(); + $this->filename = $filename; + } + + protected function getItems(): iterable + { + return SpaceTimeStream::fromCsv($this->filename); + } + + public function processItem(array $batch): array + { + $results = []; + foreach ($batch as $key => $row) { + $user = User::create([ + 'name' => $row['name'], + 'email' => $row['email'] + ]); + $results[$key] = $user->id; + } + return $results; + } + + protected function getUniqueId(): string + { + return md5($this->filename); + } +} + +// Run job with automatic checkpointing +$job = new ImportJob('users.csv'); +$result = $job->execute(); + +// Or resume if interrupted +if ($job->canResume()) { + $result = $job->resume(); +} +``` + +## Testing + +```bash +# Run all tests +vendor/bin/phpunit + +# Run specific test suite +vendor/bin/phpunit tests/Algorithms + +# With coverage +vendor/bin/phpunit --coverage-html coverage +``` + +## Performance Considerations + +1. **Chunk Size**: The default √n chunk size is optimal for most cases, but you can tune it: + ```php + SpaceTimeConfig::configure(['chunk_strategy' => 'fixed', 'fixed_chunk_size' => 5000]); + ``` + +2. **Compression**: Enable for text-heavy data, disable for already compressed data: + ```php + SpaceTimeConfig::configure(['compression' => false]); + ``` + +3. **Storage Location**: Use fast local SSDs for external storage: + ```php + SpaceTimeConfig::configure(['external_storage_path' => '/mnt/fast-ssd/spacetime']); + ``` + +## Framework Integration + +### Laravel + +```php +// config/spacetime.php +return [ + 'memory_limit' => env('SPACETIME_MEMORY_LIMIT', '256M'), + 'storage_driver' => env('SPACETIME_STORAGE', 'file'), + 'redis_connection' => env('SPACETIME_REDIS', 'default'), +]; + +// In controller +public function exportOrders() +{ + return SpaceTimeResponse::stream(function() { + Order::orderByExternal('created_at') + ->chunkBySqrtN(function($orders) { + foreach ($orders as $order) { + echo $order->toCsv() . "\n"; + } + }); + }); +} +``` + +### Symfony + +For a complete Symfony integration example, see our [Symfony bundle documentation](https://github.com/MarketAlly/Ubiquity/wiki/Symfony-Integration). + +```yaml +# config/bundles.php +return [ + // ... 
+ SqrtSpace\SpaceTime\Symfony\SpaceTimeBundle::class => ['all' => true], +]; +``` + +```yaml +# config/packages/spacetime.yaml +spacetime: + memory_limit: '%env(SPACETIME_MEMORY_LIMIT)%' + storage_path: '%kernel.project_dir%/var/spacetime' + chunk_strategy: 'sqrt_n' + enable_checkpointing: true + compression: true +``` + +```php +// In controller +use SqrtSpace\SpaceTime\Batch\BatchProcessor; +use SqrtSpace\SpaceTime\File\CsvReader; + +#[Route('/import')] +public function import(BatchProcessor $processor): Response +{ + $reader = new CsvReader($this->getParameter('import_file')); + + $result = $processor->process( + $reader->stream(), + fn($batch) => $this->importBatch($batch) + ); + + return $this->json([ + 'imported' => $result->getSuccessCount(), + 'errors' => $result->getErrorCount() + ]); +} +``` + +```bash +# Console command +php bin/console spacetime:process-file input.csv output.csv --format=csv --checkpoint +``` + +## Troubleshooting + +### Out of Memory Errors + +1. Reduce chunk size: + ```php + SpaceTimeConfig::configure(['chunk_strategy' => 'fixed', 'fixed_chunk_size' => 1000]); + ``` + +2. Enable more aggressive memory handling: + ```php + $monitor = new MemoryPressureMonitor('128M'); // Lower threshold + ``` + +3. Use external storage earlier: + ```php + $array = new SpaceTimeArray(100); // Smaller threshold + ``` + +### Performance Issues + +1. Check disk I/O speed +2. Enable compression for text data +3. Use memory-based external storage: + ```php + SpaceTimeConfig::configure(['external_storage_path' => '/dev/shm/spacetime']); + ``` + +### Checkpoint Recovery + +```php +$checkpoint = new CheckpointManager('job_id'); +if ($checkpoint->exists()) { + $state = $checkpoint->load(); + echo "Resuming from: " . json_encode($state) . "\n"; +} +``` + +## Requirements + +- PHP 8.1 or higher +- ext-json +- ext-mbstring + +## Optional Extensions + +- ext-apcu for faster caching +- ext-redis for distributed operations +- ext-zlib for compression + +## Contributing + +Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details. + +## License + +The Apache 2.0 License. Please see [LICENSE](LICENSE) for details. 
\ No newline at end of file diff --git a/config/services.yaml b/config/services.yaml new file mode 100644 index 0000000..6825600 --- /dev/null +++ b/config/services.yaml @@ -0,0 +1,48 @@ +services: + _defaults: + autowire: true + autoconfigure: true + + # Memory Monitor + SqrtSpace\SpaceTime\Memory\MemoryPressureMonitor: + arguments: + $memoryLimit: '%spacetime.memory_limit%' + + # Memory Handlers + SqrtSpace\SpaceTime\Memory\Handlers\LoggingHandler: + arguments: + $logger: '@logger' + $minLevel: !php/const SqrtSpace\SpaceTime\Memory\MemoryPressureLevel::MEDIUM + tags: + - { name: 'spacetime.memory_handler' } + + SqrtSpace\SpaceTime\Memory\Handlers\GarbageCollectionHandler: + tags: + - { name: 'spacetime.memory_handler' } + + # External Storage + SqrtSpace\SpaceTime\Storage\ExternalStorage: + factory: ['SqrtSpace\SpaceTime\Storage\ExternalStorage', 'create'] + arguments: + $prefix: 'symfony_' + + # Algorithms + SqrtSpace\SpaceTime\Algorithms\ExternalSort: + public: true + + SqrtSpace\SpaceTime\Algorithms\ExternalGroupBy: + public: true + + # Checkpoint Storage + SqrtSpace\SpaceTime\Checkpoint\FileCheckpointStorage: + arguments: + $basePath: '%spacetime.storage_path%/checkpoints' + + SqrtSpace\SpaceTime\Checkpoint\CheckpointStorage: + alias: SqrtSpace\SpaceTime\Checkpoint\FileCheckpointStorage + + # Database Query Builder + SqrtSpace\SpaceTime\Database\SpaceTimeQueryBuilder: + factory: ['SqrtSpace\SpaceTime\Database\SpaceTimeQueryBuilder', 'new'] + arguments: + $connection: '@database_connection' \ No newline at end of file diff --git a/config/spacetime.php b/config/spacetime.php new file mode 100644 index 0000000..7afdaad --- /dev/null +++ b/config/spacetime.php @@ -0,0 +1,110 @@ + env('SPACETIME_MEMORY_LIMIT', '256M'), + + /* + |-------------------------------------------------------------------------- + | Storage Path + |-------------------------------------------------------------------------- + | + | Directory where SpaceTime will store temporary files for external + | algorithms. Defaults to storage/spacetime. + | + */ + 'storage_path' => env('SPACETIME_STORAGE_PATH', storage_path('spacetime')), + + /* + |-------------------------------------------------------------------------- + | Chunk Strategy + |-------------------------------------------------------------------------- + | + | Strategy for determining chunk sizes: + | - 'sqrt_n': Use √n of total items (recommended) + | - 'memory_based': Based on available memory + | - 'fixed': Fixed chunk size + | + */ + 'chunk_strategy' => env('SPACETIME_CHUNK_STRATEGY', 'sqrt_n'), + + /* + |-------------------------------------------------------------------------- + | Enable Checkpointing + |-------------------------------------------------------------------------- + | + | Whether to enable automatic checkpointing for long-running operations. + | Checkpoints allow operations to be resumed after failures. 
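 + | (A checkpoint is the small state array that a job saves through
 + | CheckpointManager, e.g. ['last_id' => ..., 'processed' => ...], and
 + | restores after a failure; this option controls whether SpaceTime does
 + | that automatically.)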
+ | + */ + 'enable_checkpointing' => env('SPACETIME_CHECKPOINTING', true), + + /* + |-------------------------------------------------------------------------- + | Checkpoint Storage + |-------------------------------------------------------------------------- + | + | Where to store checkpoints: + | - 'file': Store in filesystem + | - 'cache': Use Laravel's cache system + | - 'database': Store in database + | + */ + 'checkpoint_storage' => env('SPACETIME_CHECKPOINT_STORAGE', 'file'), + + /* + |-------------------------------------------------------------------------- + | Compression + |-------------------------------------------------------------------------- + | + | Whether to compress data in external storage. Reduces disk usage + | but adds CPU overhead. + | + */ + 'compression' => env('SPACETIME_COMPRESSION', true), + 'compression_level' => env('SPACETIME_COMPRESSION_LEVEL', 6), + + /* + |-------------------------------------------------------------------------- + | Redis Connection + |-------------------------------------------------------------------------- + | + | Redis connection to use for distributed operations. Set to null + | to disable distributed features. + | + */ + 'redis_connection' => env('SPACETIME_REDIS_CONNECTION', null), + + /* + |-------------------------------------------------------------------------- + | Profiling + |-------------------------------------------------------------------------- + | + | Enable profiling to collect performance metrics. Useful for debugging + | but adds overhead. + | + */ + 'enable_profiling' => env('SPACETIME_PROFILING', false), + + /* + |-------------------------------------------------------------------------- + | Cleanup + |-------------------------------------------------------------------------- + | + | How to handle cleanup of temporary files: + | - 'immediate': Clean up immediately after use + | - 'delayed': Clean up after a delay + | - 'manual': No automatic cleanup + | + */ + 'cleanup_mode' => env('SPACETIME_CLEANUP_MODE', 'immediate'), + 'cleanup_delay' => env('SPACETIME_CLEANUP_DELAY', 3600), // 1 hour +]; \ No newline at end of file diff --git a/examples/comprehensive_example.php b/examples/comprehensive_example.php new file mode 100644 index 0000000..0b7efe5 --- /dev/null +++ b/examples/comprehensive_example.php @@ -0,0 +1,206 @@ + '256M', + 'external_storage_path' => __DIR__ . '/temp', + 'chunk_strategy' => 'sqrt_n', + 'enable_checkpointing' => true, + 'compression' => true, +]); + +echo "=== Ubiquity SpaceTime PHP Examples ===\n\n"; + +// Example 1: Memory-Efficient Array +echo "1. SpaceTimeArray Example\n"; +$array = new SpaceTimeArray(1000); // Spill to disk after 1000 items + +// Add 10,000 items +for ($i = 0; $i < 10000; $i++) { + $array["key_$i"] = "value_$i"; +} + +echo " - Created array with " . count($array) . " items\n"; +echo " - Memory usage: " . number_format(memory_get_usage(true) / 1024 / 1024, 2) . " MB\n\n"; + +// Example 2: External Sorting +echo "2. External Sort Example\n"; +$unsorted = []; +for ($i = 0; $i < 50000; $i++) { + $unsorted[] = [ + 'id' => $i, + 'value' => mt_rand(1, 1000000), + 'name' => 'Item ' . $i + ]; +} + +$sorted = ExternalSort::sortBy($unsorted, fn($item) => $item['value']); +echo " - Sorted " . count($sorted) . " items by value\n"; +echo " - First item value: " . $sorted[0]['value'] . "\n"; +echo " - Last item value: " . $sorted[count($sorted) - 1]['value'] . "\n\n"; + +// Example 3: External GroupBy +echo "3. 
External GroupBy Example\n"; +$orders = []; +for ($i = 0; $i < 10000; $i++) { + $orders[] = [ + 'customer_id' => mt_rand(1, 100), + 'amount' => mt_rand(10, 1000), + 'category' => ['Electronics', 'Clothing', 'Food', 'Books'][mt_rand(0, 3)] + ]; +} + +$byCategory = ExternalGroupBy::groupBySum( + $orders, + fn($order) => $order['category'], + fn($order) => $order['amount'] +); + +foreach ($byCategory as $category => $total) { + echo " - $category: $" . number_format($total, 2) . "\n"; +} +echo "\n"; + +// Example 4: Stream Processing +echo "4. Stream Processing Example\n"; + +// Create sample CSV file +$csvFile = __DIR__ . '/sample.csv'; +$csv = fopen($csvFile, 'w'); +fputcsv($csv, ['id', 'name', 'price', 'quantity']); +for ($i = 1; $i <= 1000; $i++) { + fputcsv($csv, [$i, "Product $i", mt_rand(10, 100), mt_rand(1, 50)]); +} +fclose($csv); + +// Process CSV stream +$totalRevenue = SpaceTimeStream::fromCsv($csvFile) + ->map(fn($row) => [ + 'id' => $row['id'], + 'revenue' => (float)$row['price'] * (int)$row['quantity'] + ]) + ->reduce(fn($total, $row) => $total + $row['revenue'], 0); + +echo " - Total revenue from 1000 products: $" . number_format($totalRevenue, 2) . "\n\n"; + +// Example 5: Memory Pressure Monitoring +echo "5. Memory Pressure Monitoring Example\n"; +$monitor = new MemoryPressureMonitor('100M'); + +// Simulate memory usage +$data = []; +for ($i = 0; $i < 100; $i++) { + $data[] = str_repeat('x', 100000); // 100KB per item + + $level = $monitor->check(); + if ($level !== MemoryPressureLevel::NONE) { + echo " - Memory pressure detected: " . $level->value . "\n"; + $info = $monitor->getMemoryInfo(); + echo " - Memory usage: " . round($info['percentage'], 2) . "%\n"; + break; + } +} + +// Clean up +unset($data); +$monitor->forceCleanup(); +echo "\n"; + +// Example 6: Batch Processing with Checkpoints +echo "6. Batch Processing Example\n"; + +$processor = new BatchProcessor([ + 'batch_size' => 100, + 'checkpoint_enabled' => true, + 'progress_callback' => function($batch, $size, $result) { + echo " - Processing batch $batch ($size items)\n"; + } +]); + +$items = range(1, 500); +$result = $processor->process($items, function($batch) { + $processed = []; + foreach ($batch as $key => $value) { + // Simulate processing + $processed[$key] = $value * 2; + } + return $processed; +}, 'example_job'); + +echo " - Processed: " . $result->getSuccessCount() . " items\n"; +echo " - Execution time: " . round($result->getExecutionTime(), 2) . " seconds\n\n"; + +// Example 7: CSV Export with Streaming +echo "7. CSV Export Example\n"; + +$exportFile = __DIR__ . '/export.csv'; +$exporter = new CsvExporter($exportFile); + +$exporter->writeHeaders(['ID', 'Name', 'Email', 'Created']); + +// Simulate exporting user data +$exporter->writeInChunks(function() { + for ($i = 1; $i <= 1000; $i++) { + yield [ + 'id' => $i, + 'name' => "User $i", + 'email' => "user$i@example.com", + 'created' => date('Y-m-d H:i:s') + ]; + } +}()); + +echo " - Exported to: $exportFile\n"; +echo " - File size: " . number_format(filesize($exportFile) / 1024, 2) . " KB\n\n"; + +// Example 8: JSON Lines Processing +echo "8. JSON Lines Processing Example\n"; + +$jsonlFile = __DIR__ . '/events.jsonl'; +$events = []; +for ($i = 0; $i < 100; $i++) { + $events[] = [ + 'id' => $i, + 'type' => ['click', 'view', 'purchase'][mt_rand(0, 2)], + 'timestamp' => time() - mt_rand(0, 86400), + 'user_id' => mt_rand(1, 50) + ]; +} + +JsonLinesProcessor::write($events, $jsonlFile); + +// Process and filter +$filtered = __DIR__ . 
'/purchases.jsonl'; +$count = JsonLinesProcessor::filter( + $jsonlFile, + $filtered, + fn($event) => $event['type'] === 'purchase' +); + +echo " - Created JSONL with 100 events\n"; +echo " - Filtered $count purchase events\n\n"; + +// Clean up example files +echo "Cleaning up example files...\n"; +unlink($csvFile); +unlink($exportFile); +unlink($jsonlFile); +unlink($filtered); + +echo "\n=== Examples Complete ===\n"; \ No newline at end of file diff --git a/examples/laravel-app/README.md b/examples/laravel-app/README.md new file mode 100644 index 0000000..e21f98c --- /dev/null +++ b/examples/laravel-app/README.md @@ -0,0 +1,375 @@ +# SqrtSpace SpaceTime Laravel Sample Application + +This sample demonstrates how to integrate SqrtSpace SpaceTime with a Laravel application to build memory-efficient, scalable web applications. + +## Features Demonstrated + +### 1. **Large Dataset API Endpoints** +- Streaming JSON responses for large datasets +- Paginated queries with automatic memory management +- CSV export without memory bloat + +### 2. **Background Job Processing** +- Memory-aware queue workers +- Checkpointed long-running jobs +- Batch processing with progress tracking + +### 3. **Caching with SpaceTime** +- Hot/cold cache tiers +- Automatic memory pressure handling +- Cache warming strategies + +### 4. **Real-World Use Cases** +- User activity log processing +- Sales report generation +- Product catalog management +- Real-time analytics + +## Installation + +1. **Install dependencies:** +```bash +composer install +``` + +2. **Configure environment:** +```bash +cp .env.example .env +php artisan key:generate +``` + +3. **Configure SpaceTime in `.env`:** +``` +SPACETIME_MEMORY_LIMIT=256M +SPACETIME_EXTERNAL_STORAGE=/tmp/spacetime +SPACETIME_CHUNK_STRATEGY=sqrt_n +SPACETIME_ENABLE_CHECKPOINTING=true +``` + +4. **Run migrations:** +```bash +php artisan migrate +php artisan db:seed +``` + +## Project Structure + +``` +laravel-app/ +├── app/ +│ ├── Http/ +│ │ ├── Controllers/ +│ │ │ ├── ProductController.php # Streaming APIs +│ │ │ ├── AnalyticsController.php # Real-time analytics +│ │ │ └── ReportController.php # Large report generation +│ │ └── Middleware/ +│ │ └── SpaceTimeMiddleware.php # Memory monitoring +│ ├── Jobs/ +│ │ ├── ProcessLargeDataset.php # Checkpointed job +│ │ ├── GenerateReport.php # Batch processing job +│ │ └── ImportProducts.php # CSV import job +│ ├── Services/ +│ │ ├── ProductService.php # Business logic +│ │ ├── AnalyticsService.php # Analytics processing +│ │ └── SpaceTimeCache.php # Cache wrapper +│ └── Providers/ +│ └── SpaceTimeServiceProvider.php # Service registration +├── config/ +│ └── spacetime.php # Configuration +├── routes/ +│ ├── api.php # API routes +│ └── web.php # Web routes +└── tests/ + └── Feature/ + └── SpaceTimeTest.php # Integration tests +``` + +## Usage Examples + +### 1. Streaming Large Datasets + +```php +// ProductController.php +public function stream() +{ + return response()->stream(function () { + $products = SpaceTimeStream::fromQuery( + Product::query()->orderBy('id') + ); + + echo "["; + $first = true; + + foreach ($products->chunk(100) as $chunk) { + foreach ($chunk as $product) { + if (!$first) echo ","; + echo $product->toJson(); + $first = false; + } + + // Flush output buffer + ob_flush(); + flush(); + } + + echo "]"; + }, 200, [ + 'Content-Type' => 'application/json', + 'X-Accel-Buffering' => 'no' + ]); +} +``` + +### 2. 
Memory-Efficient CSV Export + +```php +// ReportController.php +public function exportCsv() +{ + $filename = 'products_' . date('Y-m-d') . '.csv'; + + return response()->streamDownload(function () { + $exporter = new CsvExporter('php://output'); + $exporter->writeHeaders(['ID', 'Name', 'Price', 'Stock']); + + Product::query() + ->orderBy('id') + ->chunkById(1000, function ($products) use ($exporter) { + foreach ($products as $product) { + $exporter->writeRow([ + $product->id, + $product->name, + $product->price, + $product->stock + ]); + } + }); + }, $filename, [ + 'Content-Type' => 'text/csv', + ]); +} +``` + +### 3. Checkpointed Background Job + +```php +// ProcessLargeDataset.php +class ProcessLargeDataset implements ShouldQueue +{ + use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; + use SpaceTimeCheckpointable; + + public function handle() + { + $checkpoint = $this->getCheckpoint(); + $lastId = $checkpoint['last_id'] ?? 0; + + Order::where('id', '>', $lastId) + ->orderBy('id') + ->chunkById(100, function ($orders) { + foreach ($orders as $order) { + // Process order + $this->processOrder($order); + + // Save checkpoint every 100 orders + if ($order->id % 100 === 0) { + $this->saveCheckpoint([ + 'last_id' => $order->id, + 'processed' => $this->processed, + ]); + } + } + }); + } +} +``` + +### 4. Real-Time Analytics + +```php +// AnalyticsController.php +public function realtime() +{ + return response()->stream(function () { + $monitor = new MemoryPressureMonitor('100M'); + + while (true) { + $stats = $this->analyticsService->getCurrentStats(); + + // Send as Server-Sent Event + echo "data: " . json_encode($stats) . "\n\n"; + ob_flush(); + flush(); + + // Check memory pressure + if ($monitor->check() !== MemoryPressureLevel::NONE) { + $this->analyticsService->compact(); + } + + sleep(1); + } + }, 200, [ + 'Content-Type' => 'text/event-stream', + 'Cache-Control' => 'no-cache', + 'X-Accel-Buffering' => 'no' + ]); +} +``` + +### 5. 
Memory-Aware Caching + +```php +// SpaceTimeCache.php +class SpaceTimeCache +{ + private SpaceTimeDict $hot; + private CacheInterface $cold; + private MemoryPressureMonitor $monitor; + + public function get($key) + { + // Check hot cache first + if (isset($this->hot[$key])) { + return $this->hot[$key]; + } + + // Check cold storage + $value = $this->cold->get($key); + if ($value !== null) { + // Promote to hot cache if memory allows + if ($this->monitor->canAllocate(strlen($value))) { + $this->hot[$key] = $value; + } + } + + return $value; + } +} +``` + +## API Endpoints + +### Products API + +- `GET /api/products` - Paginated list +- `GET /api/products/stream` - Stream all products as NDJSON +- `GET /api/products/export/csv` - Export as CSV +- `POST /api/products/bulk-update` - Bulk update with checkpointing +- `POST /api/products/import` - Import CSV with progress + +### Analytics API + +- `GET /api/analytics/summary` - Get summary statistics +- `GET /api/analytics/realtime` - Real-time SSE stream +- `POST /api/analytics/report` - Generate large report +- `GET /api/analytics/top-products` - Top products with external sorting + +### Reports API + +- `POST /api/reports/generate` - Generate report (queued) +- `GET /api/reports/{id}/status` - Check generation status +- `GET /api/reports/{id}/download` - Download completed report + +## Testing + +Run the test suite: + +```bash +php artisan test +``` + +Example test: + +```php +public function test_can_stream_large_dataset() +{ + // Seed test data + Product::factory()->count(10000)->create(); + + // Make streaming request + $response = $this->getJson('/api/products/stream'); + + $response->assertStatus(200); + $response->assertHeader('Content-Type', 'application/json'); + + // Verify memory usage stayed low + $this->assertLessThan(50 * 1024 * 1024, memory_get_peak_usage()); +} +``` + +## Performance Tips + +1. **Configure memory limits** based on your server capacity +2. **Use streaming responses** for large datasets +3. **Enable checkpointing** for long-running jobs +4. **Monitor memory pressure** in production +5. **Use external storage** on fast SSDs +6. **Configure queue workers** with appropriate memory limits + +## Deployment + +### Nginx Configuration + +```nginx +location /api/products/stream { + proxy_pass http://backend; + proxy_buffering off; + proxy_read_timeout 3600; +} + +location /api/analytics/realtime { + proxy_pass http://backend; + proxy_buffering off; + proxy_read_timeout 0; + proxy_http_version 1.1; +} +``` + +### Supervisor Configuration + +```ini +[program:spacetime-worker] +command=php /path/to/artisan queue:work --memory=256 +numprocs=4 +autostart=true +autorestart=true +``` + +## Monitoring + +Add to your monitoring: + +```php +// app/Console/Commands/MonitorSpaceTime.php +$stats = [ + 'memory_usage' => memory_get_usage(true), + 'peak_memory' => memory_get_peak_usage(true), + 'external_files' => count(glob(config('spacetime.external_storage') . 
'/*')), + 'cache_size' => $this->cache->size(), +]; + +Log::channel('metrics')->info('spacetime.stats', $stats); +``` + +## Troubleshooting + +### High Memory Usage +- Check `SPACETIME_MEMORY_LIMIT` setting +- Enable more aggressive spillover +- Use smaller chunk sizes + +### Slow Performance +- Ensure external storage is on SSD +- Increase memory limit if possible +- Use compression for large values + +### Failed Checkpoints +- Check storage permissions +- Ensure sufficient disk space +- Verify checkpoint directory exists + +## Learn More + +- [SqrtSpace SpaceTime Documentation](https://github.com/MarketAlly/Ubiquity) +- [Laravel Documentation](https://laravel.com/docs) +- [Memory-Efficient PHP Patterns](https://example.com/patterns) \ No newline at end of file diff --git a/examples/laravel-app/app/Http/Controllers/ProductController.php b/examples/laravel-app/app/Http/Controllers/ProductController.php new file mode 100644 index 0000000..3b717b4 --- /dev/null +++ b/examples/laravel-app/app/Http/Controllers/ProductController.php @@ -0,0 +1,194 @@ +productService = $productService; + } + + /** + * Get paginated products + */ + public function index(Request $request) + { + $perPage = min($request->get('per_page', 50), 100); + + return Product::query() + ->when($request->get('category'), function ($query, $category) { + $query->where('category', $category); + }) + ->when($request->get('min_price'), function ($query, $minPrice) { + $query->where('price', '>=', $minPrice); + }) + ->orderBy('id') + ->paginate($perPage); + } + + /** + * Stream all products as NDJSON + */ + public function stream(Request $request) + { + return response()->stream(function () use ($request) { + $query = Product::query() + ->when($request->get('category'), function ($query, $category) { + $query->where('category', $category); + }) + ->orderBy('id'); + + $stream = SpaceTimeStream::fromQuery($query, 100); + + foreach ($stream as $product) { + echo $product->toJson() . "\n"; + ob_flush(); + flush(); + } + }, 200, [ + 'Content-Type' => 'application/x-ndjson', + 'X-Accel-Buffering' => 'no', + 'Cache-Control' => 'no-cache' + ]); + } + + /** + * Export products as CSV + */ + public function exportCsv(Request $request) + { + $filename = 'products_' . date('Y-m-d_His') . '.csv'; + + return response()->streamDownload(function () use ($request) { + $exporter = new CsvExporter('php://output'); + $exporter->writeHeaders([ + 'ID', 'Name', 'SKU', 'Category', 'Price', + 'Stock', 'Description', 'Created At' + ]); + + Product::query() + ->when($request->get('category'), function ($query, $category) { + $query->where('category', $category); + }) + ->orderBy('id') + ->chunkById(1000, function ($products) use ($exporter) { + foreach ($products as $product) { + $exporter->writeRow([ + $product->id, + $product->name, + $product->sku, + $product->category, + $product->price, + $product->stock, + $product->description, + $product->created_at + ]); + } + }); + }, $filename, [ + 'Content-Type' => 'text/csv', + ]); + } + + /** + * Bulk update product prices with checkpointing + */ + public function bulkUpdatePrices(Request $request) + { + $request->validate([ + 'category' => 'required|string', + 'adjustment_type' => 'required|in:percentage,fixed', + 'adjustment_value' => 'required|numeric' + ]); + + $jobId = 'price_update_' . uniqid(); + $checkpointManager = app(CheckpointManager::class); + + // Check for existing checkpoint + $checkpoint = $checkpointManager->restore($jobId); + $lastId = $checkpoint['last_id'] ?? 
0; + $updated = $checkpoint['updated'] ?? 0; + + DB::beginTransaction(); + + try { + Product::where('category', $request->category) + ->where('id', '>', $lastId) + ->orderBy('id') + ->chunkById(100, function ($products) use ($request, &$updated, $jobId, $checkpointManager) { + foreach ($products as $product) { + if ($request->adjustment_type === 'percentage') { + $product->price *= (1 + $request->adjustment_value / 100); + } else { + $product->price += $request->adjustment_value; + } + $product->save(); + $updated++; + + // Checkpoint every 100 updates + if ($updated % 100 === 0) { + $checkpointManager->save($jobId, [ + 'last_id' => $product->id, + 'updated' => $updated + ]); + } + } + }); + + DB::commit(); + $checkpointManager->delete($jobId); + + return response()->json([ + 'success' => true, + 'updated' => $updated, + 'job_id' => $jobId + ]); + + } catch (\Exception $e) { + DB::rollBack(); + + return response()->json([ + 'success' => false, + 'error' => $e->getMessage(), + 'job_id' => $jobId, + 'can_resume' => true + ], 500); + } + } + + /** + * Search products with memory-efficient sorting + */ + public function search(Request $request) + { + $request->validate([ + 'q' => 'required|string|min:2', + 'sort_by' => 'in:relevance,price_asc,price_desc,name' + ]); + + return $this->productService->searchProducts( + $request->get('q'), + $request->get('sort_by', 'relevance'), + $request->get('limit', 100) + ); + } + + /** + * Get product statistics + */ + public function statistics() + { + return $this->productService->getStatistics(); + } +} \ No newline at end of file diff --git a/examples/laravel-app/app/Jobs/ProcessLargeDataset.php b/examples/laravel-app/app/Jobs/ProcessLargeDataset.php new file mode 100644 index 0000000..dea9879 --- /dev/null +++ b/examples/laravel-app/app/Jobs/ProcessLargeDataset.php @@ -0,0 +1,239 @@ +jobId = $jobId ?? 'process_dataset_' . uniqid(); + } + + public function handle() + { + $this->checkpointManager = app(CheckpointManager::class); + $this->memoryMonitor = new MemoryPressureMonitor('64M'); + + // Restore checkpoint if exists + $checkpoint = $this->checkpointManager->restore($this->jobId); + $state = $checkpoint ?? 
[ + 'last_order_id' => 0, + 'processed_count' => 0, + 'analytics' => [ + 'total_revenue' => 0, + 'order_count' => 0, + 'customers' => new SpaceTimeArray(1000), + 'products' => new SpaceTimeArray(1000), + 'daily_stats' => [] + ] + ]; + + $this->processOrders($state); + + // Clean up checkpoint after successful completion + $this->checkpointManager->delete($this->jobId); + + // Save final analytics + $this->saveAnalytics($state['analytics']); + } + + private function processOrders(array &$state) + { + $lastOrderId = $state['last_order_id']; + + Order::where('id', '>', $lastOrderId) + ->with(['customer', 'items.product']) + ->orderBy('id') + ->chunkById(100, function ($orders) use (&$state) { + foreach ($orders as $order) { + // Process order + $this->processOrder($order, $state['analytics']); + + $state['processed_count']++; + $state['last_order_id'] = $order->id; + + // Checkpoint every 100 orders + if ($state['processed_count'] % 100 === 0) { + $this->saveCheckpoint($state); + + // Check memory pressure + if ($this->memoryMonitor->shouldCleanup()) { + // Flush some analytics to database + $this->flushPartialAnalytics($state['analytics']); + } + } + } + }); + } + + private function processOrder(Order $order, array &$analytics) + { + // Update totals + $analytics['total_revenue'] += $order->total_amount; + $analytics['order_count']++; + + // Track customer spending + $customerId = $order->customer_id; + if (!isset($analytics['customers'][$customerId])) { + $analytics['customers'][$customerId] = [ + 'total_spent' => 0, + 'order_count' => 0, + 'last_order_date' => null + ]; + } + + $analytics['customers'][$customerId]['total_spent'] += $order->total_amount; + $analytics['customers'][$customerId]['order_count']++; + $analytics['customers'][$customerId]['last_order_date'] = $order->created_at; + + // Track product sales + foreach ($order->items as $item) { + $productId = $item->product_id; + if (!isset($analytics['products'][$productId])) { + $analytics['products'][$productId] = [ + 'quantity_sold' => 0, + 'revenue' => 0, + 'order_count' => 0 + ]; + } + + $analytics['products'][$productId]['quantity_sold'] += $item->quantity; + $analytics['products'][$productId]['revenue'] += $item->total_price; + $analytics['products'][$productId]['order_count']++; + } + + // Daily statistics + $date = $order->created_at->format('Y-m-d'); + if (!isset($analytics['daily_stats'][$date])) { + $analytics['daily_stats'][$date] = [ + 'revenue' => 0, + 'orders' => 0, + 'unique_customers' => [] + ]; + } + + $analytics['daily_stats'][$date]['revenue'] += $order->total_amount; + $analytics['daily_stats'][$date]['orders']++; + $analytics['daily_stats'][$date]['unique_customers'][$customerId] = true; + } + + private function saveCheckpoint(array $state) + { + $this->checkpointManager->save($this->jobId, [ + 'last_order_id' => $state['last_order_id'], + 'processed_count' => $state['processed_count'], + 'analytics' => [ + 'total_revenue' => $state['analytics']['total_revenue'], + 'order_count' => $state['analytics']['order_count'], + 'customers' => $state['analytics']['customers'], + 'products' => $state['analytics']['products'], + 'daily_stats' => $state['analytics']['daily_stats'] + ] + ]); + + \Log::info("Checkpoint saved", [ + 'job_id' => $this->jobId, + 'processed' => $state['processed_count'] + ]); + } + + private function flushPartialAnalytics(array &$analytics) + { + // Save top customers to database + $topCustomers = $this->getTopItems($analytics['customers'], 'total_spent', 100); + foreach ($topCustomers as 
$customerId => $data) { + OrderAnalytics::updateOrCreate( + ['type' => 'customer', 'entity_id' => $customerId], + ['data' => json_encode($data)] + ); + } + + // Save top products + $topProducts = $this->getTopItems($analytics['products'], 'revenue', 100); + foreach ($topProducts as $productId => $data) { + OrderAnalytics::updateOrCreate( + ['type' => 'product', 'entity_id' => $productId], + ['data' => json_encode($data)] + ); + } + + // Clear processed items from memory + $analytics['customers'] = new SpaceTimeArray(1000); + $analytics['products'] = new SpaceTimeArray(1000); + + gc_collect_cycles(); + } + + private function getTopItems($items, $sortKey, $limit) + { + $sorted = []; + foreach ($items as $id => $data) { + $sorted[$id] = $data[$sortKey]; + } + + arsort($sorted); + $topIds = array_slice(array_keys($sorted), 0, $limit); + + $result = []; + foreach ($topIds as $id) { + $result[$id] = $items[$id]; + } + + return $result; + } + + private function saveAnalytics(array $analytics) + { + // Save summary + OrderAnalytics::updateOrCreate( + ['type' => 'summary', 'entity_id' => 'global'], + [ + 'data' => json_encode([ + 'total_revenue' => $analytics['total_revenue'], + 'order_count' => $analytics['order_count'], + 'avg_order_value' => $analytics['total_revenue'] / $analytics['order_count'], + 'unique_customers' => count($analytics['customers']), + 'unique_products' => count($analytics['products']), + 'processed_at' => now() + ]) + ] + ); + + // Save daily stats + foreach ($analytics['daily_stats'] as $date => $stats) { + OrderAnalytics::updateOrCreate( + ['type' => 'daily', 'entity_id' => $date], + [ + 'data' => json_encode([ + 'revenue' => $stats['revenue'], + 'orders' => $stats['orders'], + 'unique_customers' => count($stats['unique_customers']), + 'avg_order_value' => $stats['revenue'] / $stats['orders'] + ]) + ] + ); + } + + \Log::info("Analytics processing completed", [ + 'job_id' => $this->jobId, + 'total_processed' => $analytics['order_count'] + ]); + } +} \ No newline at end of file diff --git a/examples/laravel-app/app/Services/ProductService.php b/examples/laravel-app/app/Services/ProductService.php new file mode 100644 index 0000000..2e74456 --- /dev/null +++ b/examples/laravel-app/app/Services/ProductService.php @@ -0,0 +1,224 @@ +memoryMonitor = new MemoryPressureMonitor( + config('spacetime.memory_limit', '128M') + ); + } + + /** + * Search products with memory-efficient sorting + */ + public function searchProducts(string $query, string $sortBy, int $limit): Collection + { + // Get all matching products + $products = Product::where('name', 'like', "%{$query}%") + ->orWhere('description', 'like', "%{$query}%") + ->get() + ->map(function ($product) use ($query) { + // Calculate relevance score + $nameScore = $this->calculateRelevance($product->name, $query) * 2; + $descScore = $this->calculateRelevance($product->description, $query); + $product->relevance_score = $nameScore + $descScore; + return $product; + }); + + // Use external sort for large result sets + if ($products->count() > 1000) { + $sorted = $this->externalSort($products, $sortBy); + } else { + $sorted = $this->inMemorySort($products, $sortBy); + } + + return collect($sorted)->take($limit); + } + + /** + * Get product statistics using external grouping + */ + public function getStatistics(): array + { + $stats = [ + 'total_products' => Product::count(), + 'total_value' => 0, + 'by_category' => [], + 'price_ranges' => [], + 'stock_alerts' => [] + ]; + + // Use SpaceTimeArray for memory efficiency + $allProducts = 
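+        /*
+         * The integer passed to SpaceTimeArray is its hot-item threshold:
+         * entries beyond ~1000 spill to external storage. Note that the
+         * toArray() calls further down pull the whole collection back into a
+         * plain array; since ExternalGroupBy::groupBySum() accepts any
+         * iterable, the SpaceTimeArray could be passed directly instead to
+         * keep memory usage flat.
+         */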
new SpaceTimeArray(1000); + + Product::chunk(1000, function ($products) use (&$allProducts) { + foreach ($products as $product) { + $allProducts[] = [ + 'category' => $product->category, + 'price' => $product->price, + 'stock' => $product->stock, + 'value' => $product->price * $product->stock + ]; + } + }); + + // Calculate total value + $stats['total_value'] = array_sum(array_column($allProducts->toArray(), 'value')); + + // Group by category using external grouping + $byCategory = ExternalGroupBy::groupBySum( + $allProducts->toArray(), + fn($p) => $p['category'], + fn($p) => $p['value'] + ); + $stats['by_category'] = $byCategory; + + // Price range distribution + $priceRanges = [ + '0-50' => 0, + '50-100' => 0, + '100-500' => 0, + '500+' => 0 + ]; + + foreach ($allProducts as $product) { + if ($product['price'] < 50) { + $priceRanges['0-50']++; + } elseif ($product['price'] < 100) { + $priceRanges['50-100']++; + } elseif ($product['price'] < 500) { + $priceRanges['100-500']++; + } else { + $priceRanges['500+']++; + } + + // Low stock alerts + if ($product['stock'] < 10) { + $stats['stock_alerts'][] = [ + 'category' => $product['category'], + 'stock' => $product['stock'] + ]; + } + } + + $stats['price_ranges'] = $priceRanges; + $stats['memory_usage'] = $this->memoryMonitor->getMemoryInfo(); + + return $stats; + } + + /** + * Import products from CSV with progress tracking + */ + public function importFromCsv(string $filePath, callable $progressCallback = null): array + { + $imported = 0; + $errors = []; + $batchSize = 100; + $batch = []; + + $handle = fopen($filePath, 'r'); + $headers = fgetcsv($handle); // Skip headers + + while (($row = fgetcsv($handle)) !== false) { + try { + $batch[] = [ + 'name' => $row[0], + 'sku' => $row[1], + 'category' => $row[2], + 'price' => (float)$row[3], + 'stock' => (int)$row[4], + 'description' => $row[5] ?? '', + 'created_at' => now(), + 'updated_at' => now() + ]; + + if (count($batch) >= $batchSize) { + Product::insert($batch); + $imported += count($batch); + $batch = []; + + if ($progressCallback) { + $progressCallback($imported); + } + + // Check memory pressure + if ($this->memoryMonitor->shouldCleanup()) { + gc_collect_cycles(); + } + } + } catch (\Exception $e) { + $errors[] = "Row " . ($imported + 1) . ": " . 
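+                /*
+                 * Descriptive note: $imported counts rows already flushed to
+                 * the database, not lines read from the file, so the row
+                 * number reported here is approximate.
+                 */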
$e->getMessage(); + } + } + + // Insert remaining batch + if (!empty($batch)) { + Product::insert($batch); + $imported += count($batch); + } + + fclose($handle); + + return [ + 'imported' => $imported, + 'errors' => $errors + ]; + } + + private function calculateRelevance(string $text, string $query): float + { + $text = strtolower($text); + $query = strtolower($query); + + // Exact match + if (strpos($text, $query) !== false) { + return 1.0; + } + + // Word match + $words = explode(' ', $query); + $matches = 0; + foreach ($words as $word) { + if (strpos($text, $word) !== false) { + $matches++; + } + } + + return $matches / count($words); + } + + private function externalSort(Collection $products, string $sortBy): array + { + $sortKey = match($sortBy) { + 'price_asc' => fn($p) => $p->price, + 'price_desc' => fn($p) => -$p->price, + 'name' => fn($p) => $p->name, + default => fn($p) => -$p->relevance_score + }; + + return ExternalSort::sortBy($products->toArray(), $sortKey); + } + + private function inMemorySort(Collection $products, string $sortBy): Collection + { + return match($sortBy) { + 'price_asc' => $products->sortBy('price'), + 'price_desc' => $products->sortByDesc('price'), + 'name' => $products->sortBy('name'), + default => $products->sortByDesc('relevance_score') + }; + } +} \ No newline at end of file diff --git a/src/Algorithms/ExternalGroupBy.php b/src/Algorithms/ExternalGroupBy.php new file mode 100644 index 0000000..ffbaf84 --- /dev/null +++ b/src/Algorithms/ExternalGroupBy.php @@ -0,0 +1,196 @@ +exists($key) ? $groups->get($key) : []; + $group[] = $item; + $groups->set($key, $group); + } + + // Build result array + $result = []; + foreach ($groupKeys as $key) { + $result[$key] = $groups->get($key); + } + + return $result; + } finally { + $groups->cleanup(); + } + } + + /** + * Group by with aggregation + */ + public static function groupByAggregate( + iterable $data, + callable $keyExtractor, + callable $aggregator, + mixed $initial = null + ): array { + $aggregates = []; + + foreach ($data as $item) { + $key = (string) $keyExtractor($item); + + if (!isset($aggregates[$key])) { + $aggregates[$key] = $initial; + } + + $aggregates[$key] = $aggregator($aggregates[$key], $item); + } + + return $aggregates; + } + + /** + * Group by with counting + */ + public static function groupByCount(iterable $data, callable $keyExtractor): array + { + return self::groupByAggregate( + $data, + $keyExtractor, + fn($count, $item) => ($count ?? 0) + 1, + 0 + ); + } + + /** + * Group by with sum + */ + public static function groupBySum( + iterable $data, + callable $keyExtractor, + callable $valueExtractor + ): array { + return self::groupByAggregate( + $data, + $keyExtractor, + fn($sum, $item) => ($sum ?? 0) + $valueExtractor($item), + 0 + ); + } + + /** + * Group by with streaming output + */ + public static function groupByStreaming(iterable $data, callable $keyExtractor): \Generator + { + $groups = new ExternalStorage('groupby_stream_' . uniqid()); + $seenKeys = []; + + try { + // Collect all data + foreach ($data as $item) { + $key = (string) $keyExtractor($item); + + if (!in_array($key, $seenKeys, true)) { + $seenKeys[] = $key; + } + + $group = $groups->exists($key) ? 
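+            /*
+             * Read-modify-write append: the group for this key is loaded back
+             * from external storage, extended, and written out again, so only
+             * one group is held in memory at a time. Minimal usage sketch with
+             * hypothetical data: ExternalGroupBy::groupBySum($rows,
+             * fn($r) => $r['category'], fn($r) => $r['amount']) returns a
+             * ['category' => sum] map.
+             */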
$groups->get($key) : []; + $group[] = $item; + $groups->set($key, $group); + } + + // Stream results + foreach ($seenKeys as $key) { + yield $key => $groups->get($key); + } + } finally { + $groups->cleanup(); + } + } + + /** + * Group by with memory limit + */ + public static function groupByWithLimit( + iterable $data, + callable $keyExtractor, + int $maxGroupsInMemory = 1000 + ): array { + $inMemoryGroups = []; + $externalGroups = null; + $allKeys = []; + + foreach ($data as $item) { + $key = (string) $keyExtractor($item); + + if (!in_array($key, $allKeys, true)) { + $allKeys[] = $key; + } + + // Use in-memory storage for small number of groups + if (count($inMemoryGroups) < $maxGroupsInMemory && !isset($externalGroups)) { + if (!isset($inMemoryGroups[$key])) { + $inMemoryGroups[$key] = []; + } + $inMemoryGroups[$key][] = $item; + } else { + // Switch to external storage + if ($externalGroups === null) { + $externalGroups = new ExternalStorage('groupby_limit_' . uniqid()); + + // Move existing groups to external storage + foreach ($inMemoryGroups as $k => $group) { + $externalGroups->set($k, $group); + } + $inMemoryGroups = []; + } + + $group = $externalGroups->exists($key) ? $externalGroups->get($key) : []; + $group[] = $item; + $externalGroups->set($key, $group); + } + } + + // Build result + $result = []; + + if ($externalGroups === null) { + // All groups fit in memory + return $inMemoryGroups; + } + + // Retrieve from external storage + try { + foreach ($allKeys as $key) { + $result[$key] = $externalGroups->get($key); + } + return $result; + } finally { + $externalGroups->cleanup(); + } + } +} \ No newline at end of file diff --git a/src/Algorithms/ExternalSort.php b/src/Algorithms/ExternalSort.php new file mode 100644 index 0000000..8b14efe --- /dev/null +++ b/src/Algorithms/ExternalSort.php @@ -0,0 +1,163 @@ + $a <=> $b; + + // Convert to array if needed + if (!is_array($data)) { + $data = iterator_to_array($data); + } + + $count = count($data); + + // Small datasets can be sorted in memory + if ($count <= 10000) { + usort($data, $comparator); + return $data; + } + + // Calculate chunk size (√n) + $chunkSize = SpaceTimeConfig::calculateSqrtN($count); + + // Phase 1: Sort chunks and write to temporary files + $tempFiles = self::createSortedChunks($data, $chunkSize, $comparator); + + // Phase 2: Merge sorted chunks + $result = self::mergeSortedChunks($tempFiles, $comparator); + + // Cleanup + foreach ($tempFiles as $file) { + unlink($file); + } + + return $result; + } + + /** + * Sort by a specific key + */ + public static function sortBy(iterable $data, callable $keyExtractor, ?callable $comparator = null): array + { + $comparator = $comparator ?? fn($a, $b) => $a <=> $b; + + return self::sort($data, function($a, $b) use ($keyExtractor, $comparator) { + return $comparator($keyExtractor($a), $keyExtractor($b)); + }); + } + + /** + * Create sorted chunks and write to temporary files + */ + private static function createSortedChunks(array $data, int $chunkSize, callable $comparator): array + { + $tempFiles = []; + $chunks = array_chunk($data, $chunkSize, true); + + foreach ($chunks as $chunk) { + // Sort chunk in memory + usort($chunk, $comparator); + + // Write to temporary file + $tempFile = tempnam(SpaceTimeConfig::getStoragePath(), 'sort_'); + $handle = fopen($tempFile, 'wb'); + + foreach ($chunk as $item) { + fwrite($handle, serialize($item) . 
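+            /*
+             * Phase 1 of the external merge sort: each √n-sized chunk is
+             * sorted in memory and written one serialized item per line;
+             * mergeSortedChunks() then reads the files back with fgets() for a
+             * k-way merge. The line-oriented format assumes serialized values
+             * contain no newlines (multi-line strings would break the reads).
+             * Typical call with placeholder data:
+             * ExternalSort::sortBy($rows, fn($r) => $r['price']).
+             */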
"\n"); + } + + fclose($handle); + $tempFiles[] = $tempFile; + } + + return $tempFiles; + } + + /** + * Merge sorted chunks using k-way merge + */ + private static function mergeSortedChunks(array $tempFiles, callable $comparator): array + { + $result = []; + $fileHandles = []; + $currentItems = []; + + // Open all files + foreach ($tempFiles as $i => $file) { + $fileHandles[$i] = fopen($file, 'rb'); + $line = fgets($fileHandles[$i]); + if ($line !== false) { + $currentItems[$i] = unserialize(trim($line)); + } + } + + // K-way merge + while (!empty($currentItems)) { + // Find minimum item + $minIndex = null; + $minItem = null; + + foreach ($currentItems as $index => $item) { + if ($minItem === null || $comparator($item, $minItem) < 0) { + $minIndex = $index; + $minItem = $item; + } + } + + // Add minimum to result + $result[] = $minItem; + + // Read next item from the same file + $line = fgets($fileHandles[$minIndex]); + if ($line !== false) { + $currentItems[$minIndex] = unserialize(trim($line)); + } else { + unset($currentItems[$minIndex]); + fclose($fileHandles[$minIndex]); + } + } + + return $result; + } + + /** + * Sort and write directly to a file (for very large datasets) + */ + public static function sortToFile(iterable $data, string $outputFile, ?callable $comparator = null): void + { + $sorted = self::sort($data, $comparator); + + $handle = fopen($outputFile, 'wb'); + foreach ($sorted as $item) { + fwrite($handle, serialize($item) . "\n"); + } + fclose($handle); + } + + /** + * Sort streaming data (returns generator) + */ + public static function sortStreaming(iterable $data, ?callable $comparator = null): \Generator + { + $sorted = self::sort($data, $comparator); + + foreach ($sorted as $item) { + yield $item; + } + } +} \ No newline at end of file diff --git a/src/Batch/BatchJob.php b/src/Batch/BatchJob.php new file mode 100644 index 0000000..e3c47e6 --- /dev/null +++ b/src/Batch/BatchJob.php @@ -0,0 +1,122 @@ +options = array_merge($this->getDefaultOptions(), $options); + $this->processor = new BatchProcessor($this->options); + } + + /** + * Get job ID for checkpointing + */ + public function getJobId(): string + { + return static::class . '_' . 
$this->getUniqueId(); + } + + /** + * Execute the batch job + */ + public function execute(): BatchResult + { + // Get items to process + $items = $this->getItems(); + + // Process items + $result = $this->processor->process( + $items, + [$this, 'processItem'], + $this->getJobId() + ); + + // Handle completion + if ($result->isComplete()) { + $this->onComplete($result); + } else { + $this->onError($result); + } + + return $result; + } + + /** + * Get items to process + */ + abstract protected function getItems(): iterable; + + /** + * Process single item + */ + abstract public function processItem(array $batch): array; + + /** + * Get unique identifier for this job instance + */ + abstract protected function getUniqueId(): string; + + /** + * Called when job completes successfully + */ + protected function onComplete(BatchResult $result): void + { + // Override in subclass + } + + /** + * Called when job has errors + */ + protected function onError(BatchResult $result): void + { + // Override in subclass + } + + /** + * Get default options + */ + protected function getDefaultOptions(): array + { + return [ + 'batch_size' => null, + 'checkpoint_enabled' => true, + 'max_retries' => 3, + ]; + } + + /** + * Resume job from checkpoint + */ + public function resume(): BatchResult + { + $checkpoint = new CheckpointManager($this->getJobId()); + + if (!$checkpoint->exists()) { + throw new \RuntimeException('No checkpoint found for job: ' . $this->getJobId()); + } + + return $this->execute(); + } + + /** + * Check if job can be resumed + */ + public function canResume(): bool + { + $checkpoint = new CheckpointManager($this->getJobId()); + return $checkpoint->exists(); + } +} \ No newline at end of file diff --git a/src/Batch/BatchProcessor.php b/src/Batch/BatchProcessor.php new file mode 100644 index 0000000..ae89743 --- /dev/null +++ b/src/Batch/BatchProcessor.php @@ -0,0 +1,267 @@ +options = array_merge([ + 'batch_size' => null, // Auto-calculate if null + 'memory_threshold' => 0.8, // 80% memory usage + 'checkpoint_enabled' => true, + 'progress_callback' => null, + 'error_handler' => null, + 'max_retries' => 3, + ], $options); + + $this->memoryMonitor = new MemoryPressureMonitor(); + } + + /** + * Process items in batches + */ + public function process(iterable $items, callable $processor, ?string $checkpointId = null): BatchResult + { + $result = new BatchResult(); + + // Setup checkpoint if enabled + if ($this->options['checkpoint_enabled'] && $checkpointId) { + $this->checkpoint = new CheckpointManager($checkpointId); + + // Resume from checkpoint if exists + if ($this->checkpoint->exists()) { + $state = $this->checkpoint->load(); + $result->restore($state); + } + } + + // Calculate batch size + $batchSize = $this->calculateBatchSize($items); + + // Process batches + $batch = []; + $batchNumber = 0; + + foreach ($items as $key => $item) { + // Skip already processed items + if ($result->isProcessed($key)) { + continue; + } + + $batch[$key] = $item; + + // Process batch when full or memory pressure + if (count($batch) >= $batchSize || $this->shouldProcessBatch()) { + $this->processBatch($batch, $processor, $result, $batchNumber); + $batch = []; + $batchNumber++; + } + } + + // Process remaining items + if (!empty($batch)) { + $this->processBatch($batch, $processor, $result, $batchNumber); + } + + // Clean up checkpoint on success + if ($this->checkpoint && $result->isComplete()) { + $this->checkpoint->delete(); + } + + return $result; + } + + /** + * Process items in parallel batches + */ + 
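+    /*
+     * Sketch of intended use (hypothetical variables): the parent forks
+     * $workers children with pcntl_fork(); each child runs process() on its
+     * chunk and serializes its BatchResult to a temp file, which the parent
+     * merges after pcntl_waitpid(). CLI-only, since pcntl is not available
+     * under typical web SAPIs.
+     *
+     *   $result = $processor->processParallel($orders, $handleBatch, 4);
+     */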
public function processParallel(iterable $items, callable $processor, int $workers = 4): BatchResult + { + if (!function_exists('pcntl_fork')) { + throw new \RuntimeException('Parallel processing requires pcntl extension'); + } + + $result = new BatchResult(); + $chunks = $this->splitIntoChunks($items, $workers); + $pids = []; + + foreach ($chunks as $i => $chunk) { + $pid = pcntl_fork(); + + if ($pid === -1) { + throw new \RuntimeException('Failed to fork process'); + } elseif ($pid === 0) { + // Child process + $chunkResult = $this->process($chunk, $processor); + + // Write result to shared memory or file + $this->saveChunkResult($i, $chunkResult); + + exit(0); + } else { + // Parent process + $pids[$i] = $pid; + } + } + + // Wait for all children + foreach ($pids as $i => $pid) { + pcntl_waitpid($pid, $status); + + // Merge chunk result + $chunkResult = $this->loadChunkResult($i); + $result->merge($chunkResult); + } + + return $result; + } + + /** + * Process batch with error handling + */ + private function processBatch(array $batch, callable $processor, BatchResult $result, int $batchNumber): void + { + $retries = 0; + $success = false; + + while (!$success && $retries < $this->options['max_retries']) { + try { + // Call progress callback + if ($this->options['progress_callback']) { + ($this->options['progress_callback'])($batchNumber, count($batch), $result); + } + + // Process batch + $batchResult = $processor($batch); + + // Record results + foreach ($batch as $key => $item) { + $result->addSuccess($key, $batchResult[$key] ?? null); + } + + $success = true; + + } catch (\Exception $e) { + $retries++; + + if ($retries >= $this->options['max_retries']) { + // Record failures + foreach ($batch as $key => $item) { + $result->addError($key, $e); + } + + // Call error handler + if ($this->options['error_handler']) { + ($this->options['error_handler'])($e, $batch); + } + } else { + // Wait before retry + sleep(pow(2, $retries)); // Exponential backoff + } + } + } + + // Save checkpoint + if ($this->checkpoint && $this->checkpoint->shouldCheckpoint()) { + $this->checkpoint->save($result->getState()); + } + } + + /** + * Calculate optimal batch size + */ + private function calculateBatchSize(iterable $items): int + { + if ($this->options['batch_size'] !== null) { + return $this->options['batch_size']; + } + + // Estimate based on available memory + $memoryInfo = $this->memoryMonitor->getMemoryInfo(); + $availableMemory = $memoryInfo['available']; + + // Estimate item size (sample first few items) + $sampleSize = 10; + $totalSize = 0; + $count = 0; + + foreach ($items as $item) { + $totalSize += strlen(serialize($item)); + $count++; + + if ($count >= $sampleSize) { + break; + } + } + + if ($count === 0) { + return 100; // Default + } + + $avgItemSize = $totalSize / $count; + $targetMemoryUsage = $availableMemory * 0.5; // Use 50% of available memory + + return max(10, min(10000, (int)($targetMemoryUsage / $avgItemSize))); + } + + /** + * Check if batch should be processed due to memory pressure + */ + private function shouldProcessBatch(): bool + { + $level = $this->memoryMonitor->check(); + + return $level->isHigherThan(MemoryPressureLevel::MEDIUM); + } + + /** + * Split items into chunks for parallel processing + */ + private function splitIntoChunks(iterable $items, int $numChunks): array + { + $chunks = array_fill(0, $numChunks, []); + $i = 0; + + foreach ($items as $key => $item) { + $chunks[$i % $numChunks][$key] = $item; + $i++; + } + + return $chunks; + } + + /** + * Save chunk 
result (simplified - use shared memory in production) + */ + private function saveChunkResult(int $chunkId, BatchResult $result): void + { + $filename = sys_get_temp_dir() . "/batch_chunk_{$chunkId}.tmp"; + file_put_contents($filename, serialize($result)); + } + + /** + * Load chunk result + */ + private function loadChunkResult(int $chunkId): BatchResult + { + $filename = sys_get_temp_dir() . "/batch_chunk_{$chunkId}.tmp"; + $result = unserialize(file_get_contents($filename)); + unlink($filename); + + return $result; + } +} \ No newline at end of file diff --git a/src/Batch/BatchResult.php b/src/Batch/BatchResult.php new file mode 100644 index 0000000..f6603c7 --- /dev/null +++ b/src/Batch/BatchResult.php @@ -0,0 +1,206 @@ +startTime = microtime(true); + } + + /** + * Add successful result + */ + public function addSuccess(string|int $key, mixed $result = null): void + { + $this->processed[$key] = true; + $this->results[$key] = $result; + $this->successCount++; + } + + /** + * Add error + */ + public function addError(string|int $key, \Throwable $error): void + { + $this->processed[$key] = true; + $this->errors[$key] = [ + 'message' => $error->getMessage(), + 'code' => $error->getCode(), + 'file' => $error->getFile(), + 'line' => $error->getLine(), + ]; + $this->errorCount++; + } + + /** + * Check if item was processed + */ + public function isProcessed(string|int $key): bool + { + return isset($this->processed[$key]); + } + + /** + * Check if all items were successful + */ + public function isComplete(): bool + { + return $this->errorCount === 0; + } + + /** + * Get total processed count + */ + public function getProcessedCount(): int + { + return $this->successCount + $this->errorCount; + } + + /** + * Get success count + */ + public function getSuccessCount(): int + { + return $this->successCount; + } + + /** + * Get error count + */ + public function getErrorCount(): int + { + return $this->errorCount; + } + + /** + * Get all errors + */ + public function getErrors(): array + { + return $this->errors; + } + + /** + * Get all results + */ + public function getResults(): array + { + return $this->results; + } + + /** + * Get result for specific key + */ + public function getResult(string|int $key): mixed + { + return $this->results[$key] ?? null; + } + + /** + * Get error for specific key + */ + public function getError(string|int $key): ?array + { + return $this->errors[$key] ?? null; + } + + /** + * Get execution time + */ + public function getExecutionTime(): float + { + $endTime = $this->endTime ?? microtime(true); + return $endTime - $this->startTime; + } + + /** + * Mark as finished + */ + public function finish(): void + { + $this->endTime = microtime(true); + } + + /** + * Get state for checkpointing + */ + public function getState(): array + { + return [ + 'processed' => $this->processed, + 'errors' => $this->errors, + 'results' => $this->results, + 'success_count' => $this->successCount, + 'error_count' => $this->errorCount, + 'start_time' => $this->startTime, + ]; + } + + /** + * Restore from checkpoint state + */ + public function restore(array $state): void + { + $this->processed = $state['processed'] ?? []; + $this->errors = $state['errors'] ?? []; + $this->results = $state['results'] ?? []; + $this->successCount = $state['success_count'] ?? 0; + $this->errorCount = $state['error_count'] ?? 0; + $this->startTime = $state['start_time'] ?? 
microtime(true); + } + + /** + * Merge another result + */ + public function merge(BatchResult $other): void + { + foreach ($other->processed as $key => $value) { + $this->processed[$key] = $value; + } + + foreach ($other->results as $key => $result) { + $this->results[$key] = $result; + } + + foreach ($other->errors as $key => $error) { + $this->errors[$key] = $error; + } + + $this->successCount += $other->successCount; + $this->errorCount += $other->errorCount; + } + + /** + * Get summary statistics + */ + public function getSummary(): array + { + return [ + 'total_processed' => $this->getProcessedCount(), + 'success_count' => $this->successCount, + 'error_count' => $this->errorCount, + 'success_rate' => $this->getProcessedCount() > 0 + ? ($this->successCount / $this->getProcessedCount()) * 100 + : 0, + 'execution_time' => $this->getExecutionTime(), + 'items_per_second' => $this->getExecutionTime() > 0 + ? $this->getProcessedCount() / $this->getExecutionTime() + : 0, + ]; + } +} \ No newline at end of file diff --git a/src/Checkpoint/CacheCheckpointStorage.php b/src/Checkpoint/CacheCheckpointStorage.php new file mode 100644 index 0000000..49ceb1a --- /dev/null +++ b/src/Checkpoint/CacheCheckpointStorage.php @@ -0,0 +1,57 @@ +prefix . $id, $data, $this->ttl); + } + + public function load(string $id): ?array + { + return Cache::get($this->prefix . $id); + } + + public function exists(string $id): bool + { + return Cache::has($this->prefix . $id); + } + + public function delete(string $id): void + { + Cache::forget($this->prefix . $id); + } + + public function list(): array + { + // Note: This is limited by cache driver capabilities + // Some drivers may not support listing keys + return []; + } + + public function cleanup(int $olderThanTimestamp): int + { + // Cache entries expire automatically + return 0; + } + + /** + * Set TTL for checkpoints + */ + public function setTtl(int $seconds): void + { + $this->ttl = max(60, $seconds); + } +} \ No newline at end of file diff --git a/src/Checkpoint/CheckpointManager.php b/src/Checkpoint/CheckpointManager.php new file mode 100644 index 0000000..5d7fa1b --- /dev/null +++ b/src/Checkpoint/CheckpointManager.php @@ -0,0 +1,126 @@ +checkpointId = $checkpointId; + $this->storage = $storage ?? $this->createDefaultStorage(); + $this->checkpointInterval = 60; // seconds + } + + /** + * Save checkpoint data + */ + public function save(array $data): void + { + $checkpoint = [ + 'id' => $this->checkpointId, + 'timestamp' => time(), + 'data' => $data, + ]; + + $this->storage->save($this->checkpointId, $checkpoint); + $this->lastCheckpoint = microtime(true); + } + + /** + * Load checkpoint data + */ + public function load(): ?array + { + $checkpoint = $this->storage->load($this->checkpointId); + + if ($checkpoint === null) { + return null; + } + + return $checkpoint['data'] ?? 
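+        /*
+         * Only the caller's payload is returned; the envelope saved above also
+         * carries the checkpoint id and timestamp. For one-off operations,
+         * wrap() bundles the load/execute/delete cycle, e.g. with a
+         * hypothetical callback:
+         *   (new CheckpointManager('nightly_import'))->wrap(
+         *       fn(array $state, CheckpointManager $cp) => runImport($state, $cp)
+         *   );
+         */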
null; + } + + /** + * Check if checkpoint exists + */ + public function exists(): bool + { + return $this->storage->exists($this->checkpointId); + } + + /** + * Delete checkpoint + */ + public function delete(): void + { + $this->storage->delete($this->checkpointId); + } + + /** + * Check if it's time to checkpoint + */ + public function shouldCheckpoint(): bool + { + if (!SpaceTimeConfig::isCheckpointingEnabled()) { + return false; + } + + $now = microtime(true); + return ($now - $this->lastCheckpoint) >= $this->checkpointInterval; + } + + /** + * Set checkpoint interval + */ + public function setInterval(int $seconds): void + { + $this->checkpointInterval = max(1, $seconds); + } + + /** + * Create checkpoint wrapper for operations + */ + public function wrap(callable $operation, array $initialState = []): mixed + { + // Try to resume from checkpoint + $state = $this->load() ?? $initialState; + + try { + $result = $operation($state, $this); + + // Clean up on success + $this->delete(); + + return $result; + } catch (\Exception $e) { + // Checkpoint remains for retry + throw $e; + } + } + + /** + * Create default storage based on configuration + */ + private function createDefaultStorage(): CheckpointStorage + { + $storageType = config('spacetime.checkpoint_storage', 'file'); + + return match ($storageType) { + 'cache' => new CacheCheckpointStorage(), + 'database' => new DatabaseCheckpointStorage(), + default => new FileCheckpointStorage(), + }; + } +} \ No newline at end of file diff --git a/src/Checkpoint/CheckpointStorage.php b/src/Checkpoint/CheckpointStorage.php new file mode 100644 index 0000000..c60f7df --- /dev/null +++ b/src/Checkpoint/CheckpointStorage.php @@ -0,0 +1,41 @@ +ensureTableExists(); + } + + public function save(string $id, array $data): void + { + DB::table($this->table)->updateOrInsert( + ['checkpoint_id' => $id], + [ + 'checkpoint_id' => $id, + 'data' => serialize($data), + 'created_at' => now(), + 'updated_at' => now(), + ] + ); + } + + public function load(string $id): ?array + { + $checkpoint = DB::table($this->table) + ->where('checkpoint_id', $id) + ->first(); + + if (!$checkpoint) { + return null; + } + + return unserialize($checkpoint->data); + } + + public function exists(string $id): bool + { + return DB::table($this->table) + ->where('checkpoint_id', $id) + ->exists(); + } + + public function delete(string $id): void + { + DB::table($this->table) + ->where('checkpoint_id', $id) + ->delete(); + } + + public function list(): array + { + return DB::table($this->table) + ->select('checkpoint_id as id', 'created_at as timestamp') + ->get() + ->map(fn($row) => [ + 'id' => $row->id, + 'timestamp' => strtotime($row->timestamp), + ]) + ->toArray(); + } + + public function cleanup(int $olderThanTimestamp): int + { + return DB::table($this->table) + ->where('created_at', '<', date('Y-m-d H:i:s', $olderThanTimestamp)) + ->delete(); + } + + /** + * Ensure checkpoints table exists + */ + private function ensureTableExists(): void + { + if (!DB::getSchemaBuilder()->hasTable($this->table)) { + DB::getSchemaBuilder()->create($this->table, function ($table) { + $table->string('checkpoint_id')->primary(); + $table->longText('data'); + $table->timestamps(); + $table->index('created_at'); + }); + } + } +} \ No newline at end of file diff --git a/src/Checkpoint/FileCheckpointStorage.php b/src/Checkpoint/FileCheckpointStorage.php new file mode 100644 index 0000000..9c6581e --- /dev/null +++ b/src/Checkpoint/FileCheckpointStorage.php @@ -0,0 +1,106 @@ +basePath = $basePath ?? 
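+        /*
+         * Default layout: one "<sanitized id>.checkpoint" file per job under
+         * the configured storage path, gzcompressed when compression is
+         * enabled in SpaceTimeConfig.
+         */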
SpaceTimeConfig::getStoragePath() . '/checkpoints'; + + if (!is_dir($this->basePath)) { + mkdir($this->basePath, 0755, true); + } + } + + public function save(string $id, array $data): void + { + $filename = $this->getFilename($id); + $content = serialize($data); + + if (SpaceTimeConfig::isCompressionEnabled()) { + $content = gzcompress($content, SpaceTimeConfig::getCompressionLevel()); + } + + file_put_contents($filename, $content, LOCK_EX); + } + + public function load(string $id): ?array + { + $filename = $this->getFilename($id); + + if (!file_exists($filename)) { + return null; + } + + $content = file_get_contents($filename); + + if (SpaceTimeConfig::isCompressionEnabled()) { + $content = gzuncompress($content); + } + + return unserialize($content); + } + + public function exists(string $id): bool + { + return file_exists($this->getFilename($id)); + } + + public function delete(string $id): void + { + $filename = $this->getFilename($id); + + if (file_exists($filename)) { + unlink($filename); + } + } + + public function list(): array + { + $checkpoints = []; + $files = glob($this->basePath . '/*.checkpoint'); + + foreach ($files as $file) { + $id = basename($file, '.checkpoint'); + $checkpoints[] = [ + 'id' => $id, + 'timestamp' => filemtime($file), + 'size' => filesize($file), + ]; + } + + return $checkpoints; + } + + public function cleanup(int $olderThanTimestamp): int + { + $count = 0; + $files = glob($this->basePath . '/*.checkpoint'); + + foreach ($files as $file) { + if (filemtime($file) < $olderThanTimestamp) { + unlink($file); + $count++; + } + } + + return $count; + } + + private function getFilename(string $id): string + { + // Sanitize ID for filesystem + $safeId = preg_replace('/[^a-zA-Z0-9_-]/', '_', $id); + return $this->basePath . '/' . $safeId . 
'.checkpoint'; + } +} \ No newline at end of file diff --git a/src/Collections/SpaceTimeArray.php b/src/Collections/SpaceTimeArray.php new file mode 100644 index 0000000..94319fe --- /dev/null +++ b/src/Collections/SpaceTimeArray.php @@ -0,0 +1,350 @@ +config = [ + 'threshold' => $thresholdOrConfig, + 'compression' => true, + 'storage' => 'file', + ]; + $this->threshold = $thresholdOrConfig; + } else { + $this->config = array_merge([ + 'threshold' => 'auto', + 'compression' => true, + 'storage' => 'file', + ], $thresholdOrConfig); + + $this->threshold = $this->calculateThreshold(); + } + } + + /** + * Set memory threshold for switching to external storage + */ + public function setThreshold(int $threshold): void + { + $this->threshold = $threshold; + } + + /** + * ArrayAccess: Check if offset exists + */ + public function offsetExists(mixed $offset): bool + { + if (isset($this->hotData[$offset])) { + return true; + } + + if ($this->coldStorage !== null) { + return $this->coldStorage->exists((string)$offset); + } + + return false; + } + + /** + * ArrayAccess: Get value at offset + */ + public function offsetGet(mixed $offset): mixed + { + if (isset($this->hotData[$offset])) { + return $this->hotData[$offset]; + } + + if ($this->coldStorage !== null && $this->coldStorage->exists((string)$offset)) { + return $this->coldStorage->get((string)$offset); + } + + return null; + } + + /** + * ArrayAccess: Set value at offset + */ + public function offsetSet(mixed $offset, mixed $value): void + { + if ($offset === null) { + $offset = $this->count; + } + + // Check if we need to switch to external storage + if (count($this->hotData) >= $this->threshold && !isset($this->hotData[$offset])) { + $this->ensureColdStorage(); + $this->coldStorage->set((string)$offset, $value); + } else { + $this->hotData[$offset] = $value; + } + + if (!in_array($offset, $this->allKeys, true)) { + $this->allKeys[] = $offset; + $this->count++; + } + } + + /** + * ArrayAccess: Unset offset + */ + public function offsetUnset(mixed $offset): void + { + if (isset($this->hotData[$offset])) { + unset($this->hotData[$offset]); + } + + if ($this->coldStorage !== null) { + $this->coldStorage->delete((string)$offset); + } + + $this->allKeys = array_values(array_diff($this->allKeys, [$offset])); + $this->count--; + } + + /** + * Iterator: Rewind to first element + */ + public function rewind(): void + { + $this->iteratorPosition = 0; + } + + /** + * Get hot data (for testing) + */ + public function getHotData(): array + { + return $this->hotData; + } + + /** + * Get cold indices (for testing) + */ + public function getColdIndices(): array + { + // In this implementation, cold items are tracked by which keys are not in hotData + if ($this->coldStorage === null) { + return []; + } + + $coldKeys = []; + foreach ($this->allKeys as $key) { + if (!isset($this->hotData[$key])) { + $coldKeys[] = $key; + } + } + return $coldKeys; + } + + /** + * Iterator: Get current element + */ + public function current(): mixed + { + if (!isset($this->allKeys[$this->iteratorPosition])) { + return null; + } + + $key = $this->allKeys[$this->iteratorPosition]; + return $this->offsetGet($key); + } + + /** + * Iterator: Get current key + */ + public function key(): mixed + { + return $this->allKeys[$this->iteratorPosition] ?? 
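+        /*
+         * Iteration walks $allKeys in insertion order; current() goes through
+         * offsetGet(), which transparently reloads values that were spilled to
+         * cold storage, so foreach behaves the same whether or not spillover
+         * has occurred.
+         */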
null; + } + + /** + * Iterator: Move to next element + */ + public function next(): void + { + $this->iteratorPosition++; + } + + /** + * Iterator: Check if current position is valid + */ + public function valid(): bool + { + return isset($this->allKeys[$this->iteratorPosition]); + } + + /** + * Countable: Get count of elements + */ + public function count(): int + { + return $this->count; + } + + /** + * Process array in √n chunks + */ + public function chunkBySqrtN(): \Generator + { + $chunkSize = SpaceTimeConfig::calculateSqrtN($this->count); + $chunk = []; + $chunkCount = 0; + + foreach ($this as $key => $value) { + $chunk[$key] = $value; + $chunkCount++; + + if ($chunkCount >= $chunkSize) { + yield $chunk; + $chunk = []; + $chunkCount = 0; + } + } + + if (!empty($chunk)) { + yield $chunk; + } + } + + /** + * Apply callback to each element + */ + public function map(callable $callback): self + { + $result = new self($this->config); + + foreach ($this as $key => $value) { + $result[$key] = $callback($value, $key); + } + + return $result; + } + + /** + * Filter elements using callback + */ + public function filter(callable $callback): self + { + $result = new self($this->config); + + foreach ($this as $key => $value) { + if ($callback($value, $key)) { + $result[$key] = $value; + } + } + + return $result; + } + + /** + * Reduce array to single value + */ + public function reduce(callable $callback, mixed $initial = null): mixed + { + $accumulator = $initial; + + foreach ($this as $key => $value) { + $accumulator = $callback($accumulator, $value, $key); + } + + return $accumulator; + } + + /** + * Convert to regular array (caution with large datasets!) + */ + public function toArray(): array + { + $result = []; + + foreach ($this as $key => $value) { + $result[$key] = $value; + } + + return $result; + } + + /** + * Get memory usage statistics + */ + public function getStats(): array + { + return [ + 'total_items' => $this->count, + 'hot_items' => count($this->hotData), + 'cold_items' => $this->count - count($this->hotData), + 'threshold' => $this->threshold, + 'has_cold_storage' => $this->coldStorage !== null, + 'memory_usage' => memory_get_usage(true), + ]; + } + + /** + * Calculate threshold based on available memory + */ + private function calculateThreshold(): int + { + if ($this->config['threshold'] === 'auto') { + $availableMemory = SpaceTimeConfig::getAvailableMemory(); + $avgItemSize = 1024; // Estimate 1KB per item + return max(100, (int)($availableMemory / $avgItemSize / 10)); // Use 10% of available memory + } + + return (int)$this->config['threshold']; + } + + /** + * Ensure cold storage is initialized + */ + private function ensureColdStorage(): void + { + if ($this->coldStorage === null) { + $this->coldStorage = new ExternalStorage( + 'spacetime_array_' . 
spl_object_id($this), + $this->config + ); + + // Move some hot data to cold storage if needed + if (count($this->hotData) > $this->threshold) { + $toMove = array_slice($this->hotData, 0, count($this->hotData) - $this->threshold, true); + foreach ($toMove as $key => $value) { + $this->coldStorage->set((string)$key, $value); + unset($this->hotData[$key]); + } + } + } + } + + /** + * Clean up external storage on destruction + */ + public function __destruct() + { + if ($this->coldStorage !== null) { + $this->coldStorage->cleanup(); + } + } +} \ No newline at end of file diff --git a/src/Database/SpaceTimeQueryBuilder.php b/src/Database/SpaceTimeQueryBuilder.php new file mode 100644 index 0000000..8e7744d --- /dev/null +++ b/src/Database/SpaceTimeQueryBuilder.php @@ -0,0 +1,288 @@ +connection = $connection; + } + + /** + * Set table + */ + public function from(string $table): self + { + $this->table = $table; + return $this; + } + + /** + * Select columns + */ + public function select(array $columns): self + { + $this->columns = $columns; + return $this; + } + + /** + * Add where clause + */ + public function where(string $column, string $operator, mixed $value): self + { + $this->wheres[] = compact('column', 'operator', 'value'); + return $this; + } + + /** + * Add order by + */ + public function orderBy(string $column, string $direction = 'asc'): self + { + $this->orderBy[] = compact('column', 'direction'); + return $this; + } + + /** + * Set limit + */ + public function limit(int $limit): self + { + $this->limit = $limit; + return $this; + } + + /** + * Set offset + */ + public function offset(int $offset): self + { + $this->offset = $offset; + return $this; + } + + /** + * Get results as stream + */ + public function stream(): SpaceTimeStream + { + $sql = $this->buildSql(); + $statement = $this->connection->prepare($sql); + $this->bindValues($statement); + + $generator = function() use ($statement) { + $statement->execute(); + + while ($row = $statement->fetch(\PDO::FETCH_ASSOC)) { + yield $row; + } + }; + + return SpaceTimeStream::from($generator()); + } + + /** + * Process in √n chunks + */ + public function chunkBySqrtN(callable $callback): void + { + $total = $this->count(); + $chunkSize = SpaceTimeConfig::calculateSqrtN($total); + + $this->chunk($chunkSize, $callback); + } + + /** + * Process in chunks + */ + public function chunk(int $size, callable $callback): void + { + $offset = 0; + + do { + $results = $this->offset($offset)->limit($size)->get(); + + if (empty($results)) { + break; + } + + if ($callback($results) === false) { + break; + } + + $offset += $size; + } while (count($results) === $size); + } + + /** + * Get all results + */ + public function get(): array + { + $sql = $this->buildSql(); + $statement = $this->connection->prepare($sql); + $this->bindValues($statement); + $statement->execute(); + + return $statement->fetchAll(\PDO::FETCH_ASSOC); + } + + /** + * Count results + */ + public function count(): int + { + $sql = $this->buildCountSql(); + $statement = $this->connection->prepare($sql); + $this->bindValues($statement); + $statement->execute(); + + return (int) $statement->fetchColumn(); + } + + /** + * Order by using external sort + */ + public function orderByExternal(string $column, string $direction = 'asc'): array + { + $results = $this->get(); + + $comparator = $direction === 'asc' + ? 
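+        /*
+         * Note: orderByExternal() first materializes the full result set via
+         * get() and only then applies ExternalSort, so memory is bounded by
+         * the fetch rather than the sort. For very large tables, stream() or
+         * chunkBySqrtN() keep the fetch itself incremental.
+         */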
fn($a, $b) => $a <=> $b + : fn($a, $b) => $b <=> $a; + + return ExternalSort::sortBy($results, fn($row) => $row[$column], $comparator); + } + + /** + * Group by using external grouping + */ + public function groupByExternal(string $column): array + { + $results = $this->get(); + + return ExternalGroupBy::groupBy($results, fn($row) => $row[$column]); + } + + /** + * Aggregate with grouping + */ + public function groupByAggregate(string $groupColumn, string $aggregateColumn, string $function = 'sum'): array + { + $results = $this->get(); + + return match($function) { + 'sum' => ExternalGroupBy::groupBySum( + $results, + fn($row) => $row[$groupColumn], + fn($row) => $row[$aggregateColumn] + ), + 'count' => ExternalGroupBy::groupByCount( + $results, + fn($row) => $row[$groupColumn] + ), + 'avg' => $this->groupByAverage($results, $groupColumn, $aggregateColumn), + default => throw new \InvalidArgumentException("Unknown aggregate function: $function"), + }; + } + + /** + * Build SQL query + */ + private function buildSql(): string + { + $sql = 'SELECT ' . implode(', ', $this->columns) . ' FROM ' . $this->table; + + if (!empty($this->wheres)) { + $conditions = []; + foreach ($this->wheres as $where) { + $conditions[] = "{$where['column']} {$where['operator']} ?"; + } + $sql .= ' WHERE ' . implode(' AND ', $conditions); + } + + if (!empty($this->orderBy)) { + $orders = []; + foreach ($this->orderBy as $order) { + $orders[] = "{$order['column']} {$order['direction']}"; + } + $sql .= ' ORDER BY ' . implode(', ', $orders); + } + + if ($this->limit !== null) { + $sql .= ' LIMIT ' . $this->limit; + } + + if ($this->offset !== null) { + $sql .= ' OFFSET ' . $this->offset; + } + + return $sql; + } + + /** + * Build count SQL + */ + private function buildCountSql(): string + { + $sql = 'SELECT COUNT(*) FROM ' . $this->table; + + if (!empty($this->wheres)) { + $conditions = []; + foreach ($this->wheres as $where) { + $conditions[] = "{$where['column']} {$where['operator']} ?"; + } + $sql .= ' WHERE ' . implode(' AND ', $conditions); + } + + return $sql; + } + + /** + * Bind values to statement + */ + private function bindValues(\PDOStatement $statement): void + { + $index = 1; + foreach ($this->wheres as $where) { + $statement->bindValue($index++, $where['value']); + } + } + + /** + * Group by average helper + */ + private function groupByAverage(array $data, string $groupColumn, string $aggregateColumn): array + { + $groups = ExternalGroupBy::groupBy($data, fn($row) => $row[$groupColumn]); + $result = []; + + foreach ($groups as $key => $items) { + $sum = array_sum(array_column($items, $aggregateColumn)); + $count = count($items); + $result[$key] = $count > 0 ? 
$sum / $count : 0; + } + + return $result; + } +} \ No newline at end of file diff --git a/src/File/CsvExporter.php b/src/File/CsvExporter.php new file mode 100644 index 0000000..4c58cdb --- /dev/null +++ b/src/File/CsvExporter.php @@ -0,0 +1,200 @@ +filename = $filename; + $this->options = array_merge([ + 'delimiter' => ',', + 'enclosure' => '"', + 'escape' => '\\', + 'headers' => true, + 'encoding' => 'UTF-8', + 'append' => false, + ], $options); + + $this->open(); + } + + public function __destruct() + { + $this->close(); + } + + /** + * Write single row + */ + public function writeRow(array $row): void + { + if ($this->options['headers'] && !$this->headersWritten) { + $this->writeHeaders(array_keys($row)); + } + + fputcsv( + $this->handle, + array_values($row), + $this->options['delimiter'], + $this->options['enclosure'], + $this->options['escape'] + ); + } + + /** + * Write multiple rows + */ + public function writeRows(iterable $rows): void + { + foreach ($rows as $row) { + $this->writeRow($row); + } + } + + /** + * Write rows in √n chunks + */ + public function writeInChunks(iterable $data, ?int $totalCount = null): void + { + if ($totalCount === null && is_array($data)) { + $totalCount = count($data); + } + + $chunkSize = $totalCount ? SpaceTimeConfig::calculateSqrtN($totalCount) : 1000; + $buffer = []; + + foreach ($data as $row) { + $buffer[] = $row; + + if (count($buffer) >= $chunkSize) { + $this->flushBuffer($buffer); + $buffer = []; + } + } + + // Write remaining rows + if (!empty($buffer)) { + $this->flushBuffer($buffer); + } + } + + /** + * Write from query results + */ + public function writeFromQuery(\PDOStatement $statement): int + { + $count = 0; + + while ($row = $statement->fetch(\PDO::FETCH_ASSOC)) { + $this->writeRow($row); + $count++; + } + + return $count; + } + + /** + * Write with transformation + */ + public function writeWithTransform(iterable $data, callable $transformer): void + { + foreach ($data as $row) { + $transformed = $transformer($row); + if ($transformed !== null) { + $this->writeRow($transformed); + } + } + } + + /** + * Write headers explicitly + */ + public function writeHeaders(array $headers): void + { + if (!$this->headersWritten) { + fputcsv( + $this->handle, + $headers, + $this->options['delimiter'], + $this->options['enclosure'], + $this->options['escape'] + ); + $this->headersWritten = true; + } + } + + /** + * Flush and sync to disk + */ + public function flush(): void + { + if ($this->handle) { + fflush($this->handle); + } + } + + /** + * Get bytes written + */ + public function getBytesWritten(): int + { + if ($this->handle) { + $stat = fstat($this->handle); + return $stat['size'] ?? 0; + } + return 0; + } + + /** + * Open file handle + */ + private function open(): void + { + $mode = $this->options['append'] ? 
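+        /*
+         * Descriptive note: when the 'headers' option is true and
+         * writeHeaders() was not called explicitly, the first writeRow() emits
+         * the row's array keys as the header line; setting 'encoding' to
+         * 'UTF-8-BOM' also prefixes a BOM (see below) for spreadsheet
+         * compatibility.
+         */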
'a' : 'w'; + $this->handle = fopen($this->filename, $mode); + + if (!$this->handle) { + throw new \RuntimeException("Cannot open file for writing: {$this->filename}"); + } + + // Write BOM for UTF-8 if needed + if (!$this->options['append'] && $this->options['encoding'] === 'UTF-8-BOM') { + fwrite($this->handle, "\xEF\xBB\xBF"); + } + } + + /** + * Close file handle + */ + private function close(): void + { + if ($this->handle) { + fclose($this->handle); + $this->handle = null; + } + } + + /** + * Flush buffer to file + */ + private function flushBuffer(array $buffer): void + { + foreach ($buffer as $row) { + $this->writeRow($row); + } + $this->flush(); + } +} \ No newline at end of file diff --git a/src/File/CsvReader.php b/src/File/CsvReader.php new file mode 100644 index 0000000..188b3ba --- /dev/null +++ b/src/File/CsvReader.php @@ -0,0 +1,198 @@ +filename = $filename; + $this->options = array_merge([ + 'delimiter' => ',', + 'enclosure' => '"', + 'escape' => '\\', + 'headers' => true, + 'encoding' => 'UTF-8', + 'skip_empty' => true, + ], $options); + } + + /** + * Read CSV as stream + */ + public function stream(): SpaceTimeStream + { + return SpaceTimeStream::fromCsv($this->filename, $this->options); + } + + /** + * Read CSV in √n chunks + */ + public function readInChunks(callable $callback): void + { + $totalLines = $this->countLines(); + $chunkSize = SpaceTimeConfig::calculateSqrtN($totalLines); + + $this->stream() + ->chunk($chunkSize) + ->each($callback); + } + + /** + * Read specific columns only + */ + public function readColumns(array $columns): SpaceTimeStream + { + return $this->stream()->map(function($row) use ($columns) { + return array_intersect_key($row, array_flip($columns)); + }); + } + + /** + * Read with type conversion + */ + public function readWithTypes(array $types): SpaceTimeStream + { + return $this->stream()->map(function($row) use ($types) { + foreach ($types as $column => $type) { + if (isset($row[$column])) { + $row[$column] = $this->convertType($row[$column], $type); + } + } + return $row; + }); + } + + /** + * Get column statistics + */ + public function getColumnStats(string $column): array + { + $stats = [ + 'count' => 0, + 'null_count' => 0, + 'unique_count' => 0, + 'min' => null, + 'max' => null, + 'sum' => 0, + 'values' => [], + ]; + + $this->stream()->each(function($row) use ($column, &$stats) { + $stats['count']++; + + if (!isset($row[$column]) || $row[$column] === '') { + $stats['null_count']++; + return; + } + + $value = $row[$column]; + + // Track unique values (up to a limit) + if (count($stats['values']) < 1000) { + $stats['values'][$value] = ($stats['values'][$value] ?? 0) + 1; + } + + // Numeric stats + if (is_numeric($value)) { + $numValue = (float) $value; + $stats['sum'] += $numValue; + + if ($stats['min'] === null || $numValue < $stats['min']) { + $stats['min'] = $numValue; + } + + if ($stats['max'] === null || $numValue > $stats['max']) { + $stats['max'] = $numValue; + } + } + }); + + $stats['unique_count'] = count($stats['values']); + $stats['avg'] = $stats['count'] > 0 ? 
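+        /*
+         * Caveat: 'sum' only accumulates numeric cells, while 'count' includes
+         * every row (nulls and non-numeric values too), so 'avg' is the
+         * numeric sum divided by the total row count, not a mean of the
+         * numeric values alone.
+         */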
$stats['sum'] / $stats['count'] : 0; + + // Find most common values + arsort($stats['values']); + $stats['most_common'] = array_slice($stats['values'], 0, 10, true); + unset($stats['values']); // Remove full list to save memory + + return $stats; + } + + /** + * Validate CSV structure + */ + public function validate(): array + { + $errors = []; + $lineNumber = 0; + $expectedColumns = null; + + $this->stream()->each(function($row) use (&$errors, &$lineNumber, &$expectedColumns) { + $lineNumber++; + + if ($expectedColumns === null) { + $expectedColumns = count($row); + } elseif (count($row) !== $expectedColumns) { + $errors[] = [ + 'line' => $lineNumber, + 'error' => 'Column count mismatch', + 'expected' => $expectedColumns, + 'actual' => count($row), + ]; + } + + // Additional validation can be added here + }); + + return $errors; + } + + /** + * Count lines in file + */ + private function countLines(): int + { + $count = 0; + $handle = fopen($this->filename, 'r'); + + while (!feof($handle)) { + fgets($handle); + $count++; + } + + fclose($handle); + + return $count; + } + + /** + * Convert value to specified type + */ + private function convertType(mixed $value, string $type): mixed + { + return match($type) { + 'int', 'integer' => (int) $value, + 'float', 'double' => (float) $value, + 'bool', 'boolean' => filter_var($value, FILTER_VALIDATE_BOOLEAN), + 'date' => new \DateTime($value), + 'json' => json_decode($value, true), + default => $value, + }; + } +} \ No newline at end of file diff --git a/src/File/JsonLinesProcessor.php b/src/File/JsonLinesProcessor.php new file mode 100644 index 0000000..3639485 --- /dev/null +++ b/src/File/JsonLinesProcessor.php @@ -0,0 +1,198 @@ +map(fn($line) => json_decode($line, true)) + ->filter(fn($data) => $data !== null); + } + + /** + * Write data to JSONL file + */ + public static function write(iterable $data, string $filename, bool $append = false): int + { + $mode = $append ? 'a' : 'w'; + $handle = fopen($filename, $mode); + + if (!$handle) { + throw new \RuntimeException("Cannot open file for writing: $filename"); + } + + $count = 0; + + try { + foreach ($data as $item) { + $json = json_encode($item, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); + if ($json === false) { + throw new \RuntimeException('JSON encoding failed: ' . json_last_error_msg()); + } + + fwrite($handle, $json . "\n"); + $count++; + } + } finally { + fclose($handle); + } + + return $count; + } + + /** + * Process JSONL file in √n chunks + */ + public static function processInChunks(string $filename, callable $processor): void + { + $totalLines = self::countLines($filename); + $chunkSize = SpaceTimeConfig::calculateSqrtN($totalLines); + + self::read($filename) + ->chunk($chunkSize) + ->each($processor); + } + + /** + * Merge multiple JSONL files + */ + public static function merge(array $filenames, string $outputFile): int + { + $count = 0; + $handle = fopen($outputFile, 'w'); + + if (!$handle) { + throw new \RuntimeException("Cannot open output file: $outputFile"); + } + + try { + foreach ($filenames as $filename) { + $stream = self::read($filename); + + $stream->each(function($item) use ($handle, &$count) { + fwrite($handle, json_encode($item) . 
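+                    /*
+                     * Note: merge() re-encodes with json_encode() defaults,
+                     * whereas write() uses JSON_UNESCAPED_UNICODE |
+                     * JSON_UNESCAPED_SLASHES, so merged output can differ
+                     * byte-for-byte from the inputs even though the decoded
+                     * data is identical.
+                     */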
"\n"); + $count++; + }); + } + } finally { + fclose($handle); + } + + return $count; + } + + /** + * Split JSONL file into multiple files + */ + public static function split(string $filename, int $linesPerFile, string $outputPrefix): array + { + $files = []; + $fileIndex = 0; + $currentLines = 0; + $currentHandle = null; + + try { + self::read($filename)->each(function($item) use ( + &$files, + &$fileIndex, + &$currentLines, + &$currentHandle, + $linesPerFile, + $outputPrefix + ) { + // Open new file if needed + if ($currentLines === 0) { + $outputFile = sprintf('%s_%04d.jsonl', $outputPrefix, $fileIndex); + $currentHandle = fopen($outputFile, 'w'); + $files[] = $outputFile; + } + + // Write line + fwrite($currentHandle, json_encode($item) . "\n"); + $currentLines++; + + // Close file if limit reached + if ($currentLines >= $linesPerFile) { + fclose($currentHandle); + $currentHandle = null; + $currentLines = 0; + $fileIndex++; + } + }); + + // Close last file if open + if ($currentHandle) { + fclose($currentHandle); + } + } catch (\Exception $e) { + // Clean up on error + if ($currentHandle) { + fclose($currentHandle); + } + throw $e; + } + + return $files; + } + + /** + * Filter JSONL file + */ + public static function filter(string $inputFile, string $outputFile, callable $predicate): int + { + $count = 0; + + $filtered = self::read($inputFile) + ->filter($predicate) + ->toArray(); + + return self::write($filtered, $outputFile); + } + + /** + * Transform JSONL file + */ + public static function transform(string $inputFile, string $outputFile, callable $transformer): int + { + $transformed = self::read($inputFile) + ->map($transformer) + ->filter(fn($item) => $item !== null); + + return self::write($transformed, $outputFile); + } + + /** + * Count lines in file + */ + private static function countLines(string $filename): int + { + $count = 0; + $handle = fopen($filename, 'r'); + + if (!$handle) { + throw new \RuntimeException("Cannot open file: $filename"); + } + + while (!feof($handle)) { + fgets($handle); + $count++; + } + + fclose($handle); + + return $count; + } +} \ No newline at end of file diff --git a/src/Laravel/SpaceTimeServiceProvider.php b/src/Laravel/SpaceTimeServiceProvider.php new file mode 100644 index 0000000..a74a17e --- /dev/null +++ b/src/Laravel/SpaceTimeServiceProvider.php @@ -0,0 +1,155 @@ +mergeConfigFrom(__DIR__ . '/../../config/spacetime.php', 'spacetime'); + + // Configure SpaceTime with Laravel config + $this->app->booted(function () { + SpaceTimeConfig::configure([ + 'memory_limit' => config('spacetime.memory_limit', '256M'), + 'external_storage_path' => config('spacetime.storage_path', storage_path('spacetime')), + 'chunk_strategy' => config('spacetime.chunk_strategy', 'sqrt_n'), + 'enable_checkpointing' => config('spacetime.enable_checkpointing', true), + 'compression' => config('spacetime.compression', true), + ]); + }); + } + + /** + * Bootstrap services + */ + public function boot(): void + { + // Publish config + $this->publishes([ + __DIR__ . 
'/../../config/spacetime.php' => config_path('spacetime.php'), + ], 'spacetime-config'); + + // Register Collection macros + $this->registerCollectionMacros(); + + // Register Query Builder macros + $this->registerQueryBuilderMacros(); + } + + /** + * Register Collection macros + */ + private function registerCollectionMacros(): void + { + // Sort using external memory + Collection::macro('sortByExternal', function ($callback = null) { + $items = $this->all(); + + if ($callback) { + $sorted = ExternalSort::sortBy($items, $callback); + } else { + $sorted = ExternalSort::sort($items); + } + + return new static($sorted); + }); + + // Sort by key using external memory + Collection::macro('sortByDescExternal', function ($callback) { + $items = $this->all(); + $sorted = ExternalSort::sortBy($items, $callback, fn($a, $b) => $b <=> $a); + return new static($sorted); + }); + + // Group by using external memory + Collection::macro('groupByExternal', function ($groupBy) { + $callback = $this->valueRetriever($groupBy); + $grouped = ExternalGroupBy::groupBy($this->all(), $callback); + + return new static($grouped); + }); + + // Chunk by √n + Collection::macro('chunkBySqrtN', function () { + $size = SpaceTimeConfig::calculateSqrtN($this->count()); + return $this->chunk($size); + }); + + // Process in √n batches + Collection::macro('eachBySqrtN', function ($callback) { + $this->chunkBySqrtN()->each(function ($chunk) use ($callback) { + $chunk->each($callback); + }); + }); + + // Map with checkpointing + Collection::macro('mapWithCheckpoint', function ($callback, $checkpointKey = null) { + $checkpointKey = $checkpointKey ?: 'collection_map_' . uniqid(); + $checkpoint = new \Ubiquity\SpaceTime\Checkpoint\CheckpointManager($checkpointKey); + + $result = []; + $processed = 0; + + foreach ($this->all() as $key => $value) { + $result[$key] = $callback($value, $key); + $processed++; + + if ($checkpoint->shouldCheckpoint()) { + $checkpoint->save([ + 'processed' => $processed, + 'result' => $result, + ]); + } + } + + return new static($result); + }); + } + + /** + * Register Query Builder macros + */ + private function registerQueryBuilderMacros(): void + { + // Chunk by √n + \Illuminate\Database\Query\Builder::macro('chunkBySqrtN', function ($callback) { + $total = $this->count(); + $chunkSize = SpaceTimeConfig::calculateSqrtN($total); + + return $this->chunk($chunkSize, $callback); + }); + + // Order by external + \Illuminate\Database\Query\Builder::macro('orderByExternal', function ($column, $direction = 'asc') { + // This is a placeholder - in practice, you'd implement + // external sorting at the query level + return $this->orderBy($column, $direction); + }); + + // Get with √n memory usage + \Illuminate\Database\Query\Builder::macro('getBySqrtN', function () { + $results = collect(); + + $this->chunkBySqrtN(function ($chunk) use ($results) { + $results = $results->merge($chunk); + }); + + return $results; + }); + } +} \ No newline at end of file diff --git a/src/Memory/Handlers/CacheEvictionHandler.php b/src/Memory/Handlers/CacheEvictionHandler.php new file mode 100644 index 0000000..a3ddc8a --- /dev/null +++ b/src/Memory/Handlers/CacheEvictionHandler.php @@ -0,0 +1,85 @@ +value => 0.1, // Evict 10% + MemoryPressureLevel::MEDIUM->value => 0.25, // Evict 25% + MemoryPressureLevel::HIGH->value => 0.5, // Evict 50% + MemoryPressureLevel::CRITICAL->value => 0.9, // Evict 90% + ]; + + /** + * Register a cache that can be evicted + */ + public function registerCache(EvictableCache $cache, int $priority = 0): void 
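// Usage sketch for the macros registered above: it assumes the provider is loaded
// in a Laravel application; the collection contents and the table are hypothetical.

use Illuminate\Support\Facades\DB;

$orders = collect($largeArrayOfOrders);                  // hypothetical dataset

// External merge sort instead of an in-memory usort().
$byTotal = $orders->sortByExternal(fn ($o) => $o['total']);

// Group on disk when the number of groups would not fit comfortably in memory.
$byCustomer = $orders->groupByExternal('customer_id');

// Walk the collection in √n-sized slices.
$orders->eachBySqrtN(function ($order) {
    // handle one order
});

// Query builder: fetch a large table in √n-sized chunks.
DB::table('orders')->chunkBySqrtN(function ($chunk) {
    // $chunk is a Collection of rows
});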
+ { + $this->caches[] = ['cache' => $cache, 'priority' => $priority]; + + // Sort by priority (lower number = higher priority to keep) + usort($this->caches, fn($a, $b) => $b['priority'] <=> $a['priority']); + } + + public function shouldHandle(MemoryPressureLevel $level): bool + { + return $level !== MemoryPressureLevel::NONE; + } + + public function handle(MemoryPressureLevel $level, array $memoryInfo): void + { + $evictionRate = $this->evictionRates[$level->value] ?? 0; + + if ($evictionRate === 0) { + return; + } + + // Evict from lowest priority caches first + foreach ($this->caches as $cacheInfo) { + $cache = $cacheInfo['cache']; + $size = $cache->size(); + + if ($size > 0) { + $toEvict = (int) ceil($size * $evictionRate); + $cache->evict($toEvict); + + // Check if pressure is relieved + $currentUsage = memory_get_usage(true); + if ($currentUsage < $memoryInfo['limit'] * 0.7) { + break; + } + } + } + } +} + +/** + * Interface for caches that support eviction + */ +interface EvictableCache +{ + /** + * Get current cache size + */ + public function size(): int; + + /** + * Evict n entries from cache + */ + public function evict(int $count): void; + + /** + * Clear entire cache + */ + public function clear(): void; +} \ No newline at end of file diff --git a/src/Memory/Handlers/GarbageCollectionHandler.php b/src/Memory/Handlers/GarbageCollectionHandler.php new file mode 100644 index 0000000..1b54b10 --- /dev/null +++ b/src/Memory/Handlers/GarbageCollectionHandler.php @@ -0,0 +1,55 @@ +isHigherThan(MemoryPressureLevel::LOW); + } + + public function handle(MemoryPressureLevel $level, array $memoryInfo): void + { + $now = microtime(true); + + // Don't collect too frequently + if ($now - $this->lastCollection < $this->minInterval) { + return; + } + + // Force collection for high/critical pressure + if ($level->isHigherThan(MemoryPressureLevel::MEDIUM)) { + $this->forceCollection(); + $this->lastCollection = $now; + } + } + + private function forceCollection(): void + { + // Enable GC if disabled + $wasEnabled = gc_enabled(); + if (!$wasEnabled) { + gc_enable(); + } + + // Collect cycles + $collected = gc_collect_cycles(); + + // Restore previous state + if (!$wasEnabled) { + gc_disable(); + } + } +} \ No newline at end of file diff --git a/src/Memory/Handlers/LoggingHandler.php b/src/Memory/Handlers/LoggingHandler.php new file mode 100644 index 0000000..a4231d7 --- /dev/null +++ b/src/Memory/Handlers/LoggingHandler.php @@ -0,0 +1,59 @@ +logger = $logger ?? 
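// Usage sketch for the eviction handler above: a minimal EvictableCache backed by
// a plain array, assuming namespaces mirror src/Memory/ (Ubiquity\SpaceTime\Memory\*).

use Ubiquity\SpaceTime\Memory\Handlers\CacheEvictionHandler;
use Ubiquity\SpaceTime\Memory\Handlers\EvictableCache;
use Ubiquity\SpaceTime\Memory\MemoryPressureMonitor;

final class InMemoryArrayCache implements EvictableCache
{
    private array $items = [];

    public function put(string $key, mixed $value): void { $this->items[$key] = $value; }
    public function size(): int  { return count($this->items); }
    public function clear(): void { $this->items = []; }

    // Drop the oldest entries first (insertion order).
    public function evict(int $count): void
    {
        $this->items = array_slice($this->items, $count, null, true);
    }
}

$handler = new CacheEvictionHandler();
$handler->registerCache(new InMemoryArrayCache(), 10);   // higher number = evicted sooner

$monitor = new MemoryPressureMonitor();
$monitor->registerHandler($handler);
$monitor->check();   // evicts 10% to 90% of entries when pressure is detected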
new NullLogger(); + $this->minLevel = $minLevel; + } + + public function shouldHandle(MemoryPressureLevel $level): bool + { + return $level->isHigherThan($this->minLevel) || $level === $this->minLevel; + } + + public function handle(MemoryPressureLevel $level, array $memoryInfo): void + { + $context = [ + 'level' => $level->value, + 'usage' => $this->formatBytes($memoryInfo['usage']), + 'limit' => $this->formatBytes($memoryInfo['limit']), + 'percentage' => round($memoryInfo['percentage'], 2), + 'available' => $this->formatBytes($memoryInfo['available']), + ]; + + match ($level) { + MemoryPressureLevel::CRITICAL => $this->logger->critical('Critical memory pressure detected', $context), + MemoryPressureLevel::HIGH => $this->logger->error('High memory pressure detected', $context), + MemoryPressureLevel::MEDIUM => $this->logger->warning('Medium memory pressure detected', $context), + MemoryPressureLevel::LOW => $this->logger->info('Low memory pressure detected', $context), + default => $this->logger->debug('Memory pressure check', $context), + }; + } + + private function formatBytes(float $bytes): string + { + $units = ['B', 'KB', 'MB', 'GB']; + $factor = floor((strlen((string)(int)$bytes) - 1) / 3); + + return sprintf("%.2f %s", $bytes / pow(1024, $factor), $units[$factor]); + } +} \ No newline at end of file diff --git a/src/Memory/MemoryPressureMonitor.php b/src/Memory/MemoryPressureMonitor.php new file mode 100644 index 0000000..6b03bc0 --- /dev/null +++ b/src/Memory/MemoryPressureMonitor.php @@ -0,0 +1,168 @@ +memoryLimit = (float) $memoryLimit; + } else { + $this->memoryLimit = $this->parseMemoryLimit($memoryLimit ?? ini_get('memory_limit')); + } + } + + /** + * Register a pressure handler + */ + public function registerHandler(MemoryPressureHandler $handler): void + { + $this->handlers[] = $handler; + } + + /** + * Check current memory pressure + */ + public function check(): MemoryPressureLevel + { + $now = microtime(true); + + // Throttle checks + if ($now - $this->lastCheck < $this->checkInterval) { + return $this->getCurrentLevel(); + } + + $this->lastCheck = $now; + $level = $this->getCurrentLevel(); + + // Notify handlers + foreach ($this->handlers as $handler) { + if ($handler->shouldHandle($level)) { + $handler->handle($level, $this->getMemoryInfo()); + } + } + + return $level; + } + + /** + * Get current memory pressure level + */ + public function getCurrentLevel(): MemoryPressureLevel + { + $usage = memory_get_usage(true); + $percentage = ($usage / $this->memoryLimit) * 100; + + if ($percentage >= 95) { + return MemoryPressureLevel::CRITICAL; + } elseif ($percentage >= 85) { + return MemoryPressureLevel::HIGH; + } elseif ($percentage >= 70) { + return MemoryPressureLevel::MEDIUM; + } elseif ($percentage >= 50) { + return MemoryPressureLevel::LOW; + } + + return MemoryPressureLevel::NONE; + } + + /** + * Get detailed memory information + */ + public function getMemoryInfo(): array + { + $usage = memory_get_usage(true); + $realUsage = memory_get_usage(false); + + return [ + 'limit' => $this->memoryLimit, + 'usage' => $usage, + 'real_usage' => $realUsage, + 'percentage' => ($usage / $this->memoryLimit) * 100, + 'available' => $this->memoryLimit - $usage, + 'peak_usage' => memory_get_peak_usage(true), + 'peak_real_usage' => memory_get_peak_usage(false), + ]; + } + + /** + * Force garbage collection if possible + */ + public function forceCleanup(): void + { + if (function_exists('gc_collect_cycles')) { + gc_collect_cycles(); + } + } + + /** + * Parse memory limit string to bytes 
+ */ + private function parseMemoryLimit(string $limit): float + { + $limit = trim($limit); + + if ($limit === '-1') { + return PHP_FLOAT_MAX; + } + + $unit = strtolower($limit[strlen($limit) - 1]); + $value = (float) $limit; + + switch ($unit) { + case 'g': + $value *= 1024; + case 'm': + $value *= 1024; + case 'k': + $value *= 1024; + } + + return $value; + } +} + +/** + * Memory pressure levels + */ +enum MemoryPressureLevel: string +{ + case NONE = 'none'; + case LOW = 'low'; + case MEDIUM = 'medium'; + case HIGH = 'high'; + case CRITICAL = 'critical'; + + public function isHigherThan(self $other): bool + { + $order = [ + self::NONE->value => 0, + self::LOW->value => 1, + self::MEDIUM->value => 2, + self::HIGH->value => 3, + self::CRITICAL->value => 4, + ]; + + return $order[$this->value] > $order[$other->value]; + } +} + +/** + * Interface for memory pressure handlers + */ +interface MemoryPressureHandler +{ + public function shouldHandle(MemoryPressureLevel $level): bool; + public function handle(MemoryPressureLevel $level, array $memoryInfo): void; +} \ No newline at end of file diff --git a/src/SpaceTimeConfig.php b/src/SpaceTimeConfig.php new file mode 100644 index 0000000..5cd4087 --- /dev/null +++ b/src/SpaceTimeConfig.php @@ -0,0 +1,196 @@ + 134217728, // 128MB default + 'external_storage_path' => null, + 'chunk_strategy' => 'sqrt_n', + 'enable_checkpointing' => true, + 'checkpoint_interval' => 'auto', + 'compression' => true, + 'compression_level' => 6, + 'storage_driver' => 'file', + 'enable_profiling' => false, + ]; + + private static array $storageDrivers = []; + private static ?string $tempPath = null; + + /** + * Configure SpaceTime globally + */ + public static function configure(array $config): void + { + self::$config = array_merge(self::$config, $config); + + // Convert memory limit string to bytes if needed + if (is_string(self::$config['memory_limit'])) { + self::$config['memory_limit'] = self::parseMemoryLimit(self::$config['memory_limit']); + } + } + + /** + * Get configuration value + */ + public static function get(string $key, mixed $default = null): mixed + { + return self::$config[$key] ?? $default; + } + + /** + * Set configuration value + */ + public static function set(string $key, mixed $value): void + { + self::$config[$key] = $value; + } + + /** + * Get memory limit in bytes + */ + public static function getMemoryLimit(): int + { + return (int) self::$config['memory_limit']; + } + + /** + * Get external storage path + */ + public static function getStoragePath(): string + { + if (self::$config['external_storage_path'] === null) { + if (self::$tempPath === null) { + self::$tempPath = sys_get_temp_dir() . '/spacetime_' . 
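// Usage sketch for the pressure monitor above: it assumes namespaces mirror
// src/Memory/ and that the handler constructors shown earlier take their arguments
// in the order their properties are assigned.

use Psr\Log\NullLogger;
use Ubiquity\SpaceTime\Memory\Handlers\GarbageCollectionHandler;
use Ubiquity\SpaceTime\Memory\Handlers\LoggingHandler;
use Ubiquity\SpaceTime\Memory\MemoryPressureLevel;
use Ubiquity\SpaceTime\Memory\MemoryPressureMonitor;

$monitor = new MemoryPressureMonitor('256M');            // numeric byte counts also work
$monitor->registerHandler(new GarbageCollectionHandler());
$monitor->registerHandler(new LoggingHandler(new NullLogger(), MemoryPressureLevel::LOW));

foreach ($hugeDataSource as $record) {                    // hypothetical data source
    // ... process $record ...

    // Cheap to call often: checks are throttled internally; GC only collects at
    // HIGH/CRITICAL pressure, logging fires from LOW upwards.
    if ($monitor->check() === MemoryPressureLevel::CRITICAL) {
        $monitor->forceCleanup();
    }
}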
getmypid(); + if (!is_dir(self::$tempPath)) { + mkdir(self::$tempPath, 0777, true); + } + } + return self::$tempPath; + } + + return self::$config['external_storage_path']; + } + + /** + * Calculate √n for a given size + */ + public static function calculateSqrtN(int $n): int + { + return max(1, (int) sqrt($n)); + } + + /** + * Calculate optimal chunk size based on available memory + */ + public static function calculateOptimalChunkSize(int $totalItems, int $itemSize = 1024): int + { + $availableMemory = self::getAvailableMemory(); + $memoryLimit = self::getMemoryLimit(); + $useableMemory = min($availableMemory, $memoryLimit) * 0.8; // Use 80% of available + + $strategy = self::$config['chunk_strategy']; + + return match ($strategy) { + 'sqrt_n' => self::calculateSqrtN($totalItems), + 'memory_based' => max(1, (int) ($useableMemory / $itemSize)), + 'fixed' => 1000, + default => self::calculateSqrtN($totalItems), + }; + } + + /** + * Get available memory + */ + public static function getAvailableMemory(): int + { + $limit = self::parseMemoryLimit(ini_get('memory_limit')); + $used = memory_get_usage(true); + + if ($limit === -1) { + // No memory limit, use 1GB as reasonable default + return 1073741824 - $used; + } + + return max(0, $limit - $used); + } + + /** + * Parse memory limit string to bytes + */ + private static function parseMemoryLimit(string $limit): int + { + $limit = trim($limit); + + if ($limit === '-1') { + return -1; + } + + $last = strtolower($limit[strlen($limit) - 1]); + $value = (int) $limit; + + switch ($last) { + case 'g': + $value *= 1024; + // no break + case 'm': + $value *= 1024; + // no break + case 'k': + $value *= 1024; + } + + return $value; + } + + /** + * Register a storage driver + */ + public static function registerStorageDriver(string $name, string $class): void + { + self::$storageDrivers[$name] = $class; + } + + /** + * Get storage driver class + */ + public static function getStorageDriver(string $name): ?string + { + return self::$storageDrivers[$name] ?? null; + } + + /** + * Cleanup temporary files + */ + public static function cleanup(): void + { + if (self::$tempPath !== null && is_dir(self::$tempPath)) { + self::recursiveRemove(self::$tempPath); + self::$tempPath = null; + } + } + + /** + * Recursively remove directory + */ + private static function recursiveRemove(string $dir): void + { + if (!is_dir($dir)) { + return; + } + + $files = array_diff(scandir($dir), ['.', '..']); + foreach ($files as $file) { + $path = $dir . '/' . $file; + is_dir($path) ? 
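// Usage sketch for the global configuration above: it assumes the class is
// autoloaded as Ubiquity\SpaceTime\SpaceTimeConfig (src/SpaceTimeConfig.php).

use Ubiquity\SpaceTime\SpaceTimeConfig;

SpaceTimeConfig::configure([
    'memory_limit'   => '256M',       // parsed to bytes internally
    'chunk_strategy' => 'sqrt_n',     // sqrt_n | memory_based | fixed
]);

$n = 1_000_000;
echo SpaceTimeConfig::calculateSqrtN($n);                   // 1000
echo SpaceTimeConfig::calculateOptimalChunkSize($n, 2048);  // depends on strategy and free memory

// With no external_storage_path configured, spill files go to a per-process
// temp directory returned by getStoragePath(); cleanup() removes it.
register_shutdown_function(fn () => SpaceTimeConfig::cleanup());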
self::recursiveRemove($path) : unlink($path); + } + rmdir($dir); + } +} \ No newline at end of file diff --git a/src/Storage/ExternalStorage.php b/src/Storage/ExternalStorage.php new file mode 100644 index 0000000..1b250e0 --- /dev/null +++ b/src/Storage/ExternalStorage.php @@ -0,0 +1,187 @@ +prefix = $prefix; + $this->config = array_merge([ + 'compression' => true, + 'compression_level' => 6, + ], $config); + + $this->basePath = SpaceTimeConfig::getStoragePath(); + if (!is_dir($this->basePath)) { + mkdir($this->basePath, 0777, true); + } + } + + /** + * Store a value + */ + public function set(string $key, mixed $value): void + { + $serialized = serialize($value); + + if ($this->config['compression']) { + $serialized = gzcompress($serialized, $this->config['compression_level']); + } + + $length = strlen($serialized); + + // Check if we need a new file + if ($this->currentFile === null || $this->currentFileSize + $length > self::MAX_FILE_SIZE) { + $this->rotateFile(); + } + + $offset = ftell($this->currentFile); + fwrite($this->currentFile, $serialized); + + $this->index[$key] = [ + 'file' => $this->currentFilePath, + 'offset' => $offset, + 'length' => $length, + ]; + + $this->currentFileSize += $length; + } + + /** + * Retrieve a value + */ + public function get(string $key): mixed + { + if (!isset($this->index[$key])) { + return null; + } + + $info = $this->index[$key]; + $handle = fopen($info['file'], 'rb'); + + if (!$handle) { + return null; + } + + fseek($handle, $info['offset']); + $data = fread($handle, $info['length']); + fclose($handle); + + if ($this->config['compression']) { + $data = gzuncompress($data); + } + + return unserialize($data); + } + + /** + * Check if key exists + */ + public function exists(string $key): bool + { + return isset($this->index[$key]); + } + + /** + * Delete a value + */ + public function delete(string $key): void + { + unset($this->index[$key]); + // Note: We don't actually remove from file to avoid fragmentation + // Files are cleaned up when the storage is destroyed + } + + /** + * Get all keys + */ + public function keys(): array + { + return array_keys($this->index); + } + + /** + * Get storage statistics + */ + public function getStats(): array + { + $totalSize = 0; + $fileCount = 0; + + foreach (glob($this->basePath . '/' . $this->prefix . '_*.dat') as $file) { + $totalSize += filesize($file); + $fileCount++; + } + + return [ + 'keys' => count($this->index), + 'files' => $fileCount, + 'total_size' => $totalSize, + 'compression' => $this->config['compression'], + ]; + } + + /** + * Rotate to a new file + */ + private function rotateFile(): void + { + if ($this->currentFile !== null) { + fclose($this->currentFile); + } + + $this->fileCounter++; + $this->currentFilePath = $this->basePath . '/' . $this->prefix . '_' . $this->fileCounter . '.dat'; + $this->currentFile = fopen($this->currentFilePath, 'wb'); + $this->currentFileSize = 0; + + if (!$this->currentFile) { + throw new \RuntimeException("Failed to create storage file: {$this->currentFilePath}"); + } + } + + /** + * Clean up all storage files + */ + public function cleanup(): void + { + if ($this->currentFile !== null) { + fclose($this->currentFile); + $this->currentFile = null; + } + + foreach (glob($this->basePath . '/' . $this->prefix . 
'_*.dat') as $file) { + unlink($file); + } + + $this->index = []; + } + + /** + * Destructor + */ + public function __destruct() + { + if ($this->currentFile !== null) { + fclose($this->currentFile); + } + } +} \ No newline at end of file diff --git a/src/Streams/Iterators/ChunkIterator.php b/src/Streams/Iterators/ChunkIterator.php new file mode 100644 index 0000000..191c540 --- /dev/null +++ b/src/Streams/Iterators/ChunkIterator.php @@ -0,0 +1,60 @@ +iterator = $iterator; + $this->chunkSize = max(1, $chunkSize); + } + + public function rewind(): void + { + $this->iterator->rewind(); + $this->position = 0; + $this->loadNextChunk(); + } + + public function current(): mixed + { + return $this->currentChunk; + } + + public function key(): mixed + { + return $this->position; + } + + public function next(): void + { + $this->position++; + $this->loadNextChunk(); + } + + public function valid(): bool + { + return $this->currentChunk !== null; + } + + private function loadNextChunk(): void + { + $chunk = []; + + for ($i = 0; $i < $this->chunkSize && $this->iterator->valid(); $i++) { + $chunk[] = $this->iterator->current(); + $this->iterator->next(); + } + + $this->currentChunk = empty($chunk) ? null : $chunk; + } +} \ No newline at end of file diff --git a/src/Streams/Iterators/FilterIterator.php b/src/Streams/Iterators/FilterIterator.php new file mode 100644 index 0000000..36c422a --- /dev/null +++ b/src/Streams/Iterators/FilterIterator.php @@ -0,0 +1,22 @@ +predicate = $predicate; + } + + public function accept(): bool + { + return ($this->predicate)($this->current()); + } +} \ No newline at end of file diff --git a/src/Streams/Iterators/FlatMapIterator.php b/src/Streams/Iterators/FlatMapIterator.php new file mode 100644 index 0000000..3f8e50c --- /dev/null +++ b/src/Streams/Iterators/FlatMapIterator.php @@ -0,0 +1,79 @@ +iterator = $iterator; + $this->callback = $callback; + } + + public function rewind(): void + { + $this->iterator->rewind(); + $this->advance(); + } + + public function current(): mixed + { + return $this->currentInnerIterator?->current(); + } + + public function key(): mixed + { + return null; // Keys are not preserved in flatMap + } + + public function next(): void + { + if ($this->currentInnerIterator) { + $this->currentInnerIterator->next(); + if (!$this->currentInnerIterator->valid()) { + $this->iterator->next(); + $this->advance(); + } + } + } + + public function valid(): bool + { + return $this->currentInnerIterator && $this->currentInnerIterator->valid(); + } + + private function advance(): void + { + while ($this->iterator->valid()) { + $result = ($this->callback)($this->iterator->current()); + + if (is_array($result)) { + $this->currentInnerIterator = new \ArrayIterator($result); + } elseif ($result instanceof \Iterator) { + $this->currentInnerIterator = $result; + } elseif ($result instanceof \IteratorAggregate) { + $this->currentInnerIterator = $result->getIterator(); + } else { + // Single value, wrap in array + $this->currentInnerIterator = new \ArrayIterator([$result]); + } + + $this->currentInnerIterator->rewind(); + if ($this->currentInnerIterator->valid()) { + return; + } + + $this->iterator->next(); + } + + $this->currentInnerIterator = null; + } +} \ No newline at end of file diff --git a/src/Streams/Iterators/MapIterator.php b/src/Streams/Iterators/MapIterator.php new file mode 100644 index 0000000..c88bc79 --- /dev/null +++ b/src/Streams/Iterators/MapIterator.php @@ -0,0 +1,27 @@ +callback = $callback; + } + + public function accept(): bool + { + return 
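// Usage sketch for the ExternalStorage class above: it assumes the namespace
// Ubiquity\SpaceTime\Storage (src/Storage/); keys and values are illustrative.

use Ubiquity\SpaceTime\Storage\ExternalStorage;

$store = new ExternalStorage('demo', ['compression' => true]);

$store->set('user:1', ['name' => 'Ada', 'roles' => ['admin']]);
$store->set('user:2', ['name' => 'Linus']);

var_dump($store->exists('user:1'));   // bool(true)
print_r($store->get('user:1'));       // ['name' => 'Ada', 'roles' => ['admin']]
print_r($store->getStats());          // keys, files, total_size, compression

$store->cleanup();                    // removes this prefix's .dat segments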
true; // Accept all elements + } + + public function current(): mixed + { + return ($this->callback)(parent::current()); + } +} \ No newline at end of file diff --git a/src/Streams/Iterators/SkipIterator.php b/src/Streams/Iterators/SkipIterator.php new file mode 100644 index 0000000..08ed21a --- /dev/null +++ b/src/Streams/Iterators/SkipIterator.php @@ -0,0 +1,39 @@ +skip = $skip; + } + + public function rewind(): void + { + parent::rewind(); + $this->position = 0; + $this->initialized = false; + + // Skip initial elements + while ($this->position < $this->skip && parent::valid()) { + parent::next(); + $this->position++; + } + $this->initialized = true; + } + + public function accept(): bool + { + // After initialization, accept all elements + return $this->initialized; + } +} \ No newline at end of file diff --git a/src/Streams/Iterators/TakeIterator.php b/src/Streams/Iterators/TakeIterator.php new file mode 100644 index 0000000..c9c2a5e --- /dev/null +++ b/src/Streams/Iterators/TakeIterator.php @@ -0,0 +1,46 @@ +iterator = $iterator; + $this->limit = $limit; + } + + public function rewind(): void + { + $this->iterator->rewind(); + $this->position = 0; + } + + public function current(): mixed + { + return $this->iterator->current(); + } + + public function key(): mixed + { + return $this->iterator->key(); + } + + public function next(): void + { + $this->iterator->next(); + $this->position++; + } + + public function valid(): bool + { + return $this->position < $this->limit && $this->iterator->valid(); + } +} \ No newline at end of file diff --git a/src/Streams/SpaceTimeStream.php b/src/Streams/SpaceTimeStream.php new file mode 100644 index 0000000..9f2295f --- /dev/null +++ b/src/Streams/SpaceTimeStream.php @@ -0,0 +1,523 @@ +source = $source; + } + + /** + * Create stream from array or iterable + */ + public static function from(iterable $source): self + { + return new self($source); + } + + /** + * Create stream from file + */ + public static function fromFile(string $filename, string $mode = 'r'): self + { + $generator = function() use ($filename, $mode) { + $handle = fopen($filename, $mode); + if (!$handle) { + throw new \RuntimeException("Cannot open file: $filename"); + } + + try { + while (($line = fgets($handle)) !== false) { + yield rtrim($line, "\r\n"); + } + } finally { + fclose($handle); + } + }; + + return new self($generator()); + } + + /** + * Create stream from CSV file + */ + public static function fromCsv(string $filename, array $options = []): self + { + $options = array_merge([ + 'delimiter' => ',', + 'enclosure' => '"', + 'escape' => '\\', + 'headers' => true, + ], $options); + + $generator = function() use ($filename, $options) { + $handle = fopen($filename, 'r'); + if (!$handle) { + throw new \RuntimeException("Cannot open CSV file: $filename"); + } + + try { + $headers = null; + if ($options['headers']) { + $headers = fgetcsv($handle, 0, $options['delimiter'], $options['enclosure'], $options['escape']); + } + + while (($row = fgetcsv($handle, 0, $options['delimiter'], $options['enclosure'], $options['escape'])) !== false) { + if ($headers) { + yield array_combine($headers, $row); + } else { + yield $row; + } + } + } finally { + fclose($handle); + } + }; + + return new self($generator()); + } + + /** + * Map operation + */ + public function map(callable $callback): self + { + $this->operations[] = ['type' => 'map', 'callback' => $callback]; + return $this; + } + + /** + * Filter operation + */ + public function filter(callable $callback): self + { + 
$this->operations[] = ['type' => 'filter', 'callback' => $callback]; + return $this; + } + + /** + * Flat map operation + */ + public function flatMap(callable $callback): self + { + $this->operations[] = ['type' => 'flatMap', 'callback' => $callback]; + return $this; + } + + /** + * Take first n elements + */ + public function take(int $n): self + { + $this->operations[] = ['type' => 'take', 'count' => $n]; + return $this; + } + + /** + * Skip first n elements + */ + public function skip(int $n): self + { + $this->operations[] = ['type' => 'skip', 'count' => $n]; + return $this; + } + + /** + * Chunk stream into √n sized chunks + */ + public function chunkBySqrtN(): self + { + $this->operations[] = ['type' => 'chunkBySqrtN']; + return $this; + } + + /** + * Chunk stream into fixed size chunks + */ + public function chunk(int $size): self + { + $this->operations[] = ['type' => 'chunk', 'size' => $size]; + return $this; + } + + /** + * Apply operations and iterate + */ + public function each(callable $callback): void + { + foreach ($this->toIterator() as $item) { + $callback($item); + } + } + + /** + * Collect to array (caution with large streams!) + */ + public function toArray(): array + { + return iterator_to_array($this->toIterator()); + } + + /** + * Reduce stream to single value + */ + public function reduce(callable $callback, mixed $initial = null): mixed + { + $accumulator = $initial; + + foreach ($this->toIterator() as $item) { + $accumulator = $callback($accumulator, $item); + } + + return $accumulator; + } + + /** + * Count elements + */ + public function count(): int + { + $count = 0; + foreach ($this->toIterator() as $item) { + $count++; + } + return $count; + } + + /** + * Write to file + */ + public function toFile(string $filename, string $mode = 'w'): void + { + $handle = fopen($filename, $mode); + if (!$handle) { + throw new \RuntimeException("Cannot open file for writing: $filename"); + } + + try { + foreach ($this->toIterator() as $item) { + fwrite($handle, (string)$item . "\n"); + } + } finally { + fclose($handle); + } + } + + /** + * Write to CSV file + */ + public function toCsv(string $filename, array $options = []): void + { + $options = array_merge([ + 'delimiter' => ',', + 'enclosure' => '"', + 'escape' => '\\', + 'headers' => true, + ], $options); + + $handle = fopen($filename, 'w'); + if (!$handle) { + throw new \RuntimeException("Cannot open CSV file for writing: $filename"); + } + + try { + $firstRow = true; + + foreach ($this->toIterator() as $row) { + if ($firstRow && $options['headers'] && is_array($row)) { + fputcsv($handle, array_keys($row), $options['delimiter'], $options['enclosure'], $options['escape']); + $firstRow = false; + } + + if (is_array($row)) { + fputcsv($handle, $row, $options['delimiter'], $options['enclosure'], $options['escape']); + } + } + } finally { + fclose($handle); + } + } + + /** + * Get iterator with all operations applied + */ + private function toIterator(): \Iterator + { + $iterator = $this->source instanceof \Iterator ? 
$this->source : new \ArrayIterator($this->source); + + foreach ($this->operations as $operation) { + $iterator = match($operation['type']) { + 'map' => new MapIterator($iterator, $operation['callback']), + 'filter' => new FilterIterator($iterator, $operation['callback']), + 'flatMap' => new FlatMapIterator($iterator, $operation['callback']), + 'take' => new TakeIterator($iterator, $operation['count']), + 'skip' => new SkipIterator($iterator, $operation['count']), + 'chunk' => new ChunkIterator($iterator, $operation['size']), + 'chunkBySqrtN' => new ChunkIterator($iterator, $this->estimateSqrtN()), + default => $iterator, + }; + } + + return $iterator; + } + + /** + * Estimate √n for chunking + */ + private function estimateSqrtN(): int + { + // If source is countable, use exact count + if (is_array($this->source) || $this->source instanceof \Countable) { + return SpaceTimeConfig::calculateSqrtN(count($this->source)); + } + + // Otherwise use a reasonable default + return 1000; + } +} + +/** + * Map iterator + */ +class MapIterator extends \IteratorIterator +{ + private $callback; + + public function __construct(\Iterator $iterator, callable $callback) + { + parent::__construct($iterator); + $this->callback = $callback; + } + + public function current(): mixed + { + return ($this->callback)(parent::current()); + } +} + +/** + * Filter iterator + */ +class FilterIterator extends \FilterIterator +{ + private $callback; + + public function __construct(\Iterator $iterator, callable $callback) + { + parent::__construct($iterator); + $this->callback = $callback; + } + + public function accept(): bool + { + return ($this->callback)($this->current()); + } +} + +/** + * Flat map iterator + */ +class FlatMapIterator implements \Iterator +{ + private \Iterator $iterator; + private $callback; + private ?\Iterator $currentIterator = null; + private int $index = 0; + + public function __construct(\Iterator $iterator, callable $callback) + { + $this->iterator = $iterator; + $this->callback = $callback; + } + + public function rewind(): void + { + $this->iterator->rewind(); + $this->index = 0; + $this->loadCurrentIterator(); + } + + public function current(): mixed + { + return $this->currentIterator?->current(); + } + + public function key(): mixed + { + return $this->index; + } + + public function next(): void + { + $this->index++; + $this->currentIterator?->next(); + + if (!$this->currentIterator || !$this->currentIterator->valid()) { + $this->iterator->next(); + $this->loadCurrentIterator(); + } + } + + public function valid(): bool + { + return $this->currentIterator && $this->currentIterator->valid(); + } + + private function loadCurrentIterator(): void + { + $this->currentIterator = null; + + while ($this->iterator->valid()) { + $result = ($this->callback)($this->iterator->current()); + + if (is_array($result)) { + $this->currentIterator = new \ArrayIterator($result); + } elseif ($result instanceof \Iterator) { + $this->currentIterator = $result; + } elseif ($result instanceof \IteratorAggregate) { + $this->currentIterator = $result->getIterator(); + } else { + $this->currentIterator = new \ArrayIterator([$result]); + } + + $this->currentIterator->rewind(); + + if ($this->currentIterator->valid()) { + return; + } + + // Current result is empty, move to next + $this->iterator->next(); + } + } +} + +/** + * Take iterator + */ +class TakeIterator extends \IteratorIterator +{ + private int $count; + private int $taken = 0; + + public function __construct(\Iterator $iterator, int $count) + { + 
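// Usage sketch for the stream pipeline above: operations are recorded lazily and
// only applied when a terminal call (each, reduce, toArray, toCsv, ...) runs.
// Assumes the namespace Ubiquity\SpaceTime\Streams; sales.csv is hypothetical.

use Ubiquity\SpaceTime\Streams\SpaceTimeStream;

$total = SpaceTimeStream::fromCsv('/tmp/sales.csv')
    ->filter(fn (array $row) => $row['status'] === 'paid')
    ->map(fn (array $row) => (float) $row['amount'])
    ->reduce(fn (?float $sum, float $amount) => ($sum ?? 0.0) + $amount);

// Chunked side effects: each callback receives an array of up to 500 rows.
SpaceTimeStream::fromCsv('/tmp/sales.csv')
    ->chunk(500)
    ->each(function (array $rows) {
        // e.g. one bulk insert per chunk
    });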
parent::__construct($iterator); + $this->count = $count; + } + + public function rewind(): void + { + parent::rewind(); + $this->taken = 0; + } + + public function next(): void + { + parent::next(); + $this->taken++; + } + + public function valid(): bool + { + return $this->taken < $this->count && parent::valid(); + } +} + +/** + * Skip iterator + */ +class SkipIterator extends \IteratorIterator +{ + private int $count; + private bool $skipped = false; + + public function __construct(\Iterator $iterator, int $count) + { + parent::__construct($iterator); + $this->count = $count; + } + + public function rewind(): void + { + parent::rewind(); + $this->skip(); + } + + private function skip(): void + { + if (!$this->skipped) { + for ($i = 0; $i < $this->count && parent::valid(); $i++) { + parent::next(); + } + $this->skipped = true; + } + } +} + +/** + * Chunk iterator + */ +class ChunkIterator implements \Iterator +{ + private \Iterator $iterator; + private int $chunkSize; + private array $currentChunk = []; + private int $position = 0; + + public function __construct(\Iterator $iterator, int $chunkSize) + { + $this->iterator = $iterator; + $this->chunkSize = max(1, $chunkSize); + } + + public function rewind(): void + { + $this->iterator->rewind(); + $this->position = 0; + $this->loadChunk(); + } + + public function current(): array + { + return $this->currentChunk; + } + + public function key(): int + { + return $this->position; + } + + public function next(): void + { + $this->position++; + $this->loadChunk(); + } + + public function valid(): bool + { + return !empty($this->currentChunk); + } + + private function loadChunk(): void + { + $this->currentChunk = []; + + for ($i = 0; $i < $this->chunkSize && $this->iterator->valid(); $i++) { + $this->currentChunk[] = $this->iterator->current(); + $this->iterator->next(); + } + } +} \ No newline at end of file diff --git a/src/Symfony/Command/ProcessFileCommand.php b/src/Symfony/Command/ProcessFileCommand.php new file mode 100644 index 0000000..9682a84 --- /dev/null +++ b/src/Symfony/Command/ProcessFileCommand.php @@ -0,0 +1,212 @@ +addArgument('input', InputArgument::REQUIRED, 'Input file path') + ->addArgument('output', InputArgument::REQUIRED, 'Output file path') + ->addOption('format', 'f', InputOption::VALUE_REQUIRED, 'File format (csv, json, text)', 'text') + ->addOption('checkpoint', 'c', InputOption::VALUE_NONE, 'Enable checkpointing') + ->addOption('filter', null, InputOption::VALUE_REQUIRED, 'Filter expression') + ->addOption('transform', null, InputOption::VALUE_REQUIRED, 'Transform expression'); + } + + protected function execute(InputInterface $input, OutputInterface $output): int + { + $io = new SymfonyStyle($input, $output); + + $inputFile = $input->getArgument('input'); + $outputFile = $input->getArgument('output'); + $format = $input->getOption('format'); + $useCheckpoint = $input->getOption('checkpoint'); + + if (!file_exists($inputFile)) { + $io->error("Input file not found: $inputFile"); + return Command::FAILURE; + } + + $io->title('SpaceTime File Processor'); + $io->text([ + "Input: $inputFile", + "Output: $outputFile", + "Format: $format", + "Checkpointing: " . ($useCheckpoint ? 
'Enabled' : 'Disabled'), + ]); + + try { + // Create stream based on format + $stream = match($format) { + 'csv' => SpaceTimeStream::fromCsv($inputFile), + 'json' => $this->createJsonStream($inputFile), + default => SpaceTimeStream::fromFile($inputFile), + }; + + // Apply filters if specified + if ($filter = $input->getOption('filter')) { + $stream = $stream->filter($this->createFilterFunction($filter)); + } + + // Apply transformations if specified + if ($transform = $input->getOption('transform')) { + $stream = $stream->map($this->createTransformFunction($transform)); + } + + // Process with checkpoint support + if ($useCheckpoint) { + $this->processWithCheckpoint($stream, $outputFile, $format, $io); + } else { + $this->processStream($stream, $outputFile, $format, $io); + } + + $io->success('File processed successfully!'); + return Command::SUCCESS; + + } catch (\Exception $e) { + $io->error('Processing failed: ' . $e->getMessage()); + return Command::FAILURE; + } + } + + private function processStream(SpaceTimeStream $stream, string $outputFile, string $format, SymfonyStyle $io): void + { + $count = 0; + $progressBar = $io->createProgressBar(); + + // Process based on format + match($format) { + 'csv' => $stream->toCsv($outputFile), + 'json' => $this->saveAsJson($stream, $outputFile), + default => $stream->toFile($outputFile), + }; + + $progressBar->finish(); + $io->newLine(); + } + + private function processWithCheckpoint(SpaceTimeStream $stream, string $outputFile, string $format, SymfonyStyle $io): void + { + $checkpoint = new CheckpointManager('process_file_' . md5($outputFile)); + + $checkpoint->wrap(function($state) use ($stream, $outputFile, $format, $io) { + $processed = $state['processed'] ?? 0; + $handle = fopen($outputFile, $processed > 0 ? 'a' : 'w'); + + $stream->skip($processed)->each(function($item) use ($handle, &$processed, $checkpoint) { + fwrite($handle, json_encode($item) . 
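// Usage sketch for the console command above: registering it in a standalone
// Symfony Console application. The command name lives in the class header that is
// not shown in this hunk; "spacetime:process-file" below is only a placeholder.

use Symfony\Component\Console\Application;
use Ubiquity\SpaceTime\Symfony\Command\ProcessFileCommand;

$app = new Application('spacetime');
$app->add(new ProcessFileCommand());
$app->run();

// Shell invocation (placeholder name, arguments and options as declared in configure()):
//   php cli.php spacetime:process-file data/in.csv data/out.jsonl \
//       --format=csv --filter="price > 100" --transform="upper(name)" --checkpoint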
"\n"); + $processed++; + + if ($checkpoint->shouldCheckpoint()) { + $checkpoint->save(['processed' => $processed]); + } + }); + + fclose($handle); + + return $processed; + }); + } + + private function createJsonStream(string $file): SpaceTimeStream + { + return SpaceTimeStream::from(function() use ($file) { + $content = file_get_contents($file); + $data = json_decode($content, true); + + if (is_array($data)) { + foreach ($data as $item) { + yield $item; + } + } + }); + } + + private function saveAsJson(SpaceTimeStream $stream, string $outputFile): void + { + $handle = fopen($outputFile, 'w'); + fwrite($handle, "[\n"); + + $first = true; + $stream->each(function($item) use ($handle, &$first) { + if (!$first) { + fwrite($handle, ",\n"); + } + fwrite($handle, json_encode($item)); + $first = false; + }); + + fwrite($handle, "\n]"); + fclose($handle); + } + + private function createFilterFunction(string $expression): callable + { + // Simple expression parser (in production, use a proper expression evaluator) + return function($item) use ($expression) { + // Example: "price > 100" + if (preg_match('/(\w+)\s*([><=]+)\s*(.+)/', $expression, $matches)) { + $field = $matches[1]; + $operator = $matches[2]; + $value = $matches[3]; + + if (!isset($item[$field])) { + return false; + } + + return match($operator) { + '>' => $item[$field] > $value, + '<' => $item[$field] < $value, + '>=' => $item[$field] >= $value, + '<=' => $item[$field] <= $value, + '=' => $item[$field] == $value, + default => true, + }; + } + + return true; + }; + } + + private function createTransformFunction(string $expression): callable + { + // Simple transformation (in production, use a proper expression evaluator) + return function($item) use ($expression) { + // Example: "upper(name)" + if (preg_match('/(\w+)\((\w+)\)/', $expression, $matches)) { + $function = $matches[1]; + $field = $matches[2]; + + if (isset($item[$field])) { + $item[$field] = match($function) { + 'upper' => strtoupper($item[$field]), + 'lower' => strtolower($item[$field]), + 'trim' => trim($item[$field]), + default => $item[$field], + }; + } + } + + return $item; + }; + } +} \ No newline at end of file diff --git a/src/Symfony/SpaceTimeBundle.php b/src/Symfony/SpaceTimeBundle.php new file mode 100644 index 0000000..5c1b5d0 --- /dev/null +++ b/src/Symfony/SpaceTimeBundle.php @@ -0,0 +1,68 @@ +import('../config/services.yaml'); + + // Configure SpaceTime + $container->parameters() + ->set('spacetime.memory_limit', $config['memory_limit'] ?? '256M') + ->set('spacetime.storage_path', $config['storage_path'] ?? '%kernel.project_dir%/var/spacetime') + ->set('spacetime.chunk_strategy', $config['chunk_strategy'] ?? 'sqrt_n') + ->set('spacetime.enable_checkpointing', $config['enable_checkpointing'] ?? true) + ->set('spacetime.compression', $config['compression'] ?? 
true); + } + + public function configure(DefinitionConfigurator $definition): void + { + $definition->rootNode() + ->children() + ->scalarNode('memory_limit') + ->defaultValue('256M') + ->info('Maximum memory that SpaceTime operations can use') + ->end() + ->scalarNode('storage_path') + ->defaultValue('%kernel.project_dir%/var/spacetime') + ->info('Directory for temporary files') + ->end() + ->enumNode('chunk_strategy') + ->values(['sqrt_n', 'memory_based', 'fixed']) + ->defaultValue('sqrt_n') + ->info('Strategy for determining chunk sizes') + ->end() + ->booleanNode('enable_checkpointing') + ->defaultTrue() + ->info('Enable automatic checkpointing') + ->end() + ->booleanNode('compression') + ->defaultTrue() + ->info('Compress data in external storage') + ->end() + ->integerNode('compression_level') + ->defaultValue(6) + ->min(1) + ->max(9) + ->info('Compression level (1-9)') + ->end() + ->end(); + } +} \ No newline at end of file diff --git a/tests/Algorithms/ExternalGroupByTest.php b/tests/Algorithms/ExternalGroupByTest.php new file mode 100644 index 0000000..cf841e2 --- /dev/null +++ b/tests/Algorithms/ExternalGroupByTest.php @@ -0,0 +1,150 @@ + sys_get_temp_dir() . '/spacetime_test', + ]); + } + + protected function tearDown(): void + { + $path = sys_get_temp_dir() . '/spacetime_test'; + if (is_dir($path)) { + array_map('unlink', glob("$path/*")); + rmdir($path); + } + + parent::tearDown(); + } + + public function testBasicGroupBy(): void + { + $data = [ + ['category' => 'A', 'value' => 1], + ['category' => 'B', 'value' => 2], + ['category' => 'A', 'value' => 3], + ['category' => 'B', 'value' => 4], + ['category' => 'C', 'value' => 5], + ]; + + $grouped = ExternalGroupBy::groupBy($data, fn($item) => $item['category']); + + $this->assertCount(3, $grouped); + $this->assertCount(2, $grouped['A']); + $this->assertCount(2, $grouped['B']); + $this->assertCount(1, $grouped['C']); + + $this->assertEquals(1, $grouped['A'][0]['value']); + $this->assertEquals(3, $grouped['A'][1]['value']); + } + + public function testGroupByCount(): void + { + $data = [ + ['type' => 'foo'], + ['type' => 'bar'], + ['type' => 'foo'], + ['type' => 'baz'], + ['type' => 'foo'], + ]; + + $counts = ExternalGroupBy::groupByCount($data, fn($item) => $item['type']); + + $this->assertEquals(3, $counts['foo']); + $this->assertEquals(1, $counts['bar']); + $this->assertEquals(1, $counts['baz']); + } + + public function testGroupBySum(): void + { + $data = [ + ['group' => 'A', 'amount' => 10], + ['group' => 'B', 'amount' => 20], + ['group' => 'A', 'amount' => 15], + ['group' => 'B', 'amount' => 25], + ]; + + $sums = ExternalGroupBy::groupBySum( + $data, + fn($item) => $item['group'], + fn($item) => $item['amount'] + ); + + $this->assertEquals(25, $sums['A']); + $this->assertEquals(45, $sums['B']); + } + + public function testGroupByAggregate(): void + { + $data = [ + ['user' => 'john', 'score' => 80], + ['user' => 'jane', 'score' => 90], + ['user' => 'john', 'score' => 85], + ['user' => 'jane', 'score' => 95], + ]; + + $maxScores = ExternalGroupBy::groupByAggregate( + $data, + fn($item) => $item['user'], + fn($max, $item) => max($max ?? 
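// Usage sketch for ExternalGroupBy outside the test harness: it assumes the
// namespace Ubiquity\SpaceTime\Algorithms (src/Algorithms/); $orders is hypothetical.

use Ubiquity\SpaceTime\Algorithms\ExternalGroupBy;

$orders = [ /* rows shaped like ['country' => 'DE', 'total' => 19.99] */ ];

// Aggregate per group without keeping the grouped rows around.
$revenue = ExternalGroupBy::groupBySum(
    $orders,
    fn (array $o) => $o['country'],
    fn (array $o) => $o['total']
);

// Or walk one group at a time so finished groups can be freed immediately.
foreach (ExternalGroupBy::groupByStreaming($orders, fn ($o) => $o['country']) as $country => $items) {
    printf("%s: %d orders\n", $country, count($items));
}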
0, $item['score']), + 0 + ); + + $this->assertEquals(85, $maxScores['john']); + $this->assertEquals(95, $maxScores['jane']); + } + + public function testGroupByStreaming(): void + { + $data = []; + for ($i = 0; $i < 100; $i++) { + $data[] = [ + 'group' => chr(65 + ($i % 5)), // A-E + 'value' => $i, + ]; + } + + $groups = []; + foreach (ExternalGroupBy::groupByStreaming($data, fn($item) => $item['group']) as $key => $items) { + $groups[$key] = count($items); + } + + $this->assertCount(5, $groups); + $this->assertEquals(20, $groups['A']); + $this->assertEquals(20, $groups['B']); + } + + public function testGroupByWithLimit(): void + { + $data = []; + for ($i = 0; $i < 50; $i++) { + $data[] = ['key' => "group_$i", 'value' => $i]; + } + + $grouped = ExternalGroupBy::groupByWithLimit( + $data, + fn($item) => $item['key'], + 5 // Small limit to force external storage + ); + + $this->assertCount(50, $grouped); + + foreach ($grouped as $key => $items) { + $this->assertCount(1, $items); + $this->assertEquals($key, $items[0]['key']); + } + } +} \ No newline at end of file diff --git a/tests/Algorithms/ExternalSortTest.php b/tests/Algorithms/ExternalSortTest.php new file mode 100644 index 0000000..3e8b761 --- /dev/null +++ b/tests/Algorithms/ExternalSortTest.php @@ -0,0 +1,111 @@ + sys_get_temp_dir() . '/spacetime_test', + ]); + } + + protected function tearDown(): void + { + $path = sys_get_temp_dir() . '/spacetime_test'; + if (is_dir($path)) { + array_map('unlink', glob("$path/*")); + rmdir($path); + } + + parent::tearDown(); + } + + public function testBasicSort(): void + { + $data = [5, 2, 8, 1, 9, 3, 7, 4, 6]; + $sorted = ExternalSort::sort($data); + + $this->assertEquals([1, 2, 3, 4, 5, 6, 7, 8, 9], $sorted); + } + + public function testSortWithCustomComparator(): void + { + $data = [5, 2, 8, 1, 9, 3, 7, 4, 6]; + $sorted = ExternalSort::sort($data, fn($a, $b) => $b <=> $a); + + $this->assertEquals([9, 8, 7, 6, 5, 4, 3, 2, 1], $sorted); + } + + public function testSortBy(): void + { + $data = [ + ['name' => 'John', 'age' => 25], + ['name' => 'Jane', 'age' => 30], + ['name' => 'Bob', 'age' => 20], + ]; + + $sorted = ExternalSort::sortBy($data, fn($item) => $item['age']); + + $this->assertEquals('Bob', $sorted[0]['name']); + $this->assertEquals('John', $sorted[1]['name']); + $this->assertEquals('Jane', $sorted[2]['name']); + } + + public function testLargeDataSet(): void + { + // Generate large dataset + $data = []; + for ($i = 0; $i < 20000; $i++) { + $data[] = mt_rand(1, 100000); + } + + $sorted = ExternalSort::sort($data); + + // Verify it's sorted + for ($i = 1; $i < count($sorted); $i++) { + $this->assertGreaterThanOrEqual($sorted[$i - 1], $sorted[$i]); + } + + // Verify same elements + $this->assertEquals(count($data), count($sorted)); + sort($data); + $this->assertEquals($data, $sorted); + } + + public function testSortObjects(): void + { + $objects = [ + (object)['id' => 3, 'value' => 'c'], + (object)['id' => 1, 'value' => 'a'], + (object)['id' => 2, 'value' => 'b'], + ]; + + $sorted = ExternalSort::sortBy($objects, fn($obj) => $obj->id); + + $this->assertEquals(1, $sorted[0]->id); + $this->assertEquals(2, $sorted[1]->id); + $this->assertEquals(3, $sorted[2]->id); + } + + public function testStreamingSort(): void + { + $data = range(10, 1); + $result = []; + + foreach (ExternalSort::sortStreaming($data) as $item) { + $result[] = $item; + } + + $this->assertEquals(range(1, 10), $result); + } +} \ No newline at end of file diff --git a/tests/Batch/BatchProcessorTest.php 
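// Usage sketch for ExternalSort: it assumes the namespace
// Ubiquity\SpaceTime\Algorithms; $events is a small hypothetical sample, the point
// being that the same calls keep working when the input is far larger than memory.

use Ubiquity\SpaceTime\Algorithms\ExternalSort;

$events = [ /* rows shaped like ['ts' => 1721456000, 'payload' => [...]] */ ];

// Sorted copy, ordered by timestamp via the key extractor.
$ordered = ExternalSort::sortBy($events, fn (array $e) => $e['ts']);

// Streaming variant over scalar values: items arrive one by one in sorted order.
foreach (ExternalSort::sortStreaming(array_column($events, 'ts')) as $ts) {
    // consume one timestamp at a time
}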
b/tests/Batch/BatchProcessorTest.php new file mode 100644 index 0000000..7cf1c36 --- /dev/null +++ b/tests/Batch/BatchProcessorTest.php @@ -0,0 +1,132 @@ + 3, + 'checkpoint_enabled' => false, + ]); + + $items = range(1, 10); + + $result = $processor->process($items, function($batch) { + $processed = []; + foreach ($batch as $key => $item) { + $processed[$key] = $item * 2; + } + return $processed; + }); + + $this->assertEquals(10, $result->getProcessedCount()); + $this->assertEquals(10, $result->getSuccessCount()); + $this->assertEquals(0, $result->getErrorCount()); + + // Check results + $results = $result->getResults(); + $this->assertEquals(2, $results[0]); + $this->assertEquals(20, $results[9]); + } + + public function testBatchProcessingWithErrors(): void + { + $processor = new BatchProcessor([ + 'batch_size' => 2, + 'checkpoint_enabled' => false, + 'max_retries' => 1, + ]); + + $items = range(1, 5); + + $result = $processor->process($items, function($batch) { + $processed = []; + foreach ($batch as $key => $item) { + if ($item === 3) { + throw new \Exception('Error processing item 3'); + } + $processed[$key] = $item * 2; + } + return $processed; + }); + + $this->assertEquals(5, $result->getProcessedCount()); + $this->assertEquals(3, $result->getSuccessCount()); + $this->assertEquals(2, $result->getErrorCount()); + + $errors = $result->getErrors(); + $this->assertArrayHasKey(2, $errors); // Item 3 is at index 2 + } + + public function testProgressCallback(): void + { + $progressCalls = []; + + $processor = new BatchProcessor([ + 'batch_size' => 2, + 'checkpoint_enabled' => false, + 'progress_callback' => function($batchNumber, $batchSize, $result) use (&$progressCalls) { + $progressCalls[] = [ + 'batch' => $batchNumber, + 'size' => $batchSize, + 'processed' => $result->getProcessedCount(), + ]; + }, + ]); + + $items = range(1, 5); + $processor->process($items, fn($batch) => $batch); + + $this->assertCount(3, $progressCalls); // 5 items in batches of 2 + $this->assertEquals(0, $progressCalls[0]['batch']); + $this->assertEquals(2, $progressCalls[0]['size']); + } + + public function testBatchResult(): void + { + $result = new BatchResult(); + + $result->addSuccess('key1', 'value1'); + $result->addSuccess('key2', 'value2'); + $result->addError('key3', new \Exception('Error')); + + $this->assertEquals(3, $result->getProcessedCount()); + $this->assertEquals(2, $result->getSuccessCount()); + $this->assertEquals(1, $result->getErrorCount()); + + $this->assertTrue($result->isProcessed('key1')); + $this->assertFalse($result->isComplete()); + + $this->assertEquals('value1', $result->getResult('key1')); + $this->assertNotNull($result->getError('key3')); + + $summary = $result->getSummary(); + $this->assertEquals(3, $summary['total_processed']); + $this->assertGreaterThan(0, $summary['execution_time']); + } + + public function testCheckpointingState(): void + { + $result = new BatchResult(); + + $result->addSuccess('a', 1); + $result->addSuccess('b', 2); + + $state = $result->getState(); + + $newResult = new BatchResult(); + $newResult->restore($state); + + $this->assertEquals(2, $newResult->getProcessedCount()); + $this->assertEquals(1, $newResult->getResult('a')); + $this->assertEquals(2, $newResult->getResult('b')); + } +} \ No newline at end of file diff --git a/tests/Checkpoint/CheckpointManagerTest.php b/tests/Checkpoint/CheckpointManagerTest.php new file mode 100644 index 0000000..825a96e --- /dev/null +++ b/tests/Checkpoint/CheckpointManagerTest.php @@ -0,0 +1,220 @@ +mockStorage = 
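// Usage sketch for the batch processor exercised by the tests above: it assumes
// the namespace Ubiquity\SpaceTime\Batch; $rowsToImport and importRow() are hypothetical.

use Ubiquity\SpaceTime\Batch\BatchProcessor;

$processor = new BatchProcessor([
    'batch_size'         => 500,
    'max_retries'        => 2,
    'checkpoint_enabled' => true,
    'progress_callback'  => fn ($batchNo, $size, $result) =>
        printf("batch %d done, %d items processed\n", $batchNo, $result->getProcessedCount()),
]);

$result = $processor->process($rowsToImport, function (array $batch): array {
    $saved = [];
    foreach ($batch as $key => $row) {
        $saved[$key] = importRow($row);   // may throw; failures are retried, then recorded
    }
    return $saved;                        // results are collected per original key
});

printf("%d ok, %d failed\n", $result->getSuccessCount(), $result->getErrorCount());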
$this->createMock(CheckpointStorage::class); + $this->manager = new CheckpointManager('test-checkpoint', $this->mockStorage); + } + + public function testShouldCheckpointReturnsFalseWhenCheckpointingDisabled(): void + { + // Mock the static config method + $this->assertFalse($this->manager->shouldCheckpoint()); + } + + public function testShouldCheckpointReturnsTrueAfterInterval(): void + { + // This test would need to mock SpaceTimeConfig::isCheckpointingEnabled() + // and handle time-based logic + $this->markTestSkipped('Requires static method mocking for SpaceTimeConfig'); + } + + public function testSaveStoresCheckpointData(): void + { + $testData = ['progress' => 50, 'items_processed' => 100]; + + $this->mockStorage + ->expects($this->once()) + ->method('save') + ->with( + $this->equalTo('test-checkpoint'), + $this->callback(function ($checkpoint) use ($testData) { + return $checkpoint['id'] === 'test-checkpoint' && + isset($checkpoint['timestamp']) && + $checkpoint['data'] === $testData; + }) + ); + + $this->manager->save($testData); + } + + public function testLoadReturnsCheckpointData(): void + { + $testData = ['progress' => 75, 'items_processed' => 150]; + $checkpoint = [ + 'id' => 'test-checkpoint', + 'timestamp' => time(), + 'data' => $testData, + ]; + + $this->mockStorage + ->expects($this->once()) + ->method('load') + ->with('test-checkpoint') + ->willReturn($checkpoint); + + $loadedData = $this->manager->load(); + $this->assertEquals($testData, $loadedData); + } + + public function testLoadReturnsNullWhenNoCheckpoint(): void + { + $this->mockStorage + ->expects($this->once()) + ->method('load') + ->with('test-checkpoint') + ->willReturn(null); + + $this->assertNull($this->manager->load()); + } + + public function testLoadReturnsNullWhenCheckpointHasNoData(): void + { + $checkpoint = [ + 'id' => 'test-checkpoint', + 'timestamp' => time(), + // No 'data' key + ]; + + $this->mockStorage + ->expects($this->once()) + ->method('load') + ->with('test-checkpoint') + ->willReturn($checkpoint); + + $this->assertNull($this->manager->load()); + } + + public function testExistsReturnsStorageResult(): void + { + $this->mockStorage + ->expects($this->once()) + ->method('exists') + ->with('test-checkpoint') + ->willReturn(true); + + $this->assertTrue($this->manager->exists()); + + $this->mockStorage + ->expects($this->once()) + ->method('exists') + ->with('test-checkpoint') + ->willReturn(false); + + $this->assertFalse($this->manager->exists()); + } + + public function testDeleteRemovesCheckpoint(): void + { + $this->mockStorage + ->expects($this->once()) + ->method('delete') + ->with('test-checkpoint'); + + $this->manager->delete(); + } + + public function testSetIntervalUpdatesCheckpointInterval(): void + { + // Test minimum interval of 1 second + $this->manager->setInterval(0); + // We can't directly test the interval value since it's private, + // but we can ensure the method doesn't throw an exception + $this->assertTrue(true); + + $this->manager->setInterval(120); + $this->assertTrue(true); + } + + public function testWrapExecutesOperationWithInitialState(): void + { + $initialState = ['counter' => 0]; + $expectedResult = 'operation completed'; + + $this->mockStorage + ->expects($this->once()) + ->method('load') + ->willReturn(null); // No existing checkpoint + + $this->mockStorage + ->expects($this->once()) + ->method('delete') + ->with('test-checkpoint'); + + $operation = function ($state, $manager) use ($expectedResult) { + $this->assertEquals(['counter' => 0], $state); + 
$this->assertInstanceOf(CheckpointManager::class, $manager); + return $expectedResult; + }; + + $result = $this->manager->wrap($operation, $initialState); + $this->assertEquals($expectedResult, $result); + } + + public function testWrapResumesFromCheckpoint(): void + { + $checkpointData = ['counter' => 50]; + $checkpoint = [ + 'id' => 'test-checkpoint', + 'timestamp' => time(), + 'data' => $checkpointData, + ]; + + $this->mockStorage + ->expects($this->once()) + ->method('load') + ->willReturn($checkpoint); + + $this->mockStorage + ->expects($this->once()) + ->method('delete') + ->with('test-checkpoint'); + + $operation = function ($state, $manager) { + $this->assertEquals(['counter' => 50], $state); + return 'resumed and completed'; + }; + + $result = $this->manager->wrap($operation, ['counter' => 0]); + $this->assertEquals('resumed and completed', $result); + } + + public function testWrapPreservesCheckpointOnException(): void + { + $this->mockStorage + ->expects($this->once()) + ->method('load') + ->willReturn(null); + + // Delete should NOT be called when exception is thrown + $this->mockStorage + ->expects($this->never()) + ->method('delete'); + + $operation = function ($state, $manager) { + throw new \RuntimeException('Operation failed'); + }; + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('Operation failed'); + + $this->manager->wrap($operation); + } +} \ No newline at end of file diff --git a/tests/Collections/SpaceTimeArrayTest.php b/tests/Collections/SpaceTimeArrayTest.php new file mode 100644 index 0000000..e8036aa --- /dev/null +++ b/tests/Collections/SpaceTimeArrayTest.php @@ -0,0 +1,135 @@ + '10M', + 'external_storage_path' => sys_get_temp_dir() . '/spacetime_test', + ]); + } + + protected function tearDown(): void + { + // Clean up test files + $path = sys_get_temp_dir() . 
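// Usage sketch for CheckpointManager::wrap(), mirroring the behaviour covered by
// the tests above: state is restored after a restart and the checkpoint is deleted
// only when the closure completes. $rows is hypothetical; the one-argument
// constructor falls back to whichever storage backend the library defaults to.

use Ubiquity\SpaceTime\Checkpoint\CheckpointManager;

$checkpoint = new CheckpointManager('import:2025-07');
$checkpoint->setInterval(60);   // persist progress at most once a minute

$checkpoint->wrap(function (array $state, CheckpointManager $cp) use ($rows) {
    $offset = $state['offset'] ?? 0;

    foreach (array_slice($rows, $offset, null, true) as $i => $row) {
        // ... process $row ...
        $offset = $i + 1;

        if ($cp->shouldCheckpoint()) {
            $cp->save(['offset' => $offset]);
        }
    }

    return $offset;
}, ['offset' => 0]);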
'/spacetime_test'; + if (is_dir($path)) { + array_map('unlink', glob("$path/*")); + rmdir($path); + } + + parent::tearDown(); + } + + public function testBasicArrayOperations(): void + { + $array = new SpaceTimeArray(100); + + // Test set and get + $array['key1'] = 'value1'; + $this->assertEquals('value1', $array['key1']); + + // Test isset + $this->assertTrue(isset($array['key1'])); + $this->assertFalse(isset($array['key2'])); + + // Test unset + unset($array['key1']); + $this->assertFalse(isset($array['key1'])); + + // Test count + $array['a'] = 1; + $array['b'] = 2; + $this->assertEquals(2, count($array)); + } + + public function testSpilloverToExternalStorage(): void + { + $array = new SpaceTimeArray(2); // Small threshold + + // Add items that will stay in memory + $array['hot1'] = 'value1'; + $array['hot2'] = 'value2'; + + // This should trigger spillover + $array['cold1'] = 'value3'; + $array['cold2'] = 'value4'; + + // All items should still be accessible + $this->assertEquals('value1', $array['hot1']); + $this->assertEquals('value3', $array['cold1']); + + // Count should include all items + $this->assertEquals(4, count($array)); + } + + public function testIterator(): void + { + $array = new SpaceTimeArray(2); + + $data = ['a' => 1, 'b' => 2, 'c' => 3, 'd' => 4]; + foreach ($data as $key => $value) { + $array[$key] = $value; + } + + // Test iteration + $result = []; + foreach ($array as $key => $value) { + $result[$key] = $value; + } + + $this->assertEquals($data, $result); + } + + public function testLargeDataSet(): void + { + $array = new SpaceTimeArray(100); + + // Add 1000 items + for ($i = 0; $i < 1000; $i++) { + $array["key_$i"] = "value_$i"; + } + + // Verify count + $this->assertEquals(1000, count($array)); + + // Verify random access + $this->assertEquals('value_500', $array['key_500']); + $this->assertEquals('value_999', $array['key_999']); + $this->assertEquals('value_0', $array['key_0']); + } + + public function testArrayMethods(): void + { + $array = new SpaceTimeArray(10); + + $array['a'] = 1; + $array['b'] = 2; + $array['c'] = 3; + + // Test toArray + $this->assertEquals(['a' => 1, 'b' => 2, 'c' => 3], $array->toArray()); + + // Test keys + $this->assertEquals(['a', 'b', 'c'], $array->keys()); + + // Test values + $this->assertEquals([1, 2, 3], $array->values()); + + // Test clear + $array->clear(); + $this->assertEquals(0, count($array)); + } +} \ No newline at end of file diff --git a/tests/Memory/MemoryPressureMonitorTest.php b/tests/Memory/MemoryPressureMonitorTest.php new file mode 100644 index 0000000..802d3f1 --- /dev/null +++ b/tests/Memory/MemoryPressureMonitorTest.php @@ -0,0 +1,93 @@ +getCurrentLevel(); + $this->assertInstanceOf(MemoryPressureLevel::class, $level); + } + + public function testMemoryInfo(): void + { + $monitor = new MemoryPressureMonitor(); + $info = $monitor->getMemoryInfo(); + + $this->assertArrayHasKey('limit', $info); + $this->assertArrayHasKey('usage', $info); + $this->assertArrayHasKey('percentage', $info); + $this->assertArrayHasKey('available', $info); + + $this->assertGreaterThan(0, $info['limit']); + $this->assertGreaterThanOrEqual(0, $info['usage']); + $this->assertGreaterThanOrEqual(0, $info['percentage']); + $this->assertLessThanOrEqual(100, $info['percentage']); + } + + public function testHandlerRegistration(): void + { + $monitor = new MemoryPressureMonitor(); + + $handlerCalled = false; + $handler = new class($handlerCalled) implements MemoryPressureHandler { + private $called; + + public function __construct(&$called) + { 
+ $this->called = &$called; + } + + public function shouldHandle(MemoryPressureLevel $level): bool + { + return true; + } + + public function handle(MemoryPressureLevel $level, array $memoryInfo): void + { + $this->called = true; + } + }; + + $monitor->registerHandler($handler); + $monitor->check(); + + $this->assertTrue($handlerCalled); + } + + public function testMemoryLimitParsing(): void + { + // Test various memory limit formats + $testCases = [ + '256M' => 256 * 1024 * 1024, + '1G' => 1024 * 1024 * 1024, + '512K' => 512 * 1024, + '1024' => 1024, + ]; + + foreach ($testCases as $limit => $expected) { + $monitor = new MemoryPressureMonitor($limit); + $info = $monitor->getMemoryInfo(); + $this->assertEquals($expected, $info['limit']); + } + } + + public function testPressureLevelComparison(): void + { + $this->assertTrue(MemoryPressureLevel::HIGH->isHigherThan(MemoryPressureLevel::MEDIUM)); + $this->assertTrue(MemoryPressureLevel::CRITICAL->isHigherThan(MemoryPressureLevel::HIGH)); + $this->assertFalse(MemoryPressureLevel::LOW->isHigherThan(MemoryPressureLevel::MEDIUM)); + $this->assertFalse(MemoryPressureLevel::NONE->isHigherThan(MemoryPressureLevel::LOW)); + } +} \ No newline at end of file diff --git a/tests/Streams/SpaceTimeStreamTest.php b/tests/Streams/SpaceTimeStreamTest.php new file mode 100644 index 0000000..fb483cc --- /dev/null +++ b/tests/Streams/SpaceTimeStreamTest.php @@ -0,0 +1,161 @@ +testFile = sys_get_temp_dir() . '/test_stream.txt'; + file_put_contents($this->testFile, "line1\nline2\nline3\nline4\nline5"); + + $this->testCsv = sys_get_temp_dir() . '/test_stream.csv'; + file_put_contents($this->testCsv, "name,age\nJohn,25\nJane,30\nBob,20"); + } + + protected function tearDown(): void + { + if (file_exists($this->testFile)) { + unlink($this->testFile); + } + if (file_exists($this->testCsv)) { + unlink($this->testCsv); + } + + parent::tearDown(); + } + + public function testFromArray(): void + { + $data = [1, 2, 3, 4, 5]; + $result = SpaceTimeStream::from($data)->toArray(); + + $this->assertEquals($data, $result); + } + + public function testMap(): void + { + $data = [1, 2, 3, 4, 5]; + $result = SpaceTimeStream::from($data) + ->map(fn($x) => $x * 2) + ->toArray(); + + $this->assertEquals([2, 4, 6, 8, 10], $result); + } + + public function testFilter(): void + { + $data = [1, 2, 3, 4, 5]; + $result = SpaceTimeStream::from($data) + ->filter(fn($x) => $x % 2 === 0) + ->toArray(); + + $this->assertEquals([1 => 2, 3 => 4], $result); + } + + public function testChaining(): void + { + $data = range(1, 10); + $result = SpaceTimeStream::from($data) + ->filter(fn($x) => $x % 2 === 0) + ->map(fn($x) => $x * 2) + ->take(3) + ->toArray(); + + $expected = [1 => 4, 3 => 8, 5 => 12]; + $this->assertEquals($expected, $result); + } + + public function testFromFile(): void + { + $lines = SpaceTimeStream::fromFile($this->testFile)->toArray(); + + $this->assertEquals(['line1', 'line2', 'line3', 'line4', 'line5'], $lines); + } + + public function testFromCsv(): void + { + $rows = SpaceTimeStream::fromCsv($this->testCsv)->toArray(); + + $expected = [ + ['name' => 'John', 'age' => '25'], + ['name' => 'Jane', 'age' => '30'], + ['name' => 'Bob', 'age' => '20'], + ]; + + $this->assertEquals($expected, $rows); + } + + public function testReduce(): void + { + $sum = SpaceTimeStream::from([1, 2, 3, 4, 5]) + ->reduce(fn($acc, $x) => $acc + $x, 0); + + $this->assertEquals(15, $sum); + } + + public function testCount(): void + { + $count = SpaceTimeStream::from(range(1, 100)) + ->filter(fn($x) => $x % 
2 === 0) + ->count(); + + $this->assertEquals(50, $count); + } + + public function testChunk(): void + { + $chunks = SpaceTimeStream::from(range(1, 10)) + ->chunk(3) + ->toArray(); + + $this->assertCount(4, $chunks); + $this->assertEquals([1, 2, 3], $chunks[0]); + $this->assertEquals([4, 5, 6], $chunks[1]); + $this->assertEquals([7, 8, 9], $chunks[2]); + $this->assertEquals([10], $chunks[3]); + } + + public function testFlatMap(): void + { + $data = [[1, 2], [3, 4], [5]]; + $result = SpaceTimeStream::from($data) + ->flatMap(fn($arr) => $arr) + ->toArray(); + + $this->assertEquals([1, 2, 3, 4, 5], array_values($result)); + } + + public function testSkip(): void + { + $result = SpaceTimeStream::from(range(1, 10)) + ->skip(5) + ->toArray(); + + $this->assertEquals([6, 7, 8, 9, 10], array_values($result)); + } + + public function testWriteToFile(): void + { + $outputFile = sys_get_temp_dir() . '/output_stream.txt'; + + SpaceTimeStream::from(['a', 'b', 'c']) + ->map(fn($x) => strtoupper($x)) + ->toFile($outputFile); + + $content = file_get_contents($outputFile); + $this->assertEquals("A\nB\nC\n", $content); + + unlink($outputFile); + } +} \ No newline at end of file