using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Threading.Tasks; using System.Runtime.CompilerServices; using System.Threading; namespace SqrtSpace.SpaceTime.Linq { /// /// LINQ extensions that implement space-time tradeoffs for memory-efficient operations /// public static class SpaceTimeLinqExtensions { /// /// Orders a sequence using external merge sort with √n memory usage /// public static IOrderedEnumerable OrderByExternal( this IEnumerable source, Func keySelector, IComparer comparer = null, int? bufferSize = null) { if (source == null) throw new ArgumentNullException(nameof(source)); if (keySelector == null) throw new ArgumentNullException(nameof(keySelector)); return new ExternalOrderedEnumerable(source, keySelector, comparer, bufferSize); } /// /// Groups elements using √n memory for large datasets /// public static IEnumerable> GroupByExternal( this IEnumerable source, Func keySelector, int? bufferSize = null) { if (source == null) throw new ArgumentNullException(nameof(source)); if (keySelector == null) throw new ArgumentNullException(nameof(keySelector)); var count = source.TryGetNonEnumeratedCount(out var c) ? c : 1000000; var optimalBuffer = bufferSize ?? (int)Math.Sqrt(count); return new ExternalGrouping(source, keySelector, optimalBuffer); } /// /// Processes sequence in √n-sized batches for memory efficiency /// public static IEnumerable> BatchBySqrtN( this IEnumerable source, int? totalCount = null) { if (source == null) throw new ArgumentNullException(nameof(source)); var count = totalCount ?? (source.TryGetNonEnumeratedCount(out var c) ? c : 1000); var batchSize = Math.Max(1, (int)Math.Sqrt(count)); return source.Chunk(batchSize).Select(chunk => chunk.ToList()); } /// /// Performs a memory-efficient join using √n buffers /// public static IEnumerable JoinExternal( this IEnumerable outer, IEnumerable inner, Func outerKeySelector, Func innerKeySelector, Func resultSelector, IEqualityComparer comparer = null) { if (outer == null) throw new ArgumentNullException(nameof(outer)); if (inner == null) throw new ArgumentNullException(nameof(inner)); var innerCount = inner.TryGetNonEnumeratedCount(out var c) ? c : 10000; var bufferSize = (int)Math.Sqrt(innerCount); return ExternalJoinIterator(outer, inner, outerKeySelector, innerKeySelector, resultSelector, comparer, bufferSize); } /// /// Converts sequence to a list with checkpointing for fault tolerance /// public static List ToCheckpointedList( this IEnumerable source, string checkpointPath = null, int? checkpointInterval = null) { if (source == null) throw new ArgumentNullException(nameof(source)); var result = new List(); var count = 0; var interval = checkpointInterval ?? (int)Math.Sqrt(source.Count()); checkpointPath ??= Path.GetTempFileName(); try { // Try to restore from checkpoint if (File.Exists(checkpointPath)) { result = RestoreCheckpoint(checkpointPath); count = result.Count; } foreach (var item in source.Skip(count)) { result.Add(item); count++; if (count % interval == 0) { SaveCheckpoint(result, checkpointPath); } } return result; } finally { // Clean up checkpoint file if (File.Exists(checkpointPath)) { File.Delete(checkpointPath); } } } /// /// Performs distinct operation with limited memory using external storage /// public static IEnumerable DistinctExternal( this IEnumerable source, IEqualityComparer comparer = null, int? maxMemoryItems = null) { if (source == null) throw new ArgumentNullException(nameof(source)); var maxItems = maxMemoryItems ?? (int)Math.Sqrt(source.Count()); return new ExternalDistinct(source, comparer, maxItems); } /// /// Aggregates large sequences with √n memory checkpoints /// public static TAccumulate AggregateWithCheckpoints( this IEnumerable source, TAccumulate seed, Func func, int? checkpointInterval = null) { if (source == null) throw new ArgumentNullException(nameof(source)); if (func == null) throw new ArgumentNullException(nameof(func)); var accumulator = seed; var count = 0; var interval = checkpointInterval ?? (int)Math.Sqrt(source.Count()); var checkpoints = new Stack<(int index, TAccumulate value)>(); foreach (var item in source) { accumulator = func(accumulator, item); count++; if (count % interval == 0) { // Deep copy if TAccumulate is a reference type var checkpoint = accumulator is ICloneable cloneable ? (TAccumulate)cloneable.Clone() : accumulator; checkpoints.Push((count, checkpoint)); } } return accumulator; } /// /// Memory-efficient set operations using external storage /// public static IEnumerable UnionExternal( this IEnumerable first, IEnumerable second, IEqualityComparer comparer = null) { if (first == null) throw new ArgumentNullException(nameof(first)); if (second == null) throw new ArgumentNullException(nameof(second)); var totalCount = first.Count() + second.Count(); var bufferSize = (int)Math.Sqrt(totalCount); return ExternalSetOperation(first, second, SetOperation.Union, comparer, bufferSize); } /// /// Async enumerable with √n buffering for optimal memory usage /// public static async IAsyncEnumerable> BufferAsync( this IAsyncEnumerable source, int? bufferSize = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { if (source == null) throw new ArgumentNullException(nameof(source)); var buffer = new List(bufferSize ?? 1000); var optimalSize = bufferSize ?? (int)Math.Sqrt(1000000); // Assume large dataset await foreach (var item in source.WithCancellation(cancellationToken)) { buffer.Add(item); if (buffer.Count >= optimalSize) { yield return buffer; buffer = new List(optimalSize); } } if (buffer.Count > 0) { yield return buffer; } } // Private helper methods private static IEnumerable ExternalJoinIterator( IEnumerable outer, IEnumerable inner, Func outerKeySelector, Func innerKeySelector, Func resultSelector, IEqualityComparer comparer, int bufferSize) { comparer ??= EqualityComparer.Default; // Process inner sequence in chunks foreach (var innerChunk in inner.Chunk(bufferSize)) { var lookup = innerChunk.ToLookup(innerKeySelector, comparer); foreach (var outerItem in outer) { var key = outerKeySelector(outerItem); foreach (var innerItem in lookup[key]) { yield return resultSelector(outerItem, innerItem); } } } } private static void SaveCheckpoint(List data, string path) { // Simplified - in production would use proper serialization using var writer = new StreamWriter(path); writer.WriteLine(data.Count); foreach (var item in data) { writer.WriteLine(item?.ToString() ?? "null"); } } private static List RestoreCheckpoint(string path) { // Simplified - in production would use proper deserialization var lines = File.ReadAllLines(path); var count = int.Parse(lines[0]); var result = new List(count); // This is a simplified implementation // Real implementation would handle type conversion properly for (int i = 1; i <= count && i < lines.Length; i++) { if (typeof(T) == typeof(string)) { result.Add((T)(object)lines[i]); } else if (typeof(T) == typeof(int) && int.TryParse(lines[i], out var intVal)) { result.Add((T)(object)intVal); } // Add more type conversions as needed } return result; } private static IEnumerable ExternalSetOperation( IEnumerable first, IEnumerable second, SetOperation operation, IEqualityComparer comparer, int bufferSize) { // Simplified external set operation var seen = new HashSet(comparer); var spillFile = Path.GetTempFileName(); try { // Process first sequence foreach (var item in first) { if (seen.Count >= bufferSize) { // Spill to disk SpillToDisk(seen, spillFile); seen.Clear(); } if (seen.Add(item)) { yield return item; } } // Process second sequence for union if (operation == SetOperation.Union) { foreach (var item in second) { if (!seen.Contains(item) && !ExistsInSpillFile(item, spillFile, comparer)) { yield return item; } } } } finally { if (File.Exists(spillFile)) { File.Delete(spillFile); } } } private static void SpillToDisk(HashSet items, string path) { using var writer = new StreamWriter(path, append: true); foreach (var item in items) { writer.WriteLine(item?.ToString() ?? "null"); } } private static bool ExistsInSpillFile(T item, string path, IEqualityComparer comparer) { if (!File.Exists(path)) return false; // Simplified - real implementation would be more efficient var itemStr = item?.ToString() ?? "null"; return File.ReadLines(path).Any(line => line == itemStr); } private enum SetOperation { Union, Intersect, Except } } // Supporting classes internal class ExternalOrderedEnumerable : IOrderedEnumerable { private readonly IEnumerable _source; private readonly Func _keySelector; private readonly IComparer _comparer; private readonly int _bufferSize; public ExternalOrderedEnumerable( IEnumerable source, Func keySelector, IComparer comparer, int? bufferSize) { _source = source; _keySelector = keySelector; _comparer = comparer ?? Comparer.Default; _bufferSize = bufferSize ?? (int)Math.Sqrt(source.Count()); } public IOrderedEnumerable CreateOrderedEnumerable( Func keySelector, IComparer comparer, bool descending) { // Simplified - would need proper implementation throw new NotImplementedException(); } public IEnumerator GetEnumerator() { // External merge sort implementation var chunks = new List>(); var chunk = new List(_bufferSize); foreach (var item in _source) { chunk.Add(item); if (chunk.Count >= _bufferSize) { chunks.Add(chunk.OrderBy(_keySelector, _comparer).ToList()); chunk = new List(_bufferSize); } } if (chunk.Count > 0) { chunks.Add(chunk.OrderBy(_keySelector, _comparer).ToList()); } // Merge sorted chunks return MergeSortedChunks(chunks).GetEnumerator(); } System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } private IEnumerable MergeSortedChunks(List> chunks) { var indices = new int[chunks.Count]; while (true) { TSource minItem = default; TKey minKey = default; int minChunk = -1; // Find minimum across all chunks for (int i = 0; i < chunks.Count; i++) { if (indices[i] < chunks[i].Count) { var item = chunks[i][indices[i]]; var key = _keySelector(item); if (minChunk == -1 || _comparer.Compare(key, minKey) < 0) { minItem = item; minKey = key; minChunk = i; } } } if (minChunk == -1) yield break; yield return minItem; indices[minChunk]++; } } } internal class ExternalGrouping : IEnumerable> { private readonly IEnumerable _source; private readonly Func _keySelector; private readonly int _bufferSize; public ExternalGrouping(IEnumerable source, Func keySelector, int bufferSize) { _source = source; _keySelector = keySelector; _bufferSize = bufferSize; } public IEnumerator> GetEnumerator() { var groups = new Dictionary>(_bufferSize); var spilledGroups = new Dictionary(); foreach (var item in _source) { var key = _keySelector(item); if (!groups.ContainsKey(key)) { if (groups.Count >= _bufferSize) { // Spill largest group to disk SpillLargestGroup(groups, spilledGroups); } groups[key] = new List(); } groups[key].Add(item); } // Return in-memory groups foreach (var kvp in groups) { yield return new Grouping(kvp.Key, kvp.Value); } // Return spilled groups foreach (var kvp in spilledGroups) { var items = LoadSpilledGroup(kvp.Value); yield return new Grouping(kvp.Key, items); File.Delete(kvp.Value); } } System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } private void SpillLargestGroup( Dictionary> groups, Dictionary spilledGroups) { var largest = groups.OrderByDescending(g => g.Value.Count).First(); var spillFile = Path.GetTempFileName(); // Simplified serialization File.WriteAllLines(spillFile, largest.Value.Select(v => v?.ToString() ?? "null")); spilledGroups[largest.Key] = spillFile; groups.Remove(largest.Key); } private List LoadSpilledGroup(string path) { // Simplified deserialization return File.ReadAllLines(path).Select(line => (T)(object)line).ToList(); } } internal class Grouping : IGrouping { public TKey Key { get; } private readonly IEnumerable _elements; public Grouping(TKey key, IEnumerable elements) { Key = key; _elements = elements; } public IEnumerator GetEnumerator() { return _elements.GetEnumerator(); } System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } } internal class ExternalDistinct : IEnumerable { private readonly IEnumerable _source; private readonly IEqualityComparer _comparer; private readonly int _maxMemoryItems; public ExternalDistinct(IEnumerable source, IEqualityComparer comparer, int maxMemoryItems) { _source = source; _comparer = comparer ?? EqualityComparer.Default; _maxMemoryItems = maxMemoryItems; } public IEnumerator GetEnumerator() { var seen = new HashSet(_comparer); var spillFile = Path.GetTempFileName(); try { foreach (var item in _source) { if (seen.Count >= _maxMemoryItems) { // Spill to disk and clear memory SpillHashSet(seen, spillFile); seen.Clear(); } if (seen.Add(item) && !ExistsInSpillFile(item, spillFile)) { yield return item; } } } finally { if (File.Exists(spillFile)) { File.Delete(spillFile); } } } System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } private void SpillHashSet(HashSet items, string path) { using var writer = new StreamWriter(path, append: true); foreach (var item in items) { writer.WriteLine(item?.ToString() ?? "null"); } } private bool ExistsInSpillFile(T item, string path) { if (!File.Exists(path)) return false; var itemStr = item?.ToString() ?? "null"; return File.ReadLines(path).Any(line => line == itemStr); } } }