using System.Collections;
using SqrtSpace.SpaceTime.Core;

namespace SqrtSpace.SpaceTime.Linq;

/// <summary>
/// External merge sort implementation for large datasets.
/// </summary>
/// <remarks>
/// The source is consumed in chunks of at most <c>bufferSize</c> items. Each chunk is
/// sorted in memory, handed to an <see cref="ExternalStorage{T}"/> instance, and the
/// resulting sorted chunks are merged with a k-way merge while enumerating.
/// </remarks>
/// <typeparam name="TSource">Element type of the sequence.</typeparam>
/// <typeparam name="TKey">Type of the primary sort key.</typeparam>
internal sealed class ExternalOrderedEnumerable<TSource, TKey> : IOrderedEnumerable<TSource>
    where TKey : notnull
{
    private readonly IEnumerable<TSource> _source;
    private readonly Func<TSource, TKey> _keySelector;
    private readonly IComparer<TKey> _comparer;
    private readonly int _bufferSize;

    /// <summary>
    /// Creates an ordered view over <paramref name="source"/>; no sorting happens until enumeration.
    /// </summary>
    /// <param name="source">Sequence to sort.</param>
    /// <param name="keySelector">Extracts the sort key from an element.</param>
    /// <param name="comparer">Key comparer; <see cref="Comparer{T}.Default"/> when null.</param>
    /// <param name="bufferSize">
    /// Maximum number of items per in-memory chunk. When null it is derived via
    /// <c>SpaceTimeCalculator.CalculateSqrtInterval</c> from the source count, or from
    /// 100,000 when the count cannot be obtained without enumerating. Values below 1 are
    /// raised to 1.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="keySelector"/> is null.
    /// </exception>
    public ExternalOrderedEnumerable(
        IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        IComparer<TKey>? comparer,
        int? bufferSize)
    {
        ArgumentNullException.ThrowIfNull(source);
        ArgumentNullException.ThrowIfNull(keySelector);

        _source = source;
        _keySelector = keySelector;
        _comparer = comparer ?? Comparer<TKey>.Default;

        var count = source.TryGetNonEnumeratedCount(out var c) ? c : 100_000;

        // A chunk must hold at least one item; a zero or negative size would otherwise
        // spill every element separately or make the List capacity argument invalid.
        _bufferSize = Math.Max(1, bufferSize ?? SpaceTimeCalculator.CalculateSqrtInterval(count));
    }

    /// <summary>
    /// Creates a subsequent (ThenBy-style) ordering on top of this primary ordering.
    /// </summary>
    /// <param name="keySelector">Extracts the secondary key.</param>
    /// <param name="comparer">Secondary key comparer; default comparer when null.</param>
    /// <param name="descending">True to order the secondary key descending.</param>
    /// <returns>An ordered sequence sorted by this ordering first, then by the new key.</returns>
    public IOrderedEnumerable<TSource> CreateOrderedEnumerable<TNewKey>(
        Func<TSource, TNewKey> keySelector,
        IComparer<TNewKey>? comparer,
        bool descending)
    {
        // The secondary sort needs to know which elements tie on the primary key,
        // so hand it an element-level comparer built from our key selector/comparer.
        var primaryOrder = Comparer<TSource>.Create(
            (x, y) => _comparer.Compare(_keySelector(x), _keySelector(y)));

        return new ThenByOrderedEnumerable<TSource, TNewKey>(
            this, keySelector, comparer, descending, primaryOrder);
    }

    /// <summary>
    /// Sorts the source with an external merge sort and streams the result.
    /// </summary>
    /// <returns>An enumerator over the elements in key order.</returns>
    public IEnumerator<TSource> GetEnumerator()
    {
        using var storage = new ExternalStorage<TSource>();
        var chunks = new List<string>();
        var chunk = new List<TSource>(_bufferSize);

        // Phase 1: sort fixed-size chunks and spill each one.
        foreach (var element in _source)
        {
            chunk.Add(element);
            if (chunk.Count >= _bufferSize)
            {
                chunks.Add(SortAndSpill(storage, chunk));
                chunk.Clear();
            }
        }

        // Spill whatever is left in the final, partially filled chunk.
        if (chunk.Count > 0)
        {
            chunks.Add(SortAndSpill(storage, chunk));
        }

        // Phase 2: merge the sorted chunks.
        if (chunks.Count == 0)
        {
            yield break;
        }

        if (chunks.Count == 1)
        {
            // Single chunk: it is already fully sorted, just read it back.
            foreach (var element in storage.ReadFromDiskAsync(chunks[0]).ToBlockingEnumerable())
            {
                yield return element;
            }

            yield break;
        }

        // Multi-way merge. The priority is (key, stream index): the stream index breaks
        // ties so that equal keys come out in chunk order, and it makes every priority
        // unique because each stream has at most one pending element in the queue.
        var iterators = new List<IEnumerator<TSource>>(chunks.Count);
        var heap = new PriorityQueue<(TSource item, int streamIndex), (TKey key, int index)>(
            chunks.Count, new MergeComparer<TKey>(_comparer));

        try
        {
            // Prime the queue with the first element of every chunk.
            for (var i = 0; i < chunks.Count; i++)
            {
                var iterator = storage.ReadFromDiskAsync(chunks[i]).ToBlockingEnumerable().GetEnumerator();
                iterators.Add(iterator);

                if (iterator.MoveNext())
                {
                    var head = iterator.Current;
                    heap.Enqueue((head, i), (_keySelector(head), i));
                }
            }

            // Repeatedly emit the smallest pending element and refill from its stream.
            while (heap.TryDequeue(out var smallest, out _))
            {
                yield return smallest.item;

                var stream = iterators[smallest.streamIndex];
                if (stream.MoveNext())
                {
                    var next = stream.Current;
                    heap.Enqueue((next, smallest.streamIndex), (_keySelector(next), smallest.streamIndex));
                }
            }
        }
        finally
        {
            foreach (var iterator in iterators)
            {
                iterator.Dispose();
            }
        }
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Sorts <paramref name="chunk"/> by key and spills it through <paramref name="storage"/>.
    /// </summary>
    /// <returns>The handle returned by the storage for the spilled chunk.</returns>
    private string SortAndSpill(ExternalStorage<TSource> storage, List<TSource> chunk)
    {
        var sortedChunk = chunk.OrderBy(_keySelector, _comparer).ToList();

        // The enumerator contract is synchronous, so the async spill is awaited by blocking.
        return storage.SpillToDiskAsync(sortedChunk).GetAwaiter().GetResult();
    }

    /// <summary>
    /// Orders merge entries by key, then by stream index.
    /// </summary>
    private sealed class MergeComparer<T> : IComparer<(T key, int index)>
    {
        private readonly IComparer<T> _keyComparer;

        public MergeComparer(IComparer<T> keyComparer)
        {
            _keyComparer = keyComparer;
        }

        public int Compare((T key, int index) x, (T key, int index) y)
        {
            var keyComparison = _keyComparer.Compare(x.key, y.key);
            return keyComparison != 0 ? keyComparison : x.index.CompareTo(y.index);
        }
    }
}

/// <summary>
/// Secondary ordering for ThenBy operations.
/// </summary>
/// <remarks>
/// When the primary ordering's element comparer is known, the primary sequence is
/// streamed and only runs of elements that tie on the primary ordering are buffered
/// and sorted by the secondary key. Without a comparer the work is delegated to the
/// primary sequence's own <c>CreateOrderedEnumerable</c>.
/// </remarks>
/// <typeparam name="TSource">Element type of the sequence.</typeparam>
/// <typeparam name="TKey">Type of the secondary sort key.</typeparam>
internal sealed class ThenByOrderedEnumerable<TSource, TKey> : IOrderedEnumerable<TSource>
{
    private readonly IOrderedEnumerable<TSource> _primary;
    private readonly Func<TSource, TKey> _keySelector;
    private readonly IComparer<TKey> _comparer;
    private readonly bool _descending;
    private readonly IComparer<TSource>? _primaryComparer;

    /// <summary>
    /// Creates a secondary ordering on top of <paramref name="primary"/>.
    /// </summary>
    /// <param name="primary">The already ordered sequence.</param>
    /// <param name="keySelector">Extracts the secondary key.</param>
    /// <param name="comparer">Secondary key comparer; <see cref="Comparer{T}.Default"/> when null.</param>
    /// <param name="descending">True to order the secondary key descending.</param>
    /// <param name="primaryComparer">
    /// Element comparer describing the order <paramref name="primary"/> is sorted in.
    /// When null, ordering is delegated to <paramref name="primary"/>'s
    /// <c>CreateOrderedEnumerable</c>.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="primary"/> or <paramref name="keySelector"/> is null.
    /// </exception>
    public ThenByOrderedEnumerable(
        IOrderedEnumerable<TSource> primary,
        Func<TSource, TKey> keySelector,
        IComparer<TKey>? comparer,
        bool descending,
        IComparer<TSource>? primaryComparer = null)
    {
        ArgumentNullException.ThrowIfNull(primary);
        ArgumentNullException.ThrowIfNull(keySelector);

        _primary = primary;
        _keySelector = keySelector;
        _comparer = comparer ?? Comparer<TKey>.Default;
        _descending = descending;
        _primaryComparer = primaryComparer;
    }

    /// <summary>
    /// Creates a further ordering level on top of this one.
    /// </summary>
    /// <param name="keySelector">Extracts the next key.</param>
    /// <param name="comparer">Comparer for the next key; default comparer when null.</param>
    /// <param name="descending">True to order the next key descending.</param>
    /// <returns>An ordered sequence sorted by all previous levels, then by the new key.</returns>
    public IOrderedEnumerable<TSource> CreateOrderedEnumerable<TNewKey>(
        Func<TSource, TNewKey> keySelector,
        IComparer<TNewKey>? comparer,
        bool descending)
    {
        var primaryOrder = _primaryComparer;
        if (primaryOrder is null)
        {
            // Our own order cannot be expressed as a comparer, so let the primary
            // sequence build the whole chain.
            return _primary
                .CreateOrderedEnumerable(_keySelector, _comparer, _descending)
                .CreateOrderedEnumerable(keySelector, comparer, descending);
        }

        // Compound order of this level: primary order first, secondary key on ties.
        var combined = Comparer<TSource>.Create((x, y) =>
        {
            var byPrimary = primaryOrder.Compare(x, y);
            if (byPrimary != 0)
            {
                return byPrimary;
            }

            var keyX = _keySelector(x);
            var keyY = _keySelector(y);
            return _descending ? _comparer.Compare(keyY, keyX) : _comparer.Compare(keyX, keyY);
        });

        return new ThenByOrderedEnumerable<TSource, TNewKey>(
            this, keySelector, comparer, descending, combined);
    }

    /// <summary>
    /// Enumerates the elements ordered by the primary ordering, then by the secondary key.
    /// </summary>
    /// <returns>An enumerator over the fully ordered elements.</returns>
    public IEnumerator<TSource> GetEnumerator()
    {
        var primaryOrder = _primaryComparer;
        if (primaryOrder is null)
        {
            foreach (var item in _primary.CreateOrderedEnumerable(_keySelector, _comparer, _descending))
            {
                yield return item;
            }

            yield break;
        }

        // _primary is already sorted by primaryOrder, so elements that tie on it are
        // adjacent. Buffer one such run at a time and order it by the secondary key.
        var run = new List<TSource>();
        foreach (var item in _primary)
        {
            if (run.Count > 0 && primaryOrder.Compare(run[0], item) != 0)
            {
                foreach (var ordered in OrderRun(run))
                {
                    yield return ordered;
                }

                run.Clear();
            }

            run.Add(item);
        }

        foreach (var ordered in OrderRun(run))
        {
            yield return ordered;
        }
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Orders one run of primary-equal elements by the secondary key (stable sort).
    /// </summary>
    private IEnumerable<TSource> OrderRun(List<TSource> run)
    {
        // Nothing to reorder in an empty or single-element run.
        if (run.Count < 2)
        {
            return run;
        }

        return _descending
            ? run.OrderByDescending(_keySelector, _comparer)
            : run.OrderBy(_keySelector, _comparer);
    }
}