using System.Collections;
using SqrtSpace.SpaceTime.Core;
namespace SqrtSpace.SpaceTime.Linq;
/// <summary>
/// External merge sort implementation for large datasets.
/// </summary>
/// <typeparam name="TSource">Element type of the sequence being ordered.</typeparam>
/// <typeparam name="TKey">Type of the sort key extracted from each element.</typeparam>
/// <remarks>
/// The source is consumed in buffers of at most the configured buffer size. Each full buffer is
/// stably sorted and spilled through <c>ExternalStorage</c>; the spilled runs are then combined
/// with a k-way merge. Input that fits in a single buffer is sorted in memory and never spilled.
/// Elements with equal keys keep their source order.
/// </remarks>
internal sealed class ExternalOrderedEnumerable<TSource, TKey> : IOrderedEnumerable<TSource> where TKey : notnull
{
    private readonly IEnumerable<TSource> _source;
    private readonly Func<TSource, TKey> _keySelector;
    private readonly IComparer<TKey> _comparer;
    private readonly int _bufferSize;

    /// <summary>
    /// Creates a lazily evaluated external sort over <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Sequence to sort; it is enumerated once per enumeration of this instance.</param>
    /// <param name="keySelector">Extracts the sort key from an element.</param>
    /// <param name="comparer">Key comparer; the default comparer for the key type is used when null.</param>
    /// <param name="bufferSize">
    /// Maximum number of elements sorted in memory at once. When null it is derived from
    /// <c>SpaceTimeCalculator.CalculateSqrtInterval</c> applied to the source count, or to 100,000
    /// when the count cannot be obtained without enumerating. Values below 1 are clamped to 1.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="keySelector"/> is null.
    /// </exception>
    public ExternalOrderedEnumerable(
        IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        IComparer<TKey>? comparer,
        int? bufferSize)
    {
        ArgumentNullException.ThrowIfNull(source);
        ArgumentNullException.ThrowIfNull(keySelector);

        _source = source;
        _keySelector = keySelector;
        _comparer = comparer ?? Comparer<TKey>.Default;

        var count = source.TryGetNonEnumeratedCount(out var c) ? c : 100_000;

        // A size of 0 would spill one file per element and a negative size would make the
        // buffer allocation throw during enumeration, so never go below one element.
        _bufferSize = Math.Max(1, bufferSize ?? SpaceTimeCalculator.CalculateSqrtInterval(count));
    }

    /// <summary>
    /// Adds a subsequent (ThenBy-style) ordering that only breaks ties left by the current key.
    /// </summary>
    /// <typeparam name="TNewKey">Type of the secondary key.</typeparam>
    /// <param name="keySelector">Extracts the secondary key from an element.</param>
    /// <param name="comparer">Secondary key comparer; the default comparer is used when null.</param>
    /// <param name="descending">True to order the secondary key from largest to smallest.</param>
    /// <returns>
    /// A new external sort over the same source whose key is the (current key, secondary key) pair.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="keySelector"/> is null.</exception>
    public IOrderedEnumerable<TSource> CreateOrderedEnumerable<TNewKey>(
        Func<TSource, TNewKey> keySelector,
        IComparer<TNewKey>? comparer,
        bool descending)
    {
        ArgumentNullException.ThrowIfNull(keySelector);

        // Re-sorting the already ordered output by the secondary key alone would let the
        // secondary key override the primary order. Sorting once by a composite key keeps the
        // primary key dominant and still goes through the external sort.
        var primarySelector = _keySelector;
        return new ExternalOrderedEnumerable<TSource, (TKey Primary, TNewKey Secondary)>(
            _source,
            item => (primarySelector(item), keySelector(item)),
            new CompositeKeyComparer<TNewKey>(_comparer, comparer ?? Comparer<TNewKey>.Default, descending),
            _bufferSize);
    }

    /// <summary>
    /// Enumerates the source in sorted order.
    /// </summary>
    /// <remarks>
    /// Blocks the calling thread while the storage's asynchronous spill and read operations
    /// complete. Storage and all open run readers are released when the enumerator is disposed.
    /// </remarks>
    public IEnumerator<TSource> GetEnumerator()
    {
        using var storage = new ExternalStorage<TSource>();

        // One entry per spilled run; invoking an entry opens that run for reading.
        var runs = new List<Func<IEnumerable<TSource>>>();
        var buffer = new List<TSource>(_bufferSize);

        // Phase 1: cut the source into sorted runs.
        foreach (var element in _source)
        {
            // Spill only once another element actually arrives, so input that fits in a
            // single buffer never touches the disk.
            if (buffer.Count >= _bufferSize)
            {
                runs.Add(SpillSortedRun(storage, buffer));
                buffer.Clear();
            }

            buffer.Add(element);
        }

        if (runs.Count == 0)
        {
            // Everything fit in one buffer (or the source was empty): sort in memory.
            foreach (var element in buffer.OrderBy(_keySelector, _comparer))
            {
                yield return element;
            }

            yield break;
        }

        // The loop always adds an element after spilling, so the buffer holds the final run.
        runs.Add(SpillSortedRun(storage, buffer));
        buffer.Clear();

        // Phase 2: k-way merge. The priority pairs the key with the run index so that equal
        // keys are emitted in run order, which preserves the original source order.
        var readers = new List<IEnumerator<TSource>>(runs.Count);
        var pending = new PriorityQueue<TSource, (TKey key, int index)>(
            runs.Count, new MergeComparer<TKey>(_comparer));
        try
        {
            for (var i = 0; i < runs.Count; i++)
            {
                var reader = runs[i]().GetEnumerator();
                readers.Add(reader);
                if (reader.MoveNext())
                {
                    var head = reader.Current;
                    pending.Enqueue(head, (_keySelector(head), i));
                }
            }

            while (pending.TryDequeue(out var smallest, out var priority))
            {
                yield return smallest;

                // Refill from the run that just supplied the smallest element.
                var origin = readers[priority.index];
                if (origin.MoveNext())
                {
                    var following = origin.Current;
                    pending.Enqueue(following, (_keySelector(following), priority.index));
                }
            }
        }
        finally
        {
            foreach (var reader in readers)
            {
                reader.Dispose();
            }
        }
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Stably sorts <paramref name="buffer"/>, spills it through <paramref name="storage"/> and
    /// returns a delegate that opens the spilled run for reading.
    /// </summary>
    private Func<IEnumerable<TSource>> SpillSortedRun(ExternalStorage<TSource> storage, List<TSource> buffer)
    {
        // OrderBy is a stable sort; ToList snapshots the run so the buffer can be reused.
        var sortedRun = buffer.OrderBy(_keySelector, _comparer).ToList();
        var handle = storage.SpillToDiskAsync(sortedRun).GetAwaiter().GetResult();
        return () => storage.ReadFromDiskAsync(handle).ToBlockingEnumerable();
    }

    /// <summary>
    /// Orders merge entries by key, then by run index to break ties deterministically.
    /// </summary>
    private sealed class MergeComparer<T> : IComparer<(T key, int index)>
    {
        private readonly IComparer<T> _keyComparer;

        public MergeComparer(IComparer<T> keyComparer)
        {
            _keyComparer = keyComparer;
        }

        public int Compare((T key, int index) x, (T key, int index) y)
        {
            var keyComparison = _keyComparer.Compare(x.key, y.key);
            return keyComparison != 0 ? keyComparison : x.index.CompareTo(y.index);
        }
    }

    /// <summary>
    /// Compares (primary, secondary) key pairs: primary first, secondary only on primary ties.
    /// </summary>
    private sealed class CompositeKeyComparer<TSecondary> : IComparer<(TKey Primary, TSecondary Secondary)>
    {
        private readonly IComparer<TKey> _primaryComparer;
        private readonly IComparer<TSecondary> _secondaryComparer;
        private readonly bool _secondaryDescending;

        public CompositeKeyComparer(
            IComparer<TKey> primaryComparer,
            IComparer<TSecondary> secondaryComparer,
            bool secondaryDescending)
        {
            _primaryComparer = primaryComparer;
            _secondaryComparer = secondaryComparer;
            _secondaryDescending = secondaryDescending;
        }

        public int Compare((TKey Primary, TSecondary Secondary) x, (TKey Primary, TSecondary Secondary) y)
        {
            var byPrimary = _primaryComparer.Compare(x.Primary, y.Primary);
            if (byPrimary != 0)
            {
                return byPrimary;
            }

            // Swap the operands instead of negating the result: negating int.MinValue overflows.
            return _secondaryDescending
                ? _secondaryComparer.Compare(y.Secondary, x.Secondary)
                : _secondaryComparer.Compare(x.Secondary, y.Secondary);
        }
    }
}
/// <summary>
/// Secondary ordering for ThenBy operations.
/// </summary>
/// <typeparam name="TSource">Element type of the sequence.</typeparam>
/// <typeparam name="TKey">Type of the secondary sort key.</typeparam>
/// <remarks>
/// The primary sequence is fully materialized in memory and then stably re-sorted by the
/// secondary key alone, because the ordering of the primary sequence is opaque to this wrapper.
/// The primary order therefore survives only among elements whose secondary keys compare equal.
/// NOTE(review): this is not the usual ThenBy contract, where the secondary key merely breaks
/// ties of the primary key; a correct composition has to fold both keys into a single sort.
/// </remarks>
internal sealed class ThenByOrderedEnumerable<TSource, TKey> : IOrderedEnumerable<TSource>
{
    private readonly IOrderedEnumerable<TSource> _primary;
    private readonly Func<TSource, TKey> _keySelector;
    private readonly IComparer<TKey> _comparer;
    private readonly bool _descending;

    /// <summary>
    /// Wraps an ordered sequence with an additional ordering by <paramref name="keySelector"/>.
    /// </summary>
    /// <param name="primary">Already ordered sequence to re-sort.</param>
    /// <param name="keySelector">Extracts the secondary key from an element.</param>
    /// <param name="comparer">Key comparer; the default comparer for the key type is used when null.</param>
    /// <param name="descending">True to order the secondary key from largest to smallest.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="primary"/> or <paramref name="keySelector"/> is null.
    /// </exception>
    public ThenByOrderedEnumerable(
        IOrderedEnumerable<TSource> primary,
        Func<TSource, TKey> keySelector,
        IComparer<TKey>? comparer,
        bool descending)
    {
        // Fail at construction instead of on first enumeration.
        ArgumentNullException.ThrowIfNull(primary);
        ArgumentNullException.ThrowIfNull(keySelector);

        _primary = primary;
        _keySelector = keySelector;
        _comparer = comparer ?? Comparer<TKey>.Default;
        _descending = descending;
    }

    /// <summary>
    /// Stacks a further ordering on top of this one.
    /// </summary>
    /// <typeparam name="TNewKey">Type of the additional key.</typeparam>
    /// <param name="keySelector">Extracts the additional key from an element.</param>
    /// <param name="comparer">Comparer for the additional key; the default comparer is used when null.</param>
    /// <param name="descending">True to order the additional key from largest to smallest.</param>
    /// <returns>A new wrapper that re-sorts this sequence by the additional key.</returns>
    public IOrderedEnumerable<TSource> CreateOrderedEnumerable<TNewKey>(
        Func<TSource, TNewKey> keySelector,
        IComparer<TNewKey>? comparer,
        bool descending)
    {
        return new ThenByOrderedEnumerable<TSource, TNewKey>(
            this, keySelector, comparer, descending);
    }

    /// <summary>
    /// Enumerates the primary sequence re-sorted by the secondary key.
    /// </summary>
    /// <remarks>
    /// Evaluation is deferred: the primary sequence is read on the first <c>MoveNext</c> call.
    /// </remarks>
    public IEnumerator<TSource> GetEnumerator()
    {
        // For simplicity, materialize and use standard LINQ.
        // A production implementation would merge this into the external sort.
        var items = _primary.ToList();

        // Both LINQ sorts are stable, so elements with equal secondary keys keep primary order.
        var ordered = _descending
            ? items.OrderByDescending(_keySelector, _comparer)
            : items.OrderBy(_keySelector, _comparer);

        foreach (var item in ordered)
        {
            yield return item;
        }
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}