using System.Collections;
using SqrtSpace.SpaceTime.Core;

namespace SqrtSpace.SpaceTime.Linq;

/// <summary>
/// External merge sort implementation for large datasets.
/// </summary>
/// <remarks>
/// Enumeration buffers the source in chunks of at most the configured buffer size, sorts each
/// chunk in memory, hands it to <c>ExternalStorage&lt;TSource&gt;.SpillToDiskAsync</c>, and then
/// merges the spilled chunks back into one ordered sequence. Ties between equal keys are resolved
/// in favour of the earlier chunk, and each chunk is sorted with the stable LINQ
/// <c>OrderBy</c>, so elements with equal keys keep their source order.
/// </remarks>
/// <typeparam name="TSource">Element type of the sequence being sorted.</typeparam>
/// <typeparam name="TKey">Type of the key the elements are ordered by.</typeparam>
internal sealed class ExternalOrderedEnumerable<TSource, TKey> : IOrderedEnumerable<TSource> where TKey : notnull
{
    private readonly IEnumerable<TSource> _source;
    private readonly Func<TSource, TKey> _keySelector;
    private readonly IComparer<TKey> _comparer;
    private readonly int _bufferSize;

    /// <summary>
    /// Creates an ordered view over <paramref name="source"/>; no sorting happens until enumeration.
    /// </summary>
    /// <param name="source">Sequence to sort.</param>
    /// <param name="keySelector">Extracts the sort key from an element.</param>
    /// <param name="comparer">Key comparer; <see cref="Comparer{T}.Default"/> is used when null.</param>
    /// <param name="bufferSize">
    /// Maximum number of elements sorted in memory at once. When null, the size comes from
    /// <c>SpaceTimeCalculator.CalculateSqrtInterval</c> applied to the source count
    /// (100,000 is assumed when the count is not available without enumerating).
    /// Values below 1 are treated as 1.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="keySelector"/> is null.
    /// </exception>
    public ExternalOrderedEnumerable(
        IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        IComparer<TKey>? comparer,
        int? bufferSize)
    {
        ArgumentNullException.ThrowIfNull(source);
        ArgumentNullException.ThrowIfNull(keySelector);

        _source = source;
        _keySelector = keySelector;
        _comparer = comparer ?? Comparer<TKey>.Default;

        var count = source.TryGetNonEnumeratedCount(out var c) ? c : 100_000;

        // A buffer smaller than one element cannot make progress (and a negative capacity makes
        // List<T> throw during enumeration), so clamp whatever we were given or computed.
        _bufferSize = Math.Max(1, bufferSize ?? SpaceTimeCalculator.CalculateSqrtInterval(count));
    }

    /// <summary>
    /// Adds a subsequent (ThenBy-style) ordering on top of this one.
    /// </summary>
    /// <remarks>
    /// The result is a new external sort over the same source whose key is the pair
    /// (this key, new key). The new key is only consulted when the existing keys compare equal,
    /// so the primary ordering is preserved, and the sort keeps spilling in bounded chunks
    /// instead of materializing the whole sequence.
    /// </remarks>
    /// <typeparam name="TNewKey">Type of the additional key.</typeparam>
    /// <param name="keySelector">Extracts the additional key from an element.</param>
    /// <param name="comparer">Comparer for the additional key; the default comparer when null.</param>
    /// <param name="descending">True to order the additional key descending.</param>
    /// <returns>An ordered sequence sorted by the existing key, then by the additional key.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="keySelector"/> is null.</exception>
    public IOrderedEnumerable<TSource> CreateOrderedEnumerable<TNewKey>(
        Func<TSource, TNewKey> keySelector,
        IComparer<TNewKey>? comparer,
        bool descending)
    {
        ArgumentNullException.ThrowIfNull(keySelector);

        var primarySelector = _keySelector;

        return new ExternalOrderedEnumerable<TSource, (TKey Primary, TNewKey Secondary)>(
            _source,
            item => (primarySelector(item), keySelector(item)),
            new CompositeKeyComparer<TNewKey>(_comparer, comparer ?? Comparer<TNewKey>.Default, descending),
            _bufferSize);
    }

    /// <summary>
    /// Sorts the source with an external merge sort and streams the result.
    /// </summary>
    /// <returns>An enumerator over the elements in key order.</returns>
    public IEnumerator<TSource> GetEnumerator()
    {
        // Disposed when the iterator completes or is disposed by the consumer.
        using var storage = new ExternalStorage<TSource>();
        var chunks = new List<string>();
        var chunk = new List<TSource>(_bufferSize);

        // Phase 1: sort full buffers and spill them.
        foreach (var item in _source)
        {
            chunk.Add(item);
            if (chunk.Count >= _bufferSize)
            {
                chunks.Add(SpillSortedChunk(storage, chunk));
                chunk.Clear();
            }
        }

        // Sort and spill whatever is left in the last, partially filled buffer.
        if (chunk.Count > 0)
        {
            chunks.Add(SpillSortedChunk(storage, chunk));
        }

        // Phase 2: merge the sorted chunks.
        if (chunks.Count == 0)
        {
            yield break;
        }

        if (chunks.Count == 1)
        {
            // A single chunk is already fully sorted; just read it back.
            foreach (var item in storage.ReadFromDiskAsync(chunks[0]).ToBlockingEnumerable())
            {
                yield return item;
            }

            yield break;
        }

        // Multi-way merge: the queue holds at most one pending element per chunk, prioritised by
        // (key, chunk index). The chunk index makes every priority in the queue distinct and
        // breaks key ties in favour of the earlier chunk.
        var iterators = new List<IEnumerator<TSource>>(chunks.Count);
        var heap = new PriorityQueue<(TSource Item, int StreamIndex), (TKey key, int index)>(
            chunks.Count,
            new MergeComparer<TKey>(_comparer));

        try
        {
            // Prime the queue with the first element of every chunk.
            for (var i = 0; i < chunks.Count; i++)
            {
                var iterator = storage.ReadFromDiskAsync(chunks[i]).ToBlockingEnumerable().GetEnumerator();
                iterators.Add(iterator);

                if (iterator.MoveNext())
                {
                    var first = iterator.Current;
                    heap.Enqueue((first, i), (_keySelector(first), i));
                }
            }

            // Repeatedly emit the smallest pending element and refill from the chunk it came from.
            while (heap.TryDequeue(out var smallest, out _))
            {
                yield return smallest.Item;

                var stream = iterators[smallest.StreamIndex];
                if (stream.MoveNext())
                {
                    var following = stream.Current;
                    heap.Enqueue(
                        (following, smallest.StreamIndex),
                        (_keySelector(following), smallest.StreamIndex));
                }
            }
        }
        finally
        {
            // Runs on completion, on an exception, and when the consumer stops early.
            foreach (var iterator in iterators)
            {
                iterator.Dispose();
            }
        }
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Sorts <paramref name="chunk"/> by key and spills it, blocking until the spill completes.
    /// </summary>
    /// <returns>The string returned by <c>SpillToDiskAsync</c>, used later to read the chunk back.</returns>
    private string SpillSortedChunk(ExternalStorage<TSource> storage, List<TSource> chunk)
    {
        // OrderBy is a stable sort, so equal keys keep their arrival order within the chunk.
        var sortedChunk = chunk.OrderBy(_keySelector, _comparer).ToList();

        // IEnumerator<T> is synchronous, so the asynchronous spill has to be awaited by blocking.
        return storage.SpillToDiskAsync(sortedChunk).GetAwaiter().GetResult();
    }

    /// <summary>
    /// Orders merge entries by key, then by chunk index when keys compare equal.
    /// </summary>
    private sealed class MergeComparer<T> : IComparer<(T key, int index)>
    {
        private readonly IComparer<T> _keyComparer;

        public MergeComparer(IComparer<T> keyComparer)
        {
            _keyComparer = keyComparer;
        }

        public int Compare((T key, int index) x, (T key, int index) y)
        {
            var keyComparison = _keyComparer.Compare(x.key, y.key);
            return keyComparison != 0 ? keyComparison : x.index.CompareTo(y.index);
        }
    }

    /// <summary>
    /// Compares (primary, secondary) key pairs: primary first, secondary only on a primary tie.
    /// </summary>
    private sealed class CompositeKeyComparer<TNewKey> : IComparer<(TKey Primary, TNewKey Secondary)>
    {
        private readonly IComparer<TKey> _primaryComparer;
        private readonly IComparer<TNewKey> _secondaryComparer;
        private readonly bool _secondaryDescending;

        public CompositeKeyComparer(
            IComparer<TKey> primaryComparer,
            IComparer<TNewKey> secondaryComparer,
            bool secondaryDescending)
        {
            _primaryComparer = primaryComparer;
            _secondaryComparer = secondaryComparer;
            _secondaryDescending = secondaryDescending;
        }

        public int Compare((TKey Primary, TNewKey Secondary) x, (TKey Primary, TNewKey Secondary) y)
        {
            var primaryComparison = _primaryComparer.Compare(x.Primary, y.Primary);
            if (primaryComparison != 0)
            {
                return primaryComparison;
            }

            // Swap the operands instead of negating the result: negating int.MinValue overflows.
            return _secondaryDescending
                ? _secondaryComparer.Compare(y.Secondary, x.Secondary)
                : _secondaryComparer.Compare(x.Secondary, y.Secondary);
        }
    }
}
|
|
|
|
/// <summary>
/// Secondary ordering for ThenBy operations.
/// </summary>
/// <remarks>
/// Enumeration copies the whole primary sequence into memory and then applies a stable LINQ sort
/// on the secondary key. NOTE(review): the primary key is not consulted by that sort, so elements
/// come out ordered by the secondary key, and the primary order only decides between elements
/// whose secondary keys compare equal — confirm this is what callers expect.
/// </remarks>
/// <typeparam name="TSource">Element type of the sequence.</typeparam>
/// <typeparam name="TPrimaryKey">Key type of the preceding ordering (not used by this class's members).</typeparam>
/// <typeparam name="TSecondaryKey">Key type of the ordering applied by this class.</typeparam>
internal sealed class ThenByOrderedEnumerable<TSource, TPrimaryKey, TSecondaryKey> : IOrderedEnumerable<TSource>
{
    private readonly IOrderedEnumerable<TSource> _primary;
    private readonly Func<TSource, TSecondaryKey> _keySelector;
    private readonly IComparer<TSecondaryKey> _comparer;
    private readonly bool _descending;

    /// <summary>
    /// Wraps an already ordered sequence with an additional key ordering.
    /// </summary>
    /// <param name="primary">The ordered sequence whose output is re-sorted.</param>
    /// <param name="keySelector">Extracts the secondary key from an element.</param>
    /// <param name="comparer">Secondary key comparer; <see cref="Comparer{T}.Default"/> when null.</param>
    /// <param name="descending">True to sort the secondary key descending.</param>
    public ThenByOrderedEnumerable(
        IOrderedEnumerable<TSource> primary,
        Func<TSource, TSecondaryKey> keySelector,
        IComparer<TSecondaryKey>? comparer,
        bool descending)
    {
        _primary = primary;
        _keySelector = keySelector;
        _descending = descending;
        _comparer = comparer is null ? Comparer<TSecondaryKey>.Default : comparer;
    }

    /// <summary>
    /// Stacks one more ordering on top of this one by wrapping this instance.
    /// </summary>
    /// <typeparam name="TNewKey">Type of the additional key.</typeparam>
    /// <param name="keySelector">Extracts the additional key from an element.</param>
    /// <param name="comparer">Comparer for the additional key; the default comparer when null.</param>
    /// <param name="descending">True to sort the additional key descending.</param>
    /// <returns>A new ordered sequence that re-sorts this instance's output.</returns>
    public IOrderedEnumerable<TSource> CreateOrderedEnumerable<TNewKey>(
        Func<TSource, TNewKey> keySelector,
        IComparer<TNewKey>? comparer,
        bool descending)
        => new ThenByOrderedEnumerable<TSource, TSecondaryKey, TNewKey>(this, keySelector, comparer, descending);

    /// <summary>
    /// Buffers the primary sequence and streams it back sorted by the secondary key.
    /// </summary>
    /// <returns>An enumerator over the re-sorted elements.</returns>
    public IEnumerator<TSource> GetEnumerator()
    {
        // For simplicity the primary output is fully materialized and sorted with standard LINQ;
        // a production implementation would fold this into the external sort.
        var buffered = new List<TSource>(_primary);

        IEnumerable<TSource> sorted;
        if (_descending)
        {
            sorted = buffered.OrderByDescending(_keySelector, _comparer);
        }
        else
        {
            sorted = buffered.OrderBy(_keySelector, _comparer);
        }

        using var cursor = sorted.GetEnumerator();
        while (cursor.MoveNext())
        {
            yield return cursor.Current;
        }
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}