using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using System.Runtime.CompilerServices;
using System.Threading;
namespace SqrtSpace.SpaceTime.Linq
{
///
/// LINQ extensions that implement space-time tradeoffs for memory-efficient operations
///
public static class SpaceTimeLinqExtensions
{
///
/// Orders a sequence using external merge sort with √n memory usage
///
public static IOrderedEnumerable OrderByExternal(
this IEnumerable source,
Func keySelector,
IComparer comparer = null,
int? bufferSize = null)
{
if (source == null) throw new ArgumentNullException(nameof(source));
if (keySelector == null) throw new ArgumentNullException(nameof(keySelector));
return new ExternalOrderedEnumerable(source, keySelector, comparer, bufferSize);
}
///
/// Groups elements using √n memory for large datasets
///
public static IEnumerable> GroupByExternal(
this IEnumerable source,
Func keySelector,
int? bufferSize = null)
{
if (source == null) throw new ArgumentNullException(nameof(source));
if (keySelector == null) throw new ArgumentNullException(nameof(keySelector));
var count = source.TryGetNonEnumeratedCount(out var c) ? c : 1000000;
var optimalBuffer = bufferSize ?? (int)Math.Sqrt(count);
return new ExternalGrouping(source, keySelector, optimalBuffer);
}
///
/// Processes sequence in √n-sized batches for memory efficiency
///
public static IEnumerable> BatchBySqrtN(
this IEnumerable source,
int? totalCount = null)
{
if (source == null) throw new ArgumentNullException(nameof(source));
var count = totalCount ?? (source.TryGetNonEnumeratedCount(out var c) ? c : 1000);
var batchSize = Math.Max(1, (int)Math.Sqrt(count));
return source.Chunk(batchSize).Select(chunk => chunk.ToList());
}
///
/// Performs a memory-efficient join using √n buffers
///
public static IEnumerable JoinExternal(
this IEnumerable outer,
IEnumerable inner,
Func outerKeySelector,
Func innerKeySelector,
Func resultSelector,
IEqualityComparer comparer = null)
{
if (outer == null) throw new ArgumentNullException(nameof(outer));
if (inner == null) throw new ArgumentNullException(nameof(inner));
var innerCount = inner.TryGetNonEnumeratedCount(out var c) ? c : 10000;
var bufferSize = (int)Math.Sqrt(innerCount);
return ExternalJoinIterator(outer, inner, outerKeySelector, innerKeySelector,
resultSelector, comparer, bufferSize);
}
///
/// Converts sequence to a list with checkpointing for fault tolerance
///
public static List ToCheckpointedList(
this IEnumerable source,
string checkpointPath = null,
int? checkpointInterval = null)
{
if (source == null) throw new ArgumentNullException(nameof(source));
var result = new List();
var count = 0;
var interval = checkpointInterval ?? (int)Math.Sqrt(source.Count());
checkpointPath ??= Path.GetTempFileName();
try
{
// Try to restore from checkpoint
if (File.Exists(checkpointPath))
{
result = RestoreCheckpoint(checkpointPath);
count = result.Count;
}
foreach (var item in source.Skip(count))
{
result.Add(item);
count++;
if (count % interval == 0)
{
SaveCheckpoint(result, checkpointPath);
}
}
return result;
}
finally
{
// Clean up checkpoint file
if (File.Exists(checkpointPath))
{
File.Delete(checkpointPath);
}
}
}
///
/// Performs distinct operation with limited memory using external storage
///
public static IEnumerable DistinctExternal(
this IEnumerable source,
IEqualityComparer comparer = null,
int? maxMemoryItems = null)
{
if (source == null) throw new ArgumentNullException(nameof(source));
var maxItems = maxMemoryItems ?? (int)Math.Sqrt(source.Count());
return new ExternalDistinct(source, comparer, maxItems);
}
///
/// Aggregates large sequences with √n memory checkpoints
///
public static TAccumulate AggregateWithCheckpoints(
this IEnumerable source,
TAccumulate seed,
Func func,
int? checkpointInterval = null)
{
if (source == null) throw new ArgumentNullException(nameof(source));
if (func == null) throw new ArgumentNullException(nameof(func));
var accumulator = seed;
var count = 0;
var interval = checkpointInterval ?? (int)Math.Sqrt(source.Count());
var checkpoints = new Stack<(int index, TAccumulate value)>();
foreach (var item in source)
{
accumulator = func(accumulator, item);
count++;
if (count % interval == 0)
{
// Deep copy if TAccumulate is a reference type
var checkpoint = accumulator is ICloneable cloneable
? (TAccumulate)cloneable.Clone()
: accumulator;
checkpoints.Push((count, checkpoint));
}
}
return accumulator;
}
///
/// Memory-efficient set operations using external storage
///
public static IEnumerable UnionExternal(
this IEnumerable first,
IEnumerable second,
IEqualityComparer comparer = null)
{
if (first == null) throw new ArgumentNullException(nameof(first));
if (second == null) throw new ArgumentNullException(nameof(second));
var totalCount = first.Count() + second.Count();
var bufferSize = (int)Math.Sqrt(totalCount);
return ExternalSetOperation(first, second, SetOperation.Union, comparer, bufferSize);
}
///
/// Async enumerable with √n buffering for optimal memory usage
///
public static async IAsyncEnumerable> BufferAsync(
this IAsyncEnumerable source,
int? bufferSize = null,
[EnumeratorCancellation] CancellationToken cancellationToken = default)
{
if (source == null) throw new ArgumentNullException(nameof(source));
var buffer = new List(bufferSize ?? 1000);
var optimalSize = bufferSize ?? (int)Math.Sqrt(1000000); // Assume large dataset
await foreach (var item in source.WithCancellation(cancellationToken))
{
buffer.Add(item);
if (buffer.Count >= optimalSize)
{
yield return buffer;
buffer = new List(optimalSize);
}
}
if (buffer.Count > 0)
{
yield return buffer;
}
}
// Private helper methods
private static IEnumerable ExternalJoinIterator(
IEnumerable outer,
IEnumerable inner,
Func outerKeySelector,
Func innerKeySelector,
Func resultSelector,
IEqualityComparer comparer,
int bufferSize)
{
comparer ??= EqualityComparer.Default;
// Process inner sequence in chunks
foreach (var innerChunk in inner.Chunk(bufferSize))
{
var lookup = innerChunk.ToLookup(innerKeySelector, comparer);
foreach (var outerItem in outer)
{
var key = outerKeySelector(outerItem);
foreach (var innerItem in lookup[key])
{
yield return resultSelector(outerItem, innerItem);
}
}
}
}
private static void SaveCheckpoint(List data, string path)
{
// Simplified - in production would use proper serialization
using var writer = new StreamWriter(path);
writer.WriteLine(data.Count);
foreach (var item in data)
{
writer.WriteLine(item?.ToString() ?? "null");
}
}
private static List RestoreCheckpoint(string path)
{
// Simplified - in production would use proper deserialization
var lines = File.ReadAllLines(path);
var count = int.Parse(lines[0]);
var result = new List(count);
// This is a simplified implementation
// Real implementation would handle type conversion properly
for (int i = 1; i <= count && i < lines.Length; i++)
{
if (typeof(T) == typeof(string))
{
result.Add((T)(object)lines[i]);
}
else if (typeof(T) == typeof(int) && int.TryParse(lines[i], out var intVal))
{
result.Add((T)(object)intVal);
}
// Add more type conversions as needed
}
return result;
}
private static IEnumerable ExternalSetOperation(
IEnumerable first,
IEnumerable second,
SetOperation operation,
IEqualityComparer comparer,
int bufferSize)
{
// Simplified external set operation
var seen = new HashSet(comparer);
var spillFile = Path.GetTempFileName();
try
{
// Process first sequence
foreach (var item in first)
{
if (seen.Count >= bufferSize)
{
// Spill to disk
SpillToDisk(seen, spillFile);
seen.Clear();
}
if (seen.Add(item))
{
yield return item;
}
}
// Process second sequence for union
if (operation == SetOperation.Union)
{
foreach (var item in second)
{
if (!seen.Contains(item) && !ExistsInSpillFile(item, spillFile, comparer))
{
yield return item;
}
}
}
}
finally
{
if (File.Exists(spillFile))
{
File.Delete(spillFile);
}
}
}
private static void SpillToDisk(HashSet items, string path)
{
using var writer = new StreamWriter(path, append: true);
foreach (var item in items)
{
writer.WriteLine(item?.ToString() ?? "null");
}
}
private static bool ExistsInSpillFile(T item, string path, IEqualityComparer comparer)
{
if (!File.Exists(path)) return false;
// Simplified - real implementation would be more efficient
var itemStr = item?.ToString() ?? "null";
return File.ReadLines(path).Any(line => line == itemStr);
}
private enum SetOperation
{
Union,
Intersect,
Except
}
}
// Supporting classes
internal class ExternalOrderedEnumerable : IOrderedEnumerable
{
private readonly IEnumerable _source;
private readonly Func _keySelector;
private readonly IComparer _comparer;
private readonly int _bufferSize;
public ExternalOrderedEnumerable(
IEnumerable source,
Func keySelector,
IComparer comparer,
int? bufferSize)
{
_source = source;
_keySelector = keySelector;
_comparer = comparer ?? Comparer.Default;
_bufferSize = bufferSize ?? (int)Math.Sqrt(source.Count());
}
public IOrderedEnumerable CreateOrderedEnumerable(
Func keySelector,
IComparer comparer,
bool descending)
{
// Simplified - would need proper implementation
throw new NotImplementedException();
}
public IEnumerator GetEnumerator()
{
// External merge sort implementation
var chunks = new List>();
var chunk = new List(_bufferSize);
foreach (var item in _source)
{
chunk.Add(item);
if (chunk.Count >= _bufferSize)
{
chunks.Add(chunk.OrderBy(_keySelector, _comparer).ToList());
chunk = new List(_bufferSize);
}
}
if (chunk.Count > 0)
{
chunks.Add(chunk.OrderBy(_keySelector, _comparer).ToList());
}
// Merge sorted chunks
return MergeSortedChunks(chunks).GetEnumerator();
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
private IEnumerable MergeSortedChunks(List> chunks)
{
var indices = new int[chunks.Count];
while (true)
{
TSource minItem = default;
TKey minKey = default;
int minChunk = -1;
// Find minimum across all chunks
for (int i = 0; i < chunks.Count; i++)
{
if (indices[i] < chunks[i].Count)
{
var item = chunks[i][indices[i]];
var key = _keySelector(item);
if (minChunk == -1 || _comparer.Compare(key, minKey) < 0)
{
minItem = item;
minKey = key;
minChunk = i;
}
}
}
if (minChunk == -1) yield break;
yield return minItem;
indices[minChunk]++;
}
}
}
internal class ExternalGrouping : IEnumerable>
{
private readonly IEnumerable _source;
private readonly Func _keySelector;
private readonly int _bufferSize;
public ExternalGrouping(IEnumerable source, Func keySelector, int bufferSize)
{
_source = source;
_keySelector = keySelector;
_bufferSize = bufferSize;
}
public IEnumerator> GetEnumerator()
{
var groups = new Dictionary>(_bufferSize);
var spilledGroups = new Dictionary();
foreach (var item in _source)
{
var key = _keySelector(item);
if (!groups.ContainsKey(key))
{
if (groups.Count >= _bufferSize)
{
// Spill largest group to disk
SpillLargestGroup(groups, spilledGroups);
}
groups[key] = new List();
}
groups[key].Add(item);
}
// Return in-memory groups
foreach (var kvp in groups)
{
yield return new Grouping(kvp.Key, kvp.Value);
}
// Return spilled groups
foreach (var kvp in spilledGroups)
{
var items = LoadSpilledGroup(kvp.Value);
yield return new Grouping(kvp.Key, items);
File.Delete(kvp.Value);
}
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
private void SpillLargestGroup(
Dictionary> groups,
Dictionary spilledGroups)
{
var largest = groups.OrderByDescending(g => g.Value.Count).First();
var spillFile = Path.GetTempFileName();
// Simplified serialization
File.WriteAllLines(spillFile, largest.Value.Select(v => v?.ToString() ?? "null"));
spilledGroups[largest.Key] = spillFile;
groups.Remove(largest.Key);
}
private List LoadSpilledGroup(string path)
{
// Simplified deserialization
return File.ReadAllLines(path).Select(line => (T)(object)line).ToList();
}
}
internal class Grouping : IGrouping
{
public TKey Key { get; }
private readonly IEnumerable _elements;
public Grouping(TKey key, IEnumerable elements)
{
Key = key;
_elements = elements;
}
public IEnumerator GetEnumerator()
{
return _elements.GetEnumerator();
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
internal class ExternalDistinct : IEnumerable
{
private readonly IEnumerable _source;
private readonly IEqualityComparer _comparer;
private readonly int _maxMemoryItems;
public ExternalDistinct(IEnumerable source, IEqualityComparer comparer, int maxMemoryItems)
{
_source = source;
_comparer = comparer ?? EqualityComparer.Default;
_maxMemoryItems = maxMemoryItems;
}
public IEnumerator GetEnumerator()
{
var seen = new HashSet(_comparer);
var spillFile = Path.GetTempFileName();
try
{
foreach (var item in _source)
{
if (seen.Count >= _maxMemoryItems)
{
// Spill to disk and clear memory
SpillHashSet(seen, spillFile);
seen.Clear();
}
if (seen.Add(item) && !ExistsInSpillFile(item, spillFile))
{
yield return item;
}
}
}
finally
{
if (File.Exists(spillFile))
{
File.Delete(spillFile);
}
}
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
private void SpillHashSet(HashSet items, string path)
{
using var writer = new StreamWriter(path, append: true);
foreach (var item in items)
{
writer.WriteLine(item?.ToString() ?? "null");
}
}
private bool ExistsInSpillFile(T item, string path)
{
if (!File.Exists(path)) return false;
var itemStr = item?.ToString() ?? "null";
return File.ReadLines(path).Any(line => line == itemStr);
}
}
}