Files
2025-11-28 17:51:29 -05:00

176 lines
6.3 KiB
C#

using MarketAlly.IronWiki;
using MarketAlly.IronWiki.Nodes;
using MarketAlly.IronWiki.Parsing;
using Xunit;
namespace MarketAlly.IronWiki.Tests;
/// <summary>
/// Smoke tests that run <see cref="WikitextParser"/> over full-size article dumps stored in the
/// <c>examples</c> directory and print parse statistics to the console.
/// </summary>
public class LargeFileTest
{
    // Fixture file names, looked up inside the "examples" directory.
    private const string BridgeArticleFile = "wiki_en_3397.txt";
    private const string AstrosArticleFile = "wiki_en_58817434.txt";

    // Upper bound on how many diagnostics are echoed before the list is summarized.
    private const int MaxDiagnosticsShown = 10;

    /// <summary>
    /// Parses the bridge article and checks that a non-empty document is produced.
    /// </summary>
    [Fact]
    public void Parse_WikiBridgeArticle_Succeeds()
    {
        // Arrange
        var wikitext = ReadExample(BridgeArticleFile);

        // Act + Assert
        ParseAndReport(wikitext, requireTables: false);
    }

    /// <summary>
    /// Parses the Astros article (full file) and additionally checks that at least one
    /// top-level <see cref="Table"/> block is present.
    /// </summary>
    [Fact]
    public void Parse_WikiAstrosArticleWithTables_Succeeds()
    {
        // Arrange - this article has wiki tables
        var wikitext = ReadExample(AstrosArticleFile);
        Console.WriteLine($"Testing with {wikitext.Length:N0} characters");

        // Act + Assert
        ParseAndReport(wikitext, requireTables: true);
    }

    /// <summary>
    /// Parses the Astros article while collecting diagnostics and prints up to
    /// <see cref="MaxDiagnosticsShown"/> of them. Only a non-null result is asserted;
    /// the diagnostics themselves are informational.
    /// </summary>
    [Fact]
    public void Parse_WithDiagnostics_CollectsRecoveryInfo()
    {
        // Arrange - parse a file that may require recovery
        var wikitext = ReadExample(AstrosArticleFile);
        var parser = new WikitextParser();
        var diagnostics = new List<ParsingDiagnostic>();

        // Act
        var result = parser.Parse(wikitext, diagnostics);

        // Assert
        Assert.NotNull(result);

        Console.WriteLine($"Total diagnostics: {diagnostics.Count}");
        foreach (var diag in diagnostics.Take(MaxDiagnosticsShown))
        {
            Console.WriteLine(diag.ToString());
        }

        if (diagnostics.Count > MaxDiagnosticsShown)
        {
            Console.WriteLine($"... and {diagnostics.Count - MaxDiagnosticsShown} more");
        }
    }

    /// <summary>
    /// Locates <paramref name="fileName"/> in the <c>examples</c> directory and returns its text.
    /// </summary>
    /// <param name="fileName">File name of the fixture, without any directory part.</param>
    /// <returns>The complete contents of the fixture file.</returns>
    /// <remarks>
    /// The file is first looked up under <see cref="AppContext.BaseDirectory"/>; if it is not
    /// there, a path three levels above the current directory is tried. The test fails with the
    /// last attempted path in the message when neither location contains the file.
    /// </remarks>
    private static string ReadExample(string fileName)
    {
        var filePath = Path.Combine(AppContext.BaseDirectory, "examples", fileName);
        if (!File.Exists(filePath))
        {
            // Try relative path from test project
            filePath = Path.Combine(Directory.GetCurrentDirectory(), "..", "..", "..", "examples", fileName);
        }

        Assert.True(File.Exists(filePath), $"Test file not found at {filePath}");
        return File.ReadAllText(filePath);
    }

    /// <summary>
    /// Parses <paramref name="wikitext"/>, asserts that the document is non-null and has at least
    /// one line, and prints timing, node statistics and the round-trip length difference.
    /// </summary>
    /// <param name="wikitext">The wikitext to parse.</param>
    /// <param name="requireTables">
    /// When <see langword="true"/>, also asserts that the document contains at least one
    /// top-level <see cref="Table"/>.
    /// </param>
    private static void ParseAndReport(string wikitext, bool requireTables)
    {
        var parser = new WikitextParser();

        // Act
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        var result = parser.Parse(wikitext);
        stopwatch.Stop();

        // Assert
        Assert.NotNull(result);
        Assert.True(result.Lines.Count > 0, "Document should have lines");

        // Output some stats
        var nodeCount = CountNodes(result);
        Console.WriteLine($"Parsed {wikitext.Length:N0} characters in {stopwatch.ElapsedMilliseconds}ms");
        Console.WriteLine($"Total nodes: {nodeCount}");
        Console.WriteLine($"Lines/blocks: {result.Lines.Count}");

        // Count specific node types. Templates and links are counted through the whole tree;
        // headings and tables are counted among the top-level lines only.
        var templates = CountNodeType<Template>(result);
        var wikiLinks = CountNodeType<WikiLink>(result);
        var externalLinks = CountNodeType<ExternalLink>(result);
        var headings = result.Lines.OfType<Heading>().Count();
        var tables = result.Lines.OfType<Table>().Count();
        Console.WriteLine($"Templates: {templates}");
        Console.WriteLine($"Wiki links: {wikiLinks}");
        Console.WriteLine($"External links: {externalLinks}");
        Console.WriteLine($"Headings: {headings}");
        Console.WriteLine($"Tables: {tables}");

        if (requireTables)
        {
            Assert.True(tables > 0, "Article should contain wiki tables");
        }

        // Report the round-trip size. This is informational only: the output may differ
        // slightly from the input due to normalization, so the difference is not asserted.
        var output = result.ToString();
        Console.WriteLine($"Output length: {output.Length:N0} characters");
        var lengthDiff = Math.Abs(output.Length - wikitext.Length);
        var percentDiff = (double)lengthDiff / wikitext.Length * 100;
        Console.WriteLine($"Length difference: {lengthDiff:N0} ({percentDiff:F2}%)");
    }

    /// <summary>
    /// Counts <paramref name="node"/> itself plus every node reachable from it through
    /// <c>EnumerateChildren()</c>, recursively.
    /// </summary>
    /// <param name="node">Root of the subtree to count.</param>
    /// <returns>The number of nodes in the subtree, including the root.</returns>
    private static int CountNodes(WikiNode node)
    {
        var count = 1;
        foreach (var child in node.EnumerateChildren())
        {
            count += CountNodes(child);
        }

        return count;
    }

    /// <summary>
    /// Counts the nodes of type <typeparamref name="T"/> in the subtree rooted at
    /// <paramref name="node"/>, the root included.
    /// </summary>
    /// <typeparam name="T">The node type to look for.</typeparam>
    /// <param name="node">Root of the subtree to search.</param>
    /// <returns>The number of matching nodes.</returns>
    private static int CountNodeType<T>(WikiNode node) where T : WikiNode
    {
        var count = node is T ? 1 : 0;
        foreach (var child in node.EnumerateChildren())
        {
            count += CountNodeType<T>(child);
        }

        return count;
    }
}