# ViewEngine.Client

Official .NET client library for consuming the ViewEngine REST API. Retrieve web pages, extract content, and process web data with ViewEngine's distributed web scraping service.
|
|
|
|
## Installation
|
|
|
|
```bash
|
|
dotnet add package ViewEngine.Client
|
|
```
|
|
|
|
## Quick Start
|
|
|
|
### Basic Usage
|
|
|
|
```csharp
|
|
using ViewEngine.Client;
|
|
using ViewEngine.Client.Models;
|
|
|
|
// Create client with API key
|
|
var client = new ViewEngineClient("ak_your-api-key-here");
|
|
|
|
// Submit a retrieval request
|
|
var request = new SubmitRetrievalRequest
|
|
{
|
|
Url = "https://example.com",
|
|
TimeoutSeconds = 60,
|
|
Priority = 5,
|
|
GenerateSummary = true // Optional: Generate AI summary
|
|
};
|
|
|
|
// Submit and wait for completion
|
|
var pageData = await client.RetrieveAndWaitAsync(request);
|
|
|
|
Console.WriteLine($"Title: {pageData.Title}");
|
|
Console.WriteLine($"Summary: {pageData.Summary}"); // AI-generated summary if requested
|
|
Console.WriteLine($"Body: {pageData.Body}");
|
|
Console.WriteLine($"Links found: {pageData.Routes.Count}");
|
|
```
|
|
|
|
### Advanced Usage with Polling
|
|
|
|
```csharp
|
|
// Submit request
|
|
var submitResponse = await client.SubmitRetrievalAsync(request);
|
|
Console.WriteLine($"Request ID: {submitResponse.RequestId}");
|
|
|
|
// Poll for status
|
|
RetrievalStatusResponse status;
|
|
do
|
|
{
|
|
status = await client.GetRetrievalStatusAsync(submitResponse.RequestId);
|
|
Console.WriteLine($"Status: {status.Status}");
|
|
|
|
if (status.Status == "complete")
|
|
{
|
|
// Download content
|
|
var content = await client.GetPageContentAsync(submitResponse.RequestId);
|
|
Console.WriteLine($"Retrieved: {content.Title}");
|
|
break;
|
|
}
|
|
|
|
await Task.Delay(2000); // Wait 2 seconds
|
|
} while (status.Status == "queued" || status.Status == "processing");
|
|
```
|
|
|
|
## Dependency Injection
|
|
|
|
### ASP.NET Core / Minimal API
|
|
|
|
**appsettings.json:**
|
|
```json
|
|
{
|
|
"ViewEngine": {
|
|
"ApiKey": "ak_your-api-key-here",
|
|
"BaseUrl": "https://www.viewengine.io/api/v1",
|
|
"TimeoutSeconds": 120,
|
|
"MaxRetries": 3
|
|
}
|
|
}
|
|
```
|
|
|
|
**Program.cs:**
|
|
```csharp
|
|
using ViewEngine.Client.Extensions;
|
|
|
|
var builder = WebApplication.CreateBuilder(args);
|
|
|
|
// Add ViewEngine client
|
|
builder.Services.AddViewEngineClient(builder.Configuration);
|
|
|
|
var app = builder.Build();
|
|
|
|
app.MapGet("/retrieve", async (ViewEngineClient client) =>
|
|
{
|
|
var request = new SubmitRetrievalRequest { Url = "https://example.com" };
|
|
var pageData = await client.RetrieveAndWaitAsync(request);
|
|
return Results.Ok(pageData);
|
|
});
|
|
|
|
app.Run();
|
|
```
|
|
|
|
### With Options Pattern
|
|
|
|
```csharp
|
|
using ViewEngine.Client.Extensions;
|
|
|
|
builder.Services.AddViewEngineClient(options =>
|
|
{
|
|
options.ApiKey = "ak_your-api-key-here";
|
|
options.BaseUrl = "https://www.viewengine.io/api/v1";
|
|
options.TimeoutSeconds = 120;
|
|
options.MaxRetries = 3;
|
|
});
|
|
```
|
|
|
|
## Features
|
|
|
|
### Web Page Retrieval
|
|
|
|
```csharp
|
|
var request = new SubmitRetrievalRequest
|
|
{
|
|
Url = "https://example.com",
|
|
TimeoutSeconds = 60,
|
|
ForceRefresh = true,
|
|
Priority = 8,
|
|
PreferredPlatform = "Windows", // Android, iOS, or Windows
|
|
GenerateSummary = true // Generate AI summary of page content
|
|
};
|
|
|
|
var pageData = await client.RetrieveAndWaitAsync(request);
|
|
|
|
// Access extracted data
|
|
Console.WriteLine($"Title: {pageData.Title}");
|
|
Console.WriteLine($"Summary: {pageData.Summary}"); // AI-generated summary
|
|
Console.WriteLine($"Description: {pageData.MetaDescription}");
|
|
Console.WriteLine($"Body Text: {pageData.Body}");
|
|
Console.WriteLine($"Favicon: {pageData.FaviconUrl}");
|
|
|
|
// Navigation links
|
|
foreach (var link in pageData.Routes)
|
|
{
|
|
Console.WriteLine($"{link.Text} -> {link.Url}");
|
|
}
|
|
|
|
// Body links with ad detection
|
|
foreach (var link in pageData.BodyRoutes)
|
|
{
|
|
if (link.IsPotentialAd)
|
|
Console.WriteLine($"[AD] {link.Text} ({link.AdReason})");
|
|
else
|
|
Console.WriteLine($"{link.Text} -> {link.Url}");
|
|
}
|
|
```
|
|
|
|
### Client Management
|
|
|
|
```csharp
|
|
// Add a client
|
|
var addRequest = new AddClientRequest
|
|
{
|
|
Email = "user@example.com",
|
|
Alias = "Production Feeder",
|
|
CustomUserId = "prod-001",
|
|
DailyMaximum = 1000
|
|
};
|
|
|
|
var newClient = await client.AddClientAsync(addRequest);
|
|
|
|
// List all clients
|
|
var clients = await client.GetClientsAsync();
|
|
foreach (var c in clients)
|
|
{
|
|
Console.WriteLine($"{c.Alias}: {c.FeederOnline ? "Online" : "Offline"}");
|
|
}
|
|
|
|
// Route job to specific client
|
|
var routedRequest = new SubmitRetrievalRequest
|
|
{
|
|
Url = "https://example.com",
|
|
ClientId = newClient.Id // or CustomUserId = "prod-001"
|
|
};
|
|
|
|
// Get client stats
|
|
var stats = await client.GetClientStatsAsync(newClient.Id);
|
|
Console.WriteLine($"Total jobs: {stats.TotalJobsProcessed}");
|
|
Console.WriteLine($"Success rate: {stats.SuccessRate}%");
|
|
|
|
// Suspend/Activate/Delete
|
|
await client.SuspendClientAsync(newClient.Id);
|
|
await client.ActivateClientAsync(newClient.Id);
|
|
await client.DeleteClientAsync(newClient.Id);
|
|
```
|
|
|
|
### Error Handling
|
|
|
|
```csharp
|
|
try
|
|
{
|
|
var pageData = await client.RetrieveAndWaitAsync(request);
|
|
}
|
|
catch (HttpRequestException ex)
|
|
{
|
|
// HTTP errors (network, server errors)
|
|
Console.WriteLine($"HTTP Error: {ex.Message}");
|
|
}
|
|
catch (InvalidOperationException ex)
|
|
{
|
|
// Failed retrieval or invalid responses
|
|
Console.WriteLine($"Operation Error: {ex.Message}");
|
|
}
|
|
catch (OperationCanceledException)
|
|
{
|
|
// Request was canceled
|
|
Console.WriteLine("Request canceled");
|
|
}
|
|
```
|
|
|
|
## Configuration Options

| Option | Default | Description |
|--------|---------|-------------|
| `ApiKey` | (required) | Your ViewEngine API key |
| `BaseUrl` | `https://www.viewengine.io/api/v1` | API base URL |
| `TimeoutSeconds` | `120` | HTTP request timeout, in seconds |
| `MaxRetries` | `3` | Maximum retry attempts |
| `BaseDelayMs` | `1000` | Base delay for exponential backoff |
| `DefaultPollingIntervalMs` | `2000` | Default polling interval for status checks |

## Request Options

### SubmitRetrievalRequest

| Property | Type | Default | Description |
|----------|------|---------|-------------|
| `Url` | `string` | (required) | URL to retrieve |
| `TimeoutSeconds` | `int` | `60` | Max wait time in seconds (max: 300) |
| `ForceRefresh` | `bool` | `false` | Bypass cache |
| `RequiredQuorum` | `int?` | `null` | Feeders that must agree (1-10, Community mode) |
| `Priority` | `int` | `5` | Job priority (1-10) |
| `ClientId` | `Guid?` | `null` | Route to specific client |
| `CustomUserId` | `string?` | `null` | Route using custom ID |
| `PreferredPlatform` | `string?` | `null` | "Android", "iOS", or "Windows" |
| `GenerateSummary` | `bool` | `false` | Generate AI summary of page content |

## Response Models
|
|
|
|
### PageData
|
|
|
|
```csharp
|
|
public class PageData
|
|
{
|
|
public string Title { get; set; }
|
|
public string Body { get; set; }
|
|
public string? MetaDescription { get; set; }
|
|
public string Url { get; set; }
|
|
public string? FaviconUrl { get; set; }
|
|
public string? Thumbnail { get; set; } // Base64 PNG
|
|
public string? Summary { get; set; } // AI-generated summary (if requested)
|
|
public List<LinkInfo> Routes { get; set; }
|
|
public List<LinkInfo> BodyRoutes { get; set; }
|
|
}
|
|
```
|
|
|
|
### LinkInfo
|
|
|
|
```csharp
|
|
public class LinkInfo
|
|
{
|
|
public string Url { get; set; }
|
|
public string Text { get; set; }
|
|
public int Rank { get; set; }
|
|
public int Occurrences { get; set; }
|
|
public bool IsPotentialAd { get; set; }
|
|
public string? AdReason { get; set; }
|
|
}
|
|
```
|
|
|
|
## Retry & Rate Limiting

The client automatically handles:

- **Rate limits (429)**: Respects the `Retry-After` header, with exponential backoff
- **Server errors (5xx)**: Retries with exponential backoff
- **Network timeouts**: Retries with exponential backoff
- **Configurable retries**: Set `MaxRetries` in options

## API Documentation
|
|
|
|
For complete API documentation, visit: https://www.viewengine.io/docs
|
|
|
|
## Support

- Documentation: https://www.viewengine.io/docs
- GitHub: https://github.com/marketally/viewengine
- Issues: https://github.com/marketally/viewengine/issues

## License
|
|
|
|
MIT License - Copyright © 2025 ViewEngine
|