- Replace encoding/json with modules/json (depguard)
- Add error handling for json.Unmarshal and WriteItem calls (errcheck)
- Use slices.Contains instead of manual loops (modernize)
- Use any instead of interface{} (modernize)
- Use min/max built-in functions (modernize)
- Use strings.FieldsSeq and strings.SplitSeq (modernize)
- Use range over int for loops (modernize)
- Use http.MethodOptions constant (usestdlibvars)
- Use tagged switch statements (staticcheck)
- Use += and /= operators (gocritic)
- Fix gofumpt formatting issues
- Remove unused streamLargeFile function
- Remove unused primaryLang parameter
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
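For reference, the following is an illustrative sketch only (not code from this commit) of what a couple of the modernize rewrites listed above look like in practice: `slices.Contains` replacing a hand-written membership loop, `any` replacing `interface{}`, and the `min` built-in.

```go
// Illustrative sketch only (not from this commit): the shape of the
// modernize rewrites listed above.
package main

import (
	"fmt"
	"slices"
)

// before: a hand-written membership loop over a []string
func containsManual(items []string, target string) bool {
	for _, it := range items {
		if it == target {
			return true
		}
	}
	return false
}

// after: the standard-library helper added in Go 1.21
func containsModern(items []string, target string) bool {
	return slices.Contains(items, target)
}

func main() {
	tags := []string{"wiki", "index"}
	var v any = len(tags) // any rather than interface{}

	fmt.Println(containsManual(tags, "wiki"))  // true
	fmt.Println(containsModern(tags, "index")) // true
	fmt.Println(min(3, v.(int)))               // min built-in (Go 1.21+): prints 2
}
```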
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package wiki

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"regexp"
	"slices"
	"strings"

	repo_model "code.gitea.io/gitea/models/repo"
	"code.gitea.io/gitea/modules/git"
	"code.gitea.io/gitea/modules/gitrepo"
	"code.gitea.io/gitea/modules/json"
	"code.gitea.io/gitea/modules/log"
)

// IndexWikiPage indexes a single wiki page for search
func IndexWikiPage(ctx context.Context, repo *repo_model.Repository, pageName string) error {
	wikiRepo, commit, err := findWikiRepoCommit(ctx, repo)
	if err != nil {
		return err
	}
	if wikiRepo != nil {
		defer wikiRepo.Close()
	}
	if commit == nil {
		return nil
	}

	// Get the page content
	pagePath := WebPathToGitPath(WebPath(pageName))
	entry, err := commit.GetTreeEntryByPath(pagePath)
	if err != nil {
		return err
	}

	blob := entry.Blob()
	content, err := blob.GetBlobContent(1024 * 1024) // 1MB max
	if err != nil {
		return err
	}

	// Calculate hash
	hash := sha256.Sum256([]byte(content))
	contentHash := hex.EncodeToString(hash[:])

	// Check if already indexed with same hash
	existing, err := repo_model.GetWikiIndex(ctx, repo.ID, pageName)
	if err != nil {
		return err
	}
	if existing != nil && existing.ContentHash == contentHash {
		return nil // Already up to date
	}

	// Extract links from content
	links := extractWikiLinks(content)
	linksJSON, _ := json.Marshal(links)

	// Count words
	wordCount := countWords(content)

	// Get title from first heading or page name
	title := extractTitle(content, pageName)

	// Create/update index
	idx := &repo_model.WikiIndex{
		RepoID:      repo.ID,
		PageName:    pageName,
		PagePath:    pagePath,
		Title:       title,
		Content:     content,
		ContentHash: contentHash,
		CommitSHA:   commit.ID.String(),
		WordCount:   wordCount,
		LinksOut:    string(linksJSON),
	}

	return repo_model.CreateOrUpdateWikiIndex(ctx, idx)
}

// IndexAllWikiPages indexes all pages in a wiki
func IndexAllWikiPages(ctx context.Context, repo *repo_model.Repository) error {
	wikiRepo, commit, err := findWikiRepoCommit(ctx, repo)
	if err != nil {
		return err
	}
	if wikiRepo != nil {
		defer wikiRepo.Close()
	}
	if commit == nil {
		return nil
	}

	// Get all entries
	entries, err := commit.ListEntries()
	if err != nil {
		return err
	}

	indexedPages := make(map[string]bool)

	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		if !strings.HasSuffix(entry.Name(), ".md") {
			continue
		}

		webPath, err := GitPathToWebPath(entry.Name())
		if err != nil {
			continue
		}
		pageName := string(webPath)
		if pageName == "" {
			continue
		}

		if err := IndexWikiPage(ctx, repo, pageName); err != nil {
			log.Warn("Failed to index wiki page %s: %v", pageName, err)
			continue
		}
		indexedPages[pageName] = true
	}

	// Remove deleted pages from index
	existingIndexes, err := repo_model.GetWikiIndexByRepo(ctx, repo.ID)
	if err != nil {
		return err
	}

	for _, idx := range existingIndexes {
		if !indexedPages[idx.PageName] {
			if err := repo_model.DeleteWikiIndex(ctx, repo.ID, idx.PageName); err != nil {
				log.Warn("Failed to remove deleted wiki page from index %s: %v", idx.PageName, err)
			}
		}
	}

	return nil
}

// RemoveWikiPageFromIndex removes a page from the search index
func RemoveWikiPageFromIndex(ctx context.Context, repoID int64, pageName string) error {
	return repo_model.DeleteWikiIndex(ctx, repoID, pageName)
}

// ClearWikiIndex removes all indexed pages for a repository
func ClearWikiIndex(ctx context.Context, repoID int64) error {
	return repo_model.DeleteWikiIndexByRepo(ctx, repoID)
}

// GetWikiGraph returns the link graph for a wiki
func GetWikiGraph(ctx context.Context, repoID int64) (
	nodes []map[string]any,
	edges []map[string]any,
	err error,
) {
	indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID)
	if err != nil {
		return nil, nil, err
	}

	nodes = make([]map[string]any, 0, len(indexes))
	edges = make([]map[string]any, 0)
	pageSet := make(map[string]bool)

	// Build nodes
	for _, idx := range indexes {
		pageSet[idx.PageName] = true
		nodes = append(nodes, map[string]any{
			"name":       idx.PageName,
			"title":      idx.Title,
			"word_count": idx.WordCount,
		})
	}

	// Build edges from links
	for _, idx := range indexes {
		var links []string
		if idx.LinksOut != "" {
			_ = json.Unmarshal([]byte(idx.LinksOut), &links)
		}
		for _, link := range links {
			if pageSet[link] { // Only include links to existing pages
				edges = append(edges, map[string]any{
					"source": idx.PageName,
					"target": link,
				})
			}
		}
	}

	return nodes, edges, nil
}

// GetWikiIncomingLinks returns pages that link to the given page
func GetWikiIncomingLinks(ctx context.Context, repoID int64, pageName string) ([]string, error) {
	indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID)
	if err != nil {
		return nil, err
	}

	incoming := make([]string, 0)
	for _, idx := range indexes {
		var links []string
		if idx.LinksOut != "" {
			_ = json.Unmarshal([]byte(idx.LinksOut), &links)
		}
		if slices.Contains(links, pageName) {
			incoming = append(incoming, idx.PageName)
		}
	}

	return incoming, nil
}

// GetOrphanedPages returns pages with no incoming links
func GetOrphanedPages(ctx context.Context, repoID int64) ([]*repo_model.WikiIndex, error) {
	indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID)
	if err != nil {
		return nil, err
	}

	// Build set of pages that are linked to
	linkedPages := make(map[string]bool)
	for _, idx := range indexes {
		var links []string
		if idx.LinksOut != "" {
			_ = json.Unmarshal([]byte(idx.LinksOut), &links)
		}
		for _, link := range links {
			linkedPages[link] = true
		}
	}

	// Find orphaned pages (excluding Home which is always accessible)
	orphaned := make([]*repo_model.WikiIndex, 0)
	for _, idx := range indexes {
		if idx.PageName != "Home" && !linkedPages[idx.PageName] {
			orphaned = append(orphaned, idx)
		}
	}

	return orphaned, nil
}

// GetDeadLinks returns links to non-existent pages
func GetDeadLinks(ctx context.Context, repoID int64) ([]map[string]string, error) {
	indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID)
	if err != nil {
		return nil, err
	}

	// Build set of existing pages
	existingPages := make(map[string]bool)
	for _, idx := range indexes {
		existingPages[idx.PageName] = true
	}

	// Find dead links
	deadLinks := make([]map[string]string, 0)
	for _, idx := range indexes {
		var links []string
		if idx.LinksOut != "" {
			_ = json.Unmarshal([]byte(idx.LinksOut), &links)
		}
		for _, link := range links {
			if !existingPages[link] {
				deadLinks = append(deadLinks, map[string]string{
					"page":        idx.PageName,
					"broken_link": link,
				})
			}
		}
	}

	return deadLinks, nil
}

// findWikiRepoCommit opens the wiki repo and gets the latest commit
func findWikiRepoCommit(ctx context.Context, repo *repo_model.Repository) (*git.Repository, *git.Commit, error) {
	wikiRepo, err := gitrepo.OpenRepository(ctx, repo.WikiStorageRepo())
	if err != nil {
		if git.IsErrNotExist(err) {
			return nil, nil, nil
		}
		return nil, nil, err
	}

	branch := repo.DefaultWikiBranch
	if branch == "" {
		branch = "master"
	}

	commit, err := wikiRepo.GetBranchCommit(branch)
	if err != nil {
		wikiRepo.Close()
		if git.IsErrNotExist(err) {
			return nil, nil, nil
		}
		return nil, nil, err
	}

	return wikiRepo, commit, nil
}

// extractWikiLinks extracts wiki page links from markdown content
func extractWikiLinks(content string) []string {
	links := make([]string, 0)
	seen := make(map[string]bool)

	// Match [[Page Name]] style wiki links
	wikiLinkRe := regexp.MustCompile(`\[\[([^\]|]+)(?:\|[^\]]+)?\]\]`)
	matches := wikiLinkRe.FindAllStringSubmatch(content, -1)
	for _, match := range matches {
		if len(match) > 1 {
			link := strings.TrimSpace(match[1])
			// Convert to page name format
			link = strings.ReplaceAll(link, " ", "-")
			if !seen[link] {
				links = append(links, link)
				seen[link] = true
			}
		}
	}

	// Match [text](wiki/Page-Name) style links
	mdLinkRe := regexp.MustCompile(`\[([^\]]+)\]\((?:\.\.?/)?(?:wiki/)?([^)]+)\)`)
	matches = mdLinkRe.FindAllStringSubmatch(content, -1)
	for _, match := range matches {
		if len(match) > 2 {
			link := match[2]
			// Skip external links
			if strings.HasPrefix(link, "http://") || strings.HasPrefix(link, "https://") {
				continue
			}
			// Clean up the link
			link = strings.TrimPrefix(link, "./")
			link = strings.TrimSuffix(link, ".md")
			if !seen[link] && link != "" {
				links = append(links, link)
				seen[link] = true
			}
		}
	}

	return links
}

// extractTitle extracts the title from markdown content
func extractTitle(content, defaultTitle string) string {
	// Look for first H1 heading
	for line := range strings.SplitSeq(content, "\n") {
		line = strings.TrimSpace(line)
		if title, ok := strings.CutPrefix(line, "# "); ok {
			return title
		}
	}
	return defaultTitle
}

// countWords counts the number of words in content
func countWords(content string) int {
	// Remove markdown formatting
	content = regexp.MustCompile(`[#*_\[\](){}]`).ReplaceAllString(content, " ")
	// Split on whitespace
	words := strings.Fields(content)
	return len(words)
}
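A hedged usage sketch follows for context. The package name `wikimaint`, the import path `code.gitea.io/gitea/services/wiki`, and the maintenance-task framing are assumptions, not part of this file; `IndexAllWikiPages` and `GetDeadLinks` are the entry points defined above.

```go
// Hedged usage sketch: re-index a repository's wiki and report broken links.
// The package name and the services/wiki import path are assumptions.
package wikimaint

import (
	"context"
	"fmt"
	"log"

	repo_model "code.gitea.io/gitea/models/repo"
	wiki_service "code.gitea.io/gitea/services/wiki"
)

// ReindexWiki rebuilds the search index for one repository's wiki and logs
// any links that point at pages which no longer exist.
func ReindexWiki(ctx context.Context, repo *repo_model.Repository) error {
	// Pages whose content hash is unchanged are skipped by IndexWikiPage.
	if err := wiki_service.IndexAllWikiPages(ctx, repo); err != nil {
		return fmt.Errorf("index wiki for repo %d: %w", repo.ID, err)
	}

	dead, err := wiki_service.GetDeadLinks(ctx, repo.ID)
	if err != nil {
		return fmt.Errorf("collect dead links: %w", err)
	}
	for _, d := range dead {
		log.Printf("page %q has a broken link to %q", d["page"], d["broken_link"])
	}
	return nil
}
```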