gitea/services/wiki/wiki_index.go
logikonline dfc94f6408
fix: resolve all golangci-lint errors in v2 API
- Replace encoding/json with modules/json (depguard)
- Add error handling for json.Unmarshal and WriteItem calls (errcheck)
- Use slices.Contains instead of manual loops (modernize)
- Use any instead of interface{} (modernize)
- Use min/max built-in functions (modernize)
- Use strings.FieldsSeq and strings.SplitSeq (modernize)
- Use range over int for loops (modernize)
- Use http.MethodOptions constant (usestdlibvars)
- Use tagged switch statements (staticcheck)
- Use += and /= operators (gocritic)
- Fix gofumpt formatting issues
- Remove unused streamLargeFile function
- Remove unused primaryLang parameter
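
As an illustration, a minimal sketch of two of the modernize rewrites above (hypothetical names, not an actual hunk from this change):

package sketch

import "slices"

// Before: a manual membership loop and the empty-interface spelling.
func hasOld(names []string, want string) bool {
	for _, n := range names {
		if n == want {
			return true
		}
	}
	return false
}

var metaOld map[string]interface{}

// After: slices.Contains and the equivalent any alias.
func hasNew(names []string, want string) bool {
	return slices.Contains(names, want)
}

var metaNew map[string]any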

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-09 15:49:52 -05:00


// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package wiki

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"regexp"
	"slices"
	"strings"

	repo_model "code.gitea.io/gitea/models/repo"
	"code.gitea.io/gitea/modules/git"
	"code.gitea.io/gitea/modules/gitrepo"
	"code.gitea.io/gitea/modules/json"
	"code.gitea.io/gitea/modules/log"
)

// IndexWikiPage indexes a single wiki page for search
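// Pages whose SHA-256 content hash already matches the stored index row are
// skipped, so re-indexing unchanged pages is cheap.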
func IndexWikiPage(ctx context.Context, repo *repo_model.Repository, pageName string) error {
	wikiRepo, commit, err := findWikiRepoCommit(ctx, repo)
	if err != nil {
		return err
	}
	if wikiRepo != nil {
		defer wikiRepo.Close()
	}
	if commit == nil {
		return nil
	}

	// Get the page content
	pagePath := WebPathToGitPath(WebPath(pageName))
	entry, err := commit.GetTreeEntryByPath(pagePath)
	if err != nil {
		return err
	}
	blob := entry.Blob()
	content, err := blob.GetBlobContent(1024 * 1024) // 1MB max
	if err != nil {
		return err
	}

	// Calculate hash
	hash := sha256.Sum256([]byte(content))
	contentHash := hex.EncodeToString(hash[:])

	// Check if already indexed with same hash
	existing, err := repo_model.GetWikiIndex(ctx, repo.ID, pageName)
	if err != nil {
		return err
	}
	if existing != nil && existing.ContentHash == contentHash {
		return nil // Already up to date
	}

	// Extract links from content
	links := extractWikiLinks(content)
	linksJSON, _ := json.Marshal(links)

	// Count words
	wordCount := countWords(content)

	// Get title from first heading or page name
	title := extractTitle(content, pageName)

	// Create/update index
	idx := &repo_model.WikiIndex{
		RepoID:      repo.ID,
		PageName:    pageName,
		PagePath:    pagePath,
		Title:       title,
		Content:     content,
		ContentHash: contentHash,
		CommitSHA:   commit.ID.String(),
		WordCount:   wordCount,
		LinksOut:    string(linksJSON),
	}
	return repo_model.CreateOrUpdateWikiIndex(ctx, idx)
}

// IndexAllWikiPages indexes all pages in a wiki
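// Pages deleted from the wiki repository are pruned from the index afterwards.
// Note that only root-level .md entries are considered, which is where Gitea
// stores wiki pages.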
func IndexAllWikiPages(ctx context.Context, repo *repo_model.Repository) error {
	wikiRepo, commit, err := findWikiRepoCommit(ctx, repo)
	if err != nil {
		return err
	}
	if wikiRepo != nil {
		defer wikiRepo.Close()
	}
	if commit == nil {
		return nil
	}

	// Get all entries
	entries, err := commit.ListEntries()
	if err != nil {
		return err
	}

	indexedPages := make(map[string]bool)
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		if !strings.HasSuffix(entry.Name(), ".md") {
			continue
		}
		webPath, err := GitPathToWebPath(entry.Name())
		if err != nil {
			continue
		}
		pageName := string(webPath)
		if pageName == "" {
			continue
		}
		if err := IndexWikiPage(ctx, repo, pageName); err != nil {
			log.Warn("Failed to index wiki page %s: %v", pageName, err)
			continue
		}
		indexedPages[pageName] = true
	}

	// Remove deleted pages from index
	existingIndexes, err := repo_model.GetWikiIndexByRepo(ctx, repo.ID)
	if err != nil {
		return err
	}
	for _, idx := range existingIndexes {
		if !indexedPages[idx.PageName] {
			if err := repo_model.DeleteWikiIndex(ctx, repo.ID, idx.PageName); err != nil {
				log.Warn("Failed to remove deleted wiki page %s from index: %v", idx.PageName, err)
			}
		}
	}
	return nil
}

// RemoveWikiPageFromIndex removes a page from the search index
func RemoveWikiPageFromIndex(ctx context.Context, repoID int64, pageName string) error {
	return repo_model.DeleteWikiIndex(ctx, repoID, pageName)
}

// ClearWikiIndex removes all indexed pages for a repository
func ClearWikiIndex(ctx context.Context, repoID int64) error {
	return repo_model.DeleteWikiIndexByRepo(ctx, repoID)
}

// GetWikiGraph returns the link graph for a wiki
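// Each node carries the keys "name", "title" and "word_count"; each edge maps
// "source" to "target" by page name. Links to pages that are not indexed are
// omitted from the edge list.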
func GetWikiGraph(ctx context.Context, repoID int64) (
	nodes []map[string]any,
	edges []map[string]any,
	err error,
) {
	indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID)
	if err != nil {
		return nil, nil, err
	}

	nodes = make([]map[string]any, 0, len(indexes))
	edges = make([]map[string]any, 0)
	pageSet := make(map[string]bool)

	// Build nodes
	for _, idx := range indexes {
		pageSet[idx.PageName] = true
		nodes = append(nodes, map[string]any{
			"name":       idx.PageName,
			"title":      idx.Title,
			"word_count": idx.WordCount,
		})
	}

	// Build edges from links
	for _, idx := range indexes {
		var links []string
		if idx.LinksOut != "" {
			_ = json.Unmarshal([]byte(idx.LinksOut), &links)
		}
		for _, link := range links {
			if pageSet[link] { // Only include links to existing pages
				edges = append(edges, map[string]any{
					"source": idx.PageName,
					"target": link,
				})
			}
		}
	}
	return nodes, edges, nil
}

// GetWikiIncomingLinks returns pages that link to the given page
func GetWikiIncomingLinks(ctx context.Context, repoID int64, pageName string) ([]string, error) {
	indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID)
	if err != nil {
		return nil, err
	}
	incoming := make([]string, 0)
	for _, idx := range indexes {
		var links []string
		if idx.LinksOut != "" {
			_ = json.Unmarshal([]byte(idx.LinksOut), &links)
		}
		if slices.Contains(links, pageName) {
			incoming = append(incoming, idx.PageName)
		}
	}
	return incoming, nil
}

// GetOrphanedPages returns pages with no incoming links
func GetOrphanedPages(ctx context.Context, repoID int64) ([]*repo_model.WikiIndex, error) {
	indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID)
	if err != nil {
		return nil, err
	}

	// Build set of pages that are linked to
	linkedPages := make(map[string]bool)
	for _, idx := range indexes {
		var links []string
		if idx.LinksOut != "" {
			_ = json.Unmarshal([]byte(idx.LinksOut), &links)
		}
		for _, link := range links {
			linkedPages[link] = true
		}
	}

	// Find orphaned pages (excluding Home, which is always accessible)
	orphaned := make([]*repo_model.WikiIndex, 0)
	for _, idx := range indexes {
		if idx.PageName != "Home" && !linkedPages[idx.PageName] {
			orphaned = append(orphaned, idx)
		}
	}
	return orphaned, nil
}

// GetDeadLinks returns links to non-existent pages
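// Each element has the keys "page" (the page containing the link) and
// "broken_link" (the missing target page name).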
func GetDeadLinks(ctx context.Context, repoID int64) ([]map[string]string, error) {
	indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID)
	if err != nil {
		return nil, err
	}

	// Build set of existing pages
	existingPages := make(map[string]bool)
	for _, idx := range indexes {
		existingPages[idx.PageName] = true
	}

	// Find dead links
	deadLinks := make([]map[string]string, 0)
	for _, idx := range indexes {
		var links []string
		if idx.LinksOut != "" {
			_ = json.Unmarshal([]byte(idx.LinksOut), &links)
		}
		for _, link := range links {
			if !existingPages[link] {
				deadLinks = append(deadLinks, map[string]string{
					"page":        idx.PageName,
					"broken_link": link,
				})
			}
		}
	}
	return deadLinks, nil
}

// findWikiRepoCommit opens the wiki repo and gets the latest commit
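// It falls back to the historical "master" default when DefaultWikiBranch is
// unset, and returns (nil, nil, nil) when the wiki repository or branch does
// not exist yet.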
func findWikiRepoCommit(ctx context.Context, repo *repo_model.Repository) (*git.Repository, *git.Commit, error) {
	wikiRepo, err := gitrepo.OpenRepository(ctx, repo.WikiStorageRepo())
	if err != nil {
		if git.IsErrNotExist(err) {
			return nil, nil, nil
		}
		return nil, nil, err
	}

	branch := repo.DefaultWikiBranch
	if branch == "" {
		branch = "master"
	}
	commit, err := wikiRepo.GetBranchCommit(branch)
	if err != nil {
		wikiRepo.Close()
		if git.IsErrNotExist(err) {
			return nil, nil, nil
		}
		return nil, nil, err
	}
	return wikiRepo, commit, nil
}

// extractWikiLinks extracts wiki page links from markdown content
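// Both [[Page Name]] wiki links and [text](wiki/Page-Name) markdown links are
// recognized: "[[Getting Started]]" yields "Getting-Started",
// "[setup](wiki/Install.md)" yields "Install", and external http(s) URLs are
// skipped. Each link is reported once.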
func extractWikiLinks(content string) []string {
	links := make([]string, 0)
	seen := make(map[string]bool)

	// Match [[Page Name]] style wiki links
	wikiLinkRe := regexp.MustCompile(`\[\[([^\]|]+)(?:\|[^\]]+)?\]\]`)
	matches := wikiLinkRe.FindAllStringSubmatch(content, -1)
	for _, match := range matches {
		if len(match) > 1 {
			link := strings.TrimSpace(match[1])
			// Convert to page name format
			link = strings.ReplaceAll(link, " ", "-")
			if !seen[link] {
				links = append(links, link)
				seen[link] = true
			}
		}
	}

	// Match [text](wiki/Page-Name) style links
	mdLinkRe := regexp.MustCompile(`\[([^\]]+)\]\((?:\.\.?/)?(?:wiki/)?([^)]+)\)`)
	matches = mdLinkRe.FindAllStringSubmatch(content, -1)
	for _, match := range matches {
		if len(match) > 2 {
			link := match[2]
			// Skip external links
			if strings.HasPrefix(link, "http://") || strings.HasPrefix(link, "https://") {
				continue
			}
			// Clean up the link
			link = strings.TrimPrefix(link, "./")
			link = strings.TrimSuffix(link, ".md")
			if !seen[link] && link != "" {
				links = append(links, link)
				seen[link] = true
			}
		}
	}
	return links
}

// extractTitle extracts the title from markdown content
func extractTitle(content, defaultTitle string) string {
	// Look for first H1 heading
	for line := range strings.SplitSeq(content, "\n") {
		line = strings.TrimSpace(line)
		if title, ok := strings.CutPrefix(line, "# "); ok {
			return title
		}
	}
	return defaultTitle
}

// countWords counts the number of words in content
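// Markdown punctuation is stripped first, so
// countWords("# Title\n\nHello **world**") returns 3.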
func countWords(content string) int {
	// Remove markdown formatting
	content = regexp.MustCompile(`[#*_\[\](){}]`).ReplaceAllString(content, " ")
	// Split on whitespace
	words := strings.Fields(content)
	return len(words)
}