// Copyright 2026 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package wiki import ( "context" "crypto/sha256" "encoding/hex" "encoding/json" "regexp" "strings" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" ) // IndexWikiPage indexes a single wiki page for search func IndexWikiPage(ctx context.Context, repo *repo_model.Repository, pageName string) error { wikiRepo, commit, err := findWikiRepoCommit(ctx, repo) if err != nil { return err } if wikiRepo != nil { defer wikiRepo.Close() } if commit == nil { return nil } // Get the page content pagePath := NameToFilename(pageName) entry, err := commit.GetTreeEntryByPath(pagePath) if err != nil { return err } blob := entry.Blob() content, err := blob.GetBlobContent(1024 * 1024) // 1MB max if err != nil { return err } // Calculate hash hash := sha256.Sum256([]byte(content)) contentHash := hex.EncodeToString(hash[:]) // Check if already indexed with same hash existing, err := repo_model.GetWikiIndex(ctx, repo.ID, pageName) if err != nil { return err } if existing != nil && existing.ContentHash == contentHash { return nil // Already up to date } // Extract links from content links := extractWikiLinks(content) linksJSON, _ := json.Marshal(links) // Count words wordCount := countWords(content) // Get title from first heading or page name title := extractTitle(content, pageName) // Create/update index idx := &repo_model.WikiIndex{ RepoID: repo.ID, PageName: pageName, PagePath: pagePath, Title: title, Content: content, ContentHash: contentHash, CommitSHA: commit.ID.String(), WordCount: wordCount, LinksOut: string(linksJSON), } return repo_model.CreateOrUpdateWikiIndex(ctx, idx) } // IndexAllWikiPages indexes all pages in a wiki func IndexAllWikiPages(ctx context.Context, repo *repo_model.Repository) error { wikiRepo, commit, err := findWikiRepoCommit(ctx, repo) if err != nil { return err } if wikiRepo != nil { defer wikiRepo.Close() } if 
commit == nil { return nil } // Get all entries entries, err := commit.ListEntries() if err != nil { return err } indexedPages := make(map[string]bool) for _, entry := range entries { if entry.IsDir() { continue } if !strings.HasSuffix(entry.Name(), ".md") { continue } pageName := FilenameToName(entry.Name()) if pageName == "" { continue } if err := IndexWikiPage(ctx, repo, pageName); err != nil { log.Warn("Failed to index wiki page %s: %v", pageName, err) continue } indexedPages[pageName] = true } // Remove deleted pages from index existingIndexes, err := repo_model.GetWikiIndexByRepo(ctx, repo.ID) if err != nil { return err } for _, idx := range existingIndexes { if !indexedPages[idx.PageName] { if err := repo_model.DeleteWikiIndex(ctx, repo.ID, idx.PageName); err != nil { log.Warn("Failed to remove deleted wiki page from index %s: %v", idx.PageName, err) } } } return nil } // RemoveWikiPageFromIndex removes a page from the search index func RemoveWikiPageFromIndex(ctx context.Context, repoID int64, pageName string) error { return repo_model.DeleteWikiIndex(ctx, repoID, pageName) } // ClearWikiIndex removes all indexed pages for a repository func ClearWikiIndex(ctx context.Context, repoID int64) error { return repo_model.DeleteWikiIndexByRepo(ctx, repoID) } // GetWikiGraph returns the link graph for a wiki func GetWikiGraph(ctx context.Context, repoID int64) (nodes []map[string]interface{}, edges []map[string]interface{}, err error) { indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID) if err != nil { return nil, nil, err } nodes = make([]map[string]interface{}, 0, len(indexes)) edges = make([]map[string]interface{}, 0) pageSet := make(map[string]bool) // Build nodes for _, idx := range indexes { pageSet[idx.PageName] = true nodes = append(nodes, map[string]interface{}{ "name": idx.PageName, "title": idx.Title, "word_count": idx.WordCount, }) } // Build edges from links for _, idx := range indexes { var links []string if idx.LinksOut != "" { 
json.Unmarshal([]byte(idx.LinksOut), &links) } for _, link := range links { if pageSet[link] { // Only include links to existing pages edges = append(edges, map[string]interface{}{ "source": idx.PageName, "target": link, }) } } } return nodes, edges, nil } // GetWikiIncomingLinks returns pages that link to the given page func GetWikiIncomingLinks(ctx context.Context, repoID int64, pageName string) ([]string, error) { indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID) if err != nil { return nil, err } incoming := make([]string, 0) for _, idx := range indexes { var links []string if idx.LinksOut != "" { json.Unmarshal([]byte(idx.LinksOut), &links) } for _, link := range links { if link == pageName { incoming = append(incoming, idx.PageName) break } } } return incoming, nil } // GetOrphanedPages returns pages with no incoming links func GetOrphanedPages(ctx context.Context, repoID int64) ([]*repo_model.WikiIndex, error) { indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID) if err != nil { return nil, err } // Build set of pages that are linked to linkedPages := make(map[string]bool) for _, idx := range indexes { var links []string if idx.LinksOut != "" { json.Unmarshal([]byte(idx.LinksOut), &links) } for _, link := range links { linkedPages[link] = true } } // Find orphaned pages (excluding Home which is always accessible) orphaned := make([]*repo_model.WikiIndex, 0) for _, idx := range indexes { if idx.PageName != "Home" && !linkedPages[idx.PageName] { orphaned = append(orphaned, idx) } } return orphaned, nil } // GetDeadLinks returns links to non-existent pages func GetDeadLinks(ctx context.Context, repoID int64) ([]map[string]string, error) { indexes, err := repo_model.GetWikiIndexByRepo(ctx, repoID) if err != nil { return nil, err } // Build set of existing pages existingPages := make(map[string]bool) for _, idx := range indexes { existingPages[idx.PageName] = true } // Find dead links deadLinks := make([]map[string]string, 0) for _, idx := range 
indexes { var links []string if idx.LinksOut != "" { json.Unmarshal([]byte(idx.LinksOut), &links) } for _, link := range links { if !existingPages[link] { deadLinks = append(deadLinks, map[string]string{ "page": idx.PageName, "broken_link": link, }) } } } return deadLinks, nil } // findWikiRepoCommit opens the wiki repo and gets the latest commit func findWikiRepoCommit(ctx context.Context, repo *repo_model.Repository) (*git.Repository, *git.Commit, error) { wikiPath := repo.WikiPath() if !git.IsRepoURLAccessible(ctx, wikiPath) { return nil, nil, nil } wikiRepo, err := git.OpenRepository(ctx, wikiPath) if err != nil { return nil, nil, err } branch := repo.DefaultWikiBranch if branch == "" { branch = "master" } commit, err := wikiRepo.GetBranchCommit(branch) if err != nil { wikiRepo.Close() return nil, nil, err } return wikiRepo, commit, nil } // extractWikiLinks extracts wiki page links from markdown content func extractWikiLinks(content string) []string { links := make([]string, 0) seen := make(map[string]bool) // Match [[Page Name]] style wiki links wikiLinkRe := regexp.MustCompile(`\[\[([^\]|]+)(?:\|[^\]]+)?\]\]`) matches := wikiLinkRe.FindAllStringSubmatch(content, -1) for _, match := range matches { if len(match) > 1 { link := strings.TrimSpace(match[1]) // Convert to page name format link = strings.ReplaceAll(link, " ", "-") if !seen[link] { links = append(links, link) seen[link] = true } } } // Match [text](wiki/Page-Name) style links mdLinkRe := regexp.MustCompile(`\[([^\]]+)\]\((?:\.\.?/)?(?:wiki/)?([^)]+)\)`) matches = mdLinkRe.FindAllStringSubmatch(content, -1) for _, match := range matches { if len(match) > 2 { link := match[2] // Skip external links if strings.HasPrefix(link, "http://") || strings.HasPrefix(link, "https://") { continue } // Clean up the link link = strings.TrimPrefix(link, "./") link = strings.TrimSuffix(link, ".md") if !seen[link] && link != "" { links = append(links, link) seen[link] = true } } } return links } // extractTitle 
// extracts the title from markdown content: the text of the first level-one
// heading line ("# Title"), with surrounding whitespace removed. Lines inside
// fenced code blocks (opened by ``` or ~~~ and closed by the same marker) are
// ignored, so a shell comment in a code sample is not taken for the title.
// defaultTitle is returned when no such heading exists.
func extractTitle(content, defaultTitle string) string {
	fence := "" // marker of the currently open code fence, "" when outside one
	for len(content) > 0 {
		var line string
		// Walk the text line by line instead of splitting it all up front;
		// the heading is normally near the top.
		line, content, _ = strings.Cut(content, "\n")
		line = strings.TrimSpace(line)

		if fence != "" {
			if strings.HasPrefix(line, fence) {
				fence = ""
			}
			continue
		}
		switch {
		case strings.HasPrefix(line, "```"):
			fence = "```"
		case strings.HasPrefix(line, "~~~"):
			fence = "~~~"
		case strings.HasPrefix(line, "# "):
			return strings.TrimSpace(strings.TrimPrefix(line, "# "))
		}
	}
	return defaultTitle
}

// markdownMarkupRe matches the markdown punctuation characters that
// countWords replaces with spaces before splitting.
var markdownMarkupRe = regexp.MustCompile(`[#*_\[\](){}]`)

// countWords counts the number of words in content. Markdown punctuation
// (# * _ [ ] ( ) { }) is replaced by spaces first, so it separates words
// instead of counting as part of them; the rest is split on whitespace.
func countWords(content string) int {
	return len(strings.Fields(markdownMarkupRe.ReplaceAllString(content, " ")))
}