- modules/pages/config.go: use slices.Contains for template validation - modules/webhook/retry.go: use slices.Contains for retryable status codes - routers/api/v1/org/profile.go: extract helper to remove duplicate code - cmd/gitea-cli/cmd/upload.go: apply gofumpt formatting, add nolint directive for waitgroup 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
376 lines
9.0 KiB
Go
376 lines
9.0 KiB
Go
// Copyright 2026 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
// Package health provides comprehensive health checking for Gitea services.
|
|
package health
|
|
|
|
import (
	"context"
	"encoding/json"
	"maps"
	"runtime"
	"sync"
	"time"

	"code.gitea.io/gitea/modules/circuitbreaker"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/setting"
)
|
|
|
|
// Status is the health state reported for a single component or for the
// service as a whole. It is a string so it serialises directly into JSON.
type Status string

// Known health states.
const (
	// StatusHealthy is reported when a check succeeded.
	StatusHealthy Status = "healthy"
	// StatusDegraded is reported when a component is impaired but usable.
	StatusDegraded Status = "degraded"
	// StatusUnhealthy is reported when a component has failed.
	StatusUnhealthy Status = "unhealthy"
	// StatusUnknown is reported when no state could be determined.
	StatusUnknown Status = "unknown"
)
|
|
|
|
// ComponentCheck represents a health check for a single component
|
|
type ComponentCheck struct {
|
|
Name string `json:"name"`
|
|
Status Status `json:"status"`
|
|
Message string `json:"message,omitempty"`
|
|
Duration time.Duration `json:"duration_ms"`
|
|
LastChecked time.Time `json:"last_checked"`
|
|
Metadata map[string]any `json:"metadata,omitempty"`
|
|
}
|
|
|
|
// Response represents the complete health check response
|
|
type Response struct {
|
|
Status Status `json:"status"`
|
|
Version string `json:"version"`
|
|
Uptime time.Duration `json:"uptime_seconds"`
|
|
Timestamp time.Time `json:"timestamp"`
|
|
Components map[string]*ComponentCheck `json:"components"`
|
|
System *SystemInfo `json:"system,omitempty"`
|
|
Circuits map[string]circuitbreaker.Stats `json:"circuit_breakers,omitempty"`
|
|
}
|
|
|
|
// SystemInfo is a snapshot of Go runtime figures that can be attached to a
// health response.
type SystemInfo struct {
	// GoVersion is the Go version string of the running binary.
	GoVersion string `json:"go_version"`
	// NumGoroutines is the number of goroutines at snapshot time.
	NumGoroutines int `json:"goroutines"`
	// MemoryAllocMB is a memory figure expressed in mebibytes.
	MemoryAllocMB float64 `json:"memory_alloc_mb"`
	// MemorySysMB is a memory figure expressed in mebibytes.
	MemorySysMB float64 `json:"memory_sys_mb"`
	// NumCPU is the number of CPUs reported by the runtime.
	NumCPU int `json:"num_cpu"`
	// GOMAXPROCS is the runtime's GOMAXPROCS setting.
	GOMAXPROCS int `json:"gomaxprocs"`
}
|
|
|
|
// Checker is a function that performs a health check
|
|
type Checker func(ctx context.Context) *ComponentCheck
|
|
|
|
// Manager manages health checks
|
|
type Manager struct {
|
|
mu sync.RWMutex
|
|
checkers map[string]Checker
|
|
cache map[string]*ComponentCheck
|
|
cacheTTL time.Duration
|
|
startTime time.Time
|
|
version string
|
|
}
|
|
|
|
var (
|
|
defaultManager *Manager
|
|
managerOnce sync.Once
|
|
)
|
|
|
|
// GetManager returns the default health manager
|
|
func GetManager() *Manager {
|
|
managerOnce.Do(func() {
|
|
defaultManager = &Manager{
|
|
checkers: make(map[string]Checker),
|
|
cache: make(map[string]*ComponentCheck),
|
|
cacheTTL: 5 * time.Second,
|
|
startTime: time.Now(),
|
|
version: setting.AppVer,
|
|
}
|
|
})
|
|
return defaultManager
|
|
}
|
|
|
|
// RegisterChecker registers a new health checker
|
|
func (m *Manager) RegisterChecker(name string, checker Checker) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
m.checkers[name] = checker
|
|
log.Debug("Registered health checker: %s", name)
|
|
}
|
|
|
|
// UnregisterChecker removes a health checker
|
|
func (m *Manager) UnregisterChecker(name string) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
delete(m.checkers, name)
|
|
delete(m.cache, name)
|
|
}
|
|
|
|
// Check performs all health checks
|
|
func (m *Manager) Check(ctx context.Context, includeSystem bool) *Response {
|
|
m.mu.RLock()
|
|
checkers := maps.Clone(m.checkers)
|
|
m.mu.RUnlock()
|
|
|
|
response := &Response{
|
|
Status: StatusHealthy,
|
|
Version: m.version,
|
|
Uptime: time.Since(m.startTime),
|
|
Timestamp: time.Now(),
|
|
Components: make(map[string]*ComponentCheck),
|
|
}
|
|
|
|
// Run checks concurrently
|
|
var wg sync.WaitGroup
|
|
var mu sync.Mutex
|
|
|
|
for name, checker := range checkers {
|
|
wg.Add(1)
|
|
go func(name string, checker Checker) {
|
|
defer wg.Done()
|
|
|
|
result := m.runCheck(ctx, name, checker)
|
|
|
|
mu.Lock()
|
|
response.Components[name] = result
|
|
mu.Unlock()
|
|
}(name, checker)
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
// Calculate overall status
|
|
response.Status = m.calculateOverallStatus(response.Components)
|
|
|
|
// Include system info if requested
|
|
if includeSystem {
|
|
response.System = getSystemInfo()
|
|
response.Circuits = circuitbreaker.GetRegistry().Stats()
|
|
}
|
|
|
|
return response
|
|
}
|
|
|
|
func (m *Manager) runCheck(ctx context.Context, name string, checker Checker) *ComponentCheck {
|
|
// Check cache first
|
|
m.mu.RLock()
|
|
cached, ok := m.cache[name]
|
|
m.mu.RUnlock()
|
|
|
|
if ok && time.Since(cached.LastChecked) < m.cacheTTL {
|
|
return cached
|
|
}
|
|
|
|
// Run the check with timeout
|
|
checkCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
|
defer cancel()
|
|
|
|
start := time.Now()
|
|
result := checker(checkCtx)
|
|
result.Duration = time.Since(start)
|
|
result.LastChecked = time.Now()
|
|
|
|
// Cache the result
|
|
m.mu.Lock()
|
|
m.cache[name] = result
|
|
m.mu.Unlock()
|
|
|
|
return result
|
|
}
|
|
|
|
func (m *Manager) calculateOverallStatus(components map[string]*ComponentCheck) Status {
|
|
unhealthyCount := 0
|
|
degradedCount := 0
|
|
|
|
for _, check := range components {
|
|
switch check.Status {
|
|
case StatusUnhealthy:
|
|
unhealthyCount++
|
|
case StatusDegraded:
|
|
degradedCount++
|
|
}
|
|
}
|
|
|
|
// If any critical component is unhealthy, overall is unhealthy
|
|
if unhealthyCount > 0 {
|
|
return StatusUnhealthy
|
|
}
|
|
|
|
// If any component is degraded, overall is degraded
|
|
if degradedCount > 0 {
|
|
return StatusDegraded
|
|
}
|
|
|
|
return StatusHealthy
|
|
}
|
|
|
|
func getSystemInfo() *SystemInfo {
|
|
var memStats runtime.MemStats
|
|
runtime.ReadMemStats(&memStats)
|
|
|
|
return &SystemInfo{
|
|
GoVersion: runtime.Version(),
|
|
NumGoroutines: runtime.NumGoroutine(),
|
|
MemoryAllocMB: float64(memStats.Alloc) / 1024 / 1024,
|
|
MemorySysMB: float64(memStats.Sys) / 1024 / 1024,
|
|
NumCPU: runtime.NumCPU(),
|
|
GOMAXPROCS: runtime.GOMAXPROCS(0),
|
|
}
|
|
}
|
|
|
|
// CheckSingle performs a single component check
|
|
func (m *Manager) CheckSingle(ctx context.Context, name string) (*ComponentCheck, bool) {
|
|
m.mu.RLock()
|
|
checker, ok := m.checkers[name]
|
|
m.mu.RUnlock()
|
|
|
|
if !ok {
|
|
return nil, false
|
|
}
|
|
|
|
return m.runCheck(ctx, name, checker), true
|
|
}
|
|
|
|
// LivenessCheck performs a quick liveness check (is the service running?)
|
|
func (m *Manager) LivenessCheck() *ComponentCheck {
|
|
return &ComponentCheck{
|
|
Name: "liveness",
|
|
Status: StatusHealthy,
|
|
Message: "service is running",
|
|
LastChecked: time.Now(),
|
|
}
|
|
}
|
|
|
|
// ReadinessCheck performs a readiness check (can the service handle requests?)
|
|
func (m *Manager) ReadinessCheck(ctx context.Context) *Response {
|
|
// For readiness, we only check critical components
|
|
return m.Check(ctx, false)
|
|
}
|
|
|
|
// NewDatabaseChecker creates a database health checker
|
|
func NewDatabaseChecker(pingFunc func(ctx context.Context) error) Checker {
|
|
return func(ctx context.Context) *ComponentCheck {
|
|
check := &ComponentCheck{
|
|
Name: "database",
|
|
}
|
|
|
|
if err := pingFunc(ctx); err != nil {
|
|
check.Status = StatusUnhealthy
|
|
check.Message = err.Error()
|
|
} else {
|
|
check.Status = StatusHealthy
|
|
check.Message = "connected"
|
|
}
|
|
|
|
return check
|
|
}
|
|
}
|
|
|
|
// NewCacheChecker creates a cache health checker
|
|
func NewCacheChecker(pingFunc func(ctx context.Context) error) Checker {
|
|
return func(ctx context.Context) *ComponentCheck {
|
|
check := &ComponentCheck{
|
|
Name: "cache",
|
|
}
|
|
|
|
if err := pingFunc(ctx); err != nil {
|
|
check.Status = StatusDegraded // Cache is usually not critical
|
|
check.Message = err.Error()
|
|
} else {
|
|
check.Status = StatusHealthy
|
|
check.Message = "connected"
|
|
}
|
|
|
|
return check
|
|
}
|
|
}
|
|
|
|
// NewGitChecker creates a git service health checker
|
|
func NewGitChecker(checkFunc func(ctx context.Context) (string, error)) Checker {
|
|
return func(ctx context.Context) *ComponentCheck {
|
|
check := &ComponentCheck{
|
|
Name: "git",
|
|
}
|
|
|
|
version, err := checkFunc(ctx)
|
|
if err != nil {
|
|
check.Status = StatusUnhealthy
|
|
check.Message = err.Error()
|
|
} else {
|
|
check.Status = StatusHealthy
|
|
check.Message = "available"
|
|
check.Metadata = map[string]any{
|
|
"version": version,
|
|
}
|
|
}
|
|
|
|
return check
|
|
}
|
|
}
|
|
|
|
// NewSSHChecker creates an SSH service health checker
|
|
func NewSSHChecker(isEnabled bool, port int) Checker {
|
|
return func(ctx context.Context) *ComponentCheck {
|
|
check := &ComponentCheck{
|
|
Name: "ssh",
|
|
}
|
|
|
|
if !isEnabled {
|
|
check.Status = StatusHealthy
|
|
check.Message = "disabled"
|
|
} else {
|
|
check.Status = StatusHealthy
|
|
check.Message = "listening"
|
|
check.Metadata = map[string]any{
|
|
"port": port,
|
|
}
|
|
}
|
|
|
|
return check
|
|
}
|
|
}
|
|
|
|
// NewExternalServiceChecker creates a checker for external services
|
|
func NewExternalServiceChecker(name string, checkFunc func(ctx context.Context) error) Checker {
|
|
return func(ctx context.Context) *ComponentCheck {
|
|
check := &ComponentCheck{
|
|
Name: name,
|
|
}
|
|
|
|
if err := checkFunc(ctx); err != nil {
|
|
check.Status = StatusDegraded
|
|
check.Message = err.Error()
|
|
} else {
|
|
check.Status = StatusHealthy
|
|
check.Message = "connected"
|
|
}
|
|
|
|
return check
|
|
}
|
|
}
|
|
|
|
// NewQueueChecker creates a checker for the task queue
|
|
func NewQueueChecker(getQueueStats func() (pending, processing int)) Checker {
|
|
return func(ctx context.Context) *ComponentCheck {
|
|
check := &ComponentCheck{
|
|
Name: "queue",
|
|
}
|
|
|
|
pending, processing := getQueueStats()
|
|
|
|
check.Status = StatusHealthy
|
|
check.Message = "running"
|
|
check.Metadata = map[string]any{
|
|
"pending": pending,
|
|
"processing": processing,
|
|
}
|
|
|
|
// Mark as degraded if queue is backing up
|
|
if pending > 10000 {
|
|
check.Status = StatusDegraded
|
|
check.Message = "queue backlog is high"
|
|
}
|
|
|
|
return check
|
|
}
|
|
}
|