gitea/modules/health/health.go
logikonline 81bb23f0da fix: resolve golangci-lint errors (batch 1)
- cmd/gitea-cli: fix errcheck, perfsprint, use modules/json, http constants
- models/migrations: remove unused nolint directive
- models/organization: interface{} -> any
- modules/health: rename HealthResponse -> Response to avoid stutter
- modules/idempotency: use modules/json, fix errcheck, rename IdempotencyInfo -> Info
- modules/structs: fix Verified_At naming, use omitzero

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-09 17:46:44 -05:00

378 lines
9.1 KiB
Go

// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
// Package health provides comprehensive health checking for Gitea services.
package health
import (
"context"
"runtime"
"sync"
"time"
"code.gitea.io/gitea/modules/circuitbreaker"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
)
// Status is the health state reported for a single component or for the
// service as a whole. The string values below are what appear in the JSON
// "status" fields of ComponentCheck and Response.
type Status string
const (
// StatusHealthy marks a component that passed its check.
StatusHealthy Status = "healthy"
// StatusDegraded marks an impaired component; if no component is unhealthy,
// a single degraded one makes the overall status degraded.
StatusDegraded Status = "degraded"
// StatusUnhealthy marks a failed component; any such component makes the
// overall status unhealthy.
StatusUnhealthy Status = "unhealthy"
// StatusUnknown presumably marks a component whose state could not be
// determined.
// NOTE(review): calculateOverallStatus only counts unhealthy and degraded
// components, so an unknown component does not lower the overall status.
// Confirm that this is intended.
StatusUnknown Status = "unknown"
)
// ComponentCheck is the result of one health check for a single component.
// Checkers fill in Name, Status, Message and (optionally) Metadata; runCheck
// stamps Duration and LastChecked after the checker returns.
type ComponentCheck struct {
// Name identifies the component, e.g. "database" or "cache".
Name string `json:"name"`
// Status is the component's health state.
Status Status `json:"status"`
// Message is a short human-readable detail; omitted from JSON when empty.
Message string `json:"message,omitempty"`
// Duration is how long the checker call took.
// NOTE(review): time.Duration is an int64 count of nanoseconds, so unless
// the encoder in use special-cases it, this field is emitted in nanoseconds
// even though the JSON key says "duration_ms". Verify against consumers.
Duration time.Duration `json:"duration_ms"`
// LastChecked is when the result was produced; runCheck compares it with
// the cache TTL to decide whether a cached result may be reused.
LastChecked time.Time `json:"last_checked"`
// Metadata carries optional checker-specific details such as a version
// string or queue counters; omitted from JSON when empty.
Metadata map[string]any `json:"metadata,omitempty"`
}
// Response is the aggregate health report built by Manager.Check.
type Response struct {
// Status is the overall state derived from all component results.
Status Status `json:"status"`
// Version is the version string held by the Manager.
Version string `json:"version"`
// Uptime is the time elapsed since the Manager was created.
// NOTE(review): time.Duration is an int64 count of nanoseconds, so unless
// the encoder in use special-cases it, this field is emitted in nanoseconds
// even though the JSON key says "uptime_seconds". Verify against consumers.
Uptime time.Duration `json:"uptime_seconds"`
// Timestamp is when this report was started.
Timestamp time.Time `json:"timestamp"`
// Components maps each registered checker name to its result.
Components map[string]*ComponentCheck `json:"components"`
// System holds runtime statistics; only set when system info was requested.
System *SystemInfo `json:"system,omitempty"`
// Circuits holds circuit-breaker statistics; only set when system info was
// requested.
Circuits map[string]circuitbreaker.Stats `json:"circuit_breakers,omitempty"`
}
// SystemInfo is a snapshot of Go runtime statistics, filled in by
// getSystemInfo.
type SystemInfo struct {
// GoVersion is the value of runtime.Version().
GoVersion string `json:"go_version"`
// NumGoroutines is the value of runtime.NumGoroutine() at snapshot time.
NumGoroutines int `json:"goroutines"`
// MemoryAllocMB is runtime.MemStats.Alloc divided by 1024*1024.
MemoryAllocMB float64 `json:"memory_alloc_mb"`
// MemorySysMB is runtime.MemStats.Sys divided by 1024*1024.
MemorySysMB float64 `json:"memory_sys_mb"`
// NumCPU is the value of runtime.NumCPU().
NumCPU int `json:"num_cpu"`
// GOMAXPROCS is the current setting, read with runtime.GOMAXPROCS(0).
GOMAXPROCS int `json:"gomaxprocs"`
}
// Checker is a function that performs one health check and reports the
// outcome. The Manager calls it with a context that carries a 5-second
// timeout. Implementations should return a non-nil result; Duration and
// LastChecked are overwritten by the Manager after the call.
type Checker func(ctx context.Context) *ComponentCheck
// Manager holds the registered health checkers together with a short-lived
// cache of their most recent results.
type Manager struct {
// mu guards checkers and cache.
mu sync.RWMutex
// checkers maps a component name to the function that checks it.
checkers map[string]Checker
// cache holds the latest result per component name; a result is reused
// while it is younger than cacheTTL.
cache map[string]*ComponentCheck
// cacheTTL is how long a cached result is served before the checker runs
// again.
cacheTTL time.Duration
// startTime is the reference point for the uptime reported by Check.
startTime time.Time
// version is copied into every Response.
version string
}
var (
// defaultManager is the shared instance handed out by GetManager.
defaultManager *Manager
// managerOnce ensures defaultManager is constructed only once.
managerOnce sync.Once
)
// GetManager returns the shared health Manager, constructing it on the first
// call.
//
// Construction runs under managerOnce, so every caller receives the same
// instance. The new manager has no registered checkers, an empty result
// cache with a 5-second TTL, its start time set to the moment of
// construction, and its version taken from setting.AppVer.
func GetManager() *Manager {
	managerOnce.Do(func() {
		mgr := new(Manager)
		mgr.checkers = map[string]Checker{}
		mgr.cache = map[string]*ComponentCheck{}
		mgr.cacheTTL = 5 * time.Second
		mgr.startTime = time.Now()
		mgr.version = setting.AppVer
		defaultManager = mgr
	})
	return defaultManager
}
// RegisterChecker installs checker under name, replacing any checker that was
// previously registered with the same name.
//
// Any cached result for name is dropped at the same time, so a replaced
// checker's last result is not served on behalf of the new one. This mirrors
// UnregisterChecker, which also clears the cache entry.
func (m *Manager) RegisterChecker(name string, checker Checker) {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.checkers[name] = checker
	// Deleting from a nil or empty map is a no-op, so this is always safe.
	delete(m.cache, name)
	log.Debug("Registered health checker: %s", name)
}
// UnregisterChecker removes the checker registered under name along with any
// cached result for it. Unknown names are ignored.
func (m *Manager) UnregisterChecker(name string) {
	m.mu.Lock()
	// Both deletes are no-ops for missing keys and cannot panic, so the lock
	// is released explicitly rather than through defer.
	delete(m.cache, name)
	delete(m.checkers, name)
	m.mu.Unlock()
}
// Check runs every registered checker and assembles the aggregate report.
//
// The checker registry is copied under the read lock, so checkers execute
// without the manager lock held. Each checker runs in its own goroutine via
// runCheck (which may return a cached result), and Check blocks until all of
// them have finished. Version, Uptime and Timestamp are captured before the
// checkers start. When includeSystem is true the report also carries runtime
// statistics and circuit-breaker stats.
func (m *Manager) Check(ctx context.Context, includeSystem bool) *Response {
	m.mu.RLock()
	snapshot := make(map[string]Checker)
	for name, fn := range m.checkers {
		snapshot[name] = fn
	}
	m.mu.RUnlock()

	report := &Response{
		Status:     StatusHealthy,
		Version:    m.version,
		Uptime:     time.Since(m.startTime),
		Timestamp:  time.Now(),
		Components: make(map[string]*ComponentCheck),
	}

	// Each goroutine delivers exactly one outcome. The channel is buffered
	// for all of them, so sends never block.
	type outcome struct {
		name  string
		check *ComponentCheck
	}
	outcomes := make(chan outcome, len(snapshot))

	var pending sync.WaitGroup
	pending.Add(len(snapshot))
	for name, fn := range snapshot {
		go func(name string, fn Checker) {
			defer pending.Done()
			outcomes <- outcome{name: name, check: m.runCheck(ctx, name, fn)}
		}(name, fn)
	}
	pending.Wait()
	close(outcomes)

	for o := range outcomes {
		report.Components[o.name] = o.check
	}

	report.Status = m.calculateOverallStatus(report.Components)

	if includeSystem {
		report.System = getSystemInfo()
		report.Circuits = circuitbreaker.GetRegistry().Stats()
	}
	return report
}
// runCheck returns the health result for one component, reusing a cached
// result when it is younger than the manager's cache TTL.
//
// On a cache miss the checker is called with a context derived from ctx that
// carries a 5-second timeout. The result is stamped with the call duration
// and the completion time, stored in the cache under name, and returned.
//
// A nil checker, or a checker that returns nil, yields a StatusUnknown result
// instead of a nil dereference. Check calls runCheck from its own goroutines,
// where such a panic would take down the whole process.
func (m *Manager) runCheck(ctx context.Context, name string, checker Checker) *ComponentCheck {
	m.mu.RLock()
	cached, ok := m.cache[name]
	m.mu.RUnlock()
	if ok && time.Since(cached.LastChecked) < m.cacheTTL {
		return cached
	}

	checkCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	start := time.Now()
	var result *ComponentCheck
	if checker != nil {
		result = checker(checkCtx)
	}
	if result == nil {
		result = &ComponentCheck{
			Name:    name,
			Status:  StatusUnknown,
			Message: "health checker produced no result",
		}
	}
	result.Duration = time.Since(start)
	result.LastChecked = time.Now()

	m.mu.Lock()
	m.cache[name] = result
	m.mu.Unlock()

	return result
}
// calculateOverallStatus folds the per-component results into one status.
//
// Any unhealthy component makes the result StatusUnhealthy. Otherwise any
// degraded component makes it StatusDegraded. Otherwise the result is
// StatusHealthy. All components are weighted equally, and any other status
// value (including StatusUnknown) does not affect the outcome.
func (m *Manager) calculateOverallStatus(components map[string]*ComponentCheck) Status {
	var anyUnhealthy, anyDegraded bool
	for _, c := range components {
		if c.Status == StatusUnhealthy {
			anyUnhealthy = true
		} else if c.Status == StatusDegraded {
			anyDegraded = true
		}
	}

	switch {
	case anyUnhealthy:
		return StatusUnhealthy
	case anyDegraded:
		return StatusDegraded
	default:
		return StatusHealthy
	}
}
// getSystemInfo captures a snapshot of Go runtime statistics: Go version,
// goroutine count, allocated and system memory (bytes divided by 1024 twice),
// CPU count and the current GOMAXPROCS setting.
func getSystemInfo() *SystemInfo {
	toMB := func(n uint64) float64 { return float64(n) / 1024 / 1024 }

	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)

	info := new(SystemInfo)
	info.GoVersion = runtime.Version()
	info.NumGoroutines = runtime.NumGoroutine()
	info.MemoryAllocMB = toMB(ms.Alloc)
	info.MemorySysMB = toMB(ms.Sys)
	info.NumCPU = runtime.NumCPU()
	// Passing 0 reads the current value without changing it.
	info.GOMAXPROCS = runtime.GOMAXPROCS(0)
	return info
}
// CheckSingle runs the checker registered under name and returns its result.
// The boolean is false, and the result nil, when no checker is registered
// under that name. A cached result may be returned, as with Check.
func (m *Manager) CheckSingle(ctx context.Context, name string) (*ComponentCheck, bool) {
	m.mu.RLock()
	fn, found := m.checkers[name]
	m.mu.RUnlock()

	if found {
		return m.runCheck(ctx, name, fn), true
	}
	return nil, false
}
// LivenessCheck reports that the process is running. It runs no registered
// checkers and always returns a healthy "liveness" result stamped with the
// current time.
func (m *Manager) LivenessCheck() *ComponentCheck {
	alive := new(ComponentCheck)
	alive.Name = "liveness"
	alive.Status = StatusHealthy
	alive.Message = "service is running"
	alive.LastChecked = time.Now()
	return alive
}
// ReadinessCheck reports whether the service can handle requests. It runs
// every registered checker through Check and leaves out the system and
// circuit-breaker sections of the report.
func (m *Manager) ReadinessCheck(ctx context.Context) *Response {
	const withSystemInfo = false
	return m.Check(ctx, withSystemInfo)
}
// NewDatabaseChecker builds a Checker named "database" around pingFunc.
// A ping error yields StatusUnhealthy with the error text as the message;
// success yields StatusHealthy with the message "connected".
func NewDatabaseChecker(pingFunc func(ctx context.Context) error) Checker {
	return func(ctx context.Context) *ComponentCheck {
		if err := pingFunc(ctx); err != nil {
			return &ComponentCheck{Name: "database", Status: StatusUnhealthy, Message: err.Error()}
		}
		return &ComponentCheck{Name: "database", Status: StatusHealthy, Message: "connected"}
	}
}
// NewCacheChecker builds a Checker named "cache" around pingFunc.
// A ping error yields StatusDegraded rather than unhealthy, since the cache
// is usually not critical. Success yields StatusHealthy with the message
// "connected".
func NewCacheChecker(pingFunc func(ctx context.Context) error) Checker {
	return func(ctx context.Context) *ComponentCheck {
		if err := pingFunc(ctx); err != nil {
			return &ComponentCheck{Name: "cache", Status: StatusDegraded, Message: err.Error()}
		}
		return &ComponentCheck{Name: "cache", Status: StatusHealthy, Message: "connected"}
	}
}
// NewGitChecker builds a Checker named "git" around checkFunc, which returns
// a version string or an error. An error yields StatusUnhealthy with the
// error text as the message. Success yields StatusHealthy with the message
// "available" and the version recorded under the "version" metadata key.
func NewGitChecker(checkFunc func(ctx context.Context) (string, error)) Checker {
	return func(ctx context.Context) *ComponentCheck {
		version, err := checkFunc(ctx)
		if err != nil {
			return &ComponentCheck{Name: "git", Status: StatusUnhealthy, Message: err.Error()}
		}
		return &ComponentCheck{
			Name:     "git",
			Status:   StatusHealthy,
			Message:  "available",
			Metadata: map[string]any{"version": version},
		}
	}
}
// NewSSHChecker builds a Checker named "ssh" from static configuration.
//
// The checker performs no probe of the SSH server. It always reports
// StatusHealthy, with the message "disabled" when isEnabled is false, or
// "listening" plus the configured port under the "port" metadata key when it
// is true.
func NewSSHChecker(isEnabled bool, port int) Checker {
	return func(_ context.Context) *ComponentCheck {
		if !isEnabled {
			return &ComponentCheck{Name: "ssh", Status: StatusHealthy, Message: "disabled"}
		}
		return &ComponentCheck{
			Name:     "ssh",
			Status:   StatusHealthy,
			Message:  "listening",
			Metadata: map[string]any{"port": port},
		}
	}
}
// NewExternalServiceChecker builds a Checker for an external service, which
// reports under the given name. An error from checkFunc yields
// StatusDegraded with the error text as the message; success yields
// StatusHealthy with the message "connected".
func NewExternalServiceChecker(name string, checkFunc func(ctx context.Context) error) Checker {
	return func(ctx context.Context) *ComponentCheck {
		if err := checkFunc(ctx); err != nil {
			return &ComponentCheck{Name: name, Status: StatusDegraded, Message: err.Error()}
		}
		return &ComponentCheck{Name: name, Status: StatusHealthy, Message: "connected"}
	}
}
// NewQueueChecker builds a Checker named "queue" from getQueueStats, which
// returns the number of pending and processing items. Both counts are always
// reported as metadata. The status is StatusHealthy ("running") unless more
// than 10000 items are pending, in which case it is StatusDegraded
// ("queue backlog is high").
func NewQueueChecker(getQueueStats func() (pending, processing int)) Checker {
	// Above this many pending items the queue is considered to be backing up.
	const backlogThreshold = 10000

	return func(_ context.Context) *ComponentCheck {
		pending, processing := getQueueStats()

		status, message := StatusHealthy, "running"
		if pending > backlogThreshold {
			status, message = StatusDegraded, "queue backlog is high"
		}

		return &ComponentCheck{
			Name:    "queue",
			Status:  status,
			Message: message,
			Metadata: map[string]any{
				"pending":    pending,
				"processing": processing,
			},
		}
	}
}