Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f2d663ea76 | |||
| cfbf01274d | |||
| 26e297cbae | |||
| 0aa2f9cd8a | |||
| f17f06a1c6 | |||
| b14d5e19db | |||
| a2b38ddc60 | |||
| 5dc31108f8 | |||
| 63773b13d0 | |||
| bc3a44beac |
@@ -61,13 +61,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
|
||||
// --- Cluster Start Tests ---
|
||||
|
||||
|
||||
@@ -127,9 +127,10 @@ func probeFetchAssignments(ctx context.Context, client *http.Client, cfg ProbeCo
|
||||
}
|
||||
|
||||
type probeResultItem struct {
|
||||
SiteID int `json:"site_id"`
|
||||
LatencyNs int64 `json:"latency_ns"`
|
||||
IsUp bool `json:"is_up"`
|
||||
SiteID int `json:"site_id"`
|
||||
LatencyNs int64 `json:"latency_ns"`
|
||||
IsUp bool `json:"is_up"`
|
||||
ErrorReason string `json:"error_reason,omitempty"`
|
||||
}
|
||||
|
||||
func probeExecuteChecks(ctx context.Context, sites []models.Site, strict, insecure *http.Client, allowPrivate bool) []probeResultItem {
|
||||
@@ -154,9 +155,10 @@ loop:
|
||||
cr := monitor.RunCheck(s, strict, insecure, false, allowPrivate)
|
||||
mu.Lock()
|
||||
results = append(results, probeResultItem{
|
||||
SiteID: s.ID,
|
||||
LatencyNs: cr.LatencyNs,
|
||||
IsUp: cr.Status == "UP",
|
||||
SiteID: s.ID,
|
||||
LatencyNs: cr.LatencyNs,
|
||||
IsUp: cr.Status == "UP",
|
||||
ErrorReason: cr.ErrorReason,
|
||||
})
|
||||
mu.Unlock()
|
||||
}(site)
|
||||
|
||||
@@ -2,13 +2,14 @@ package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
)
|
||||
|
||||
type mockStore struct {
|
||||
@@ -58,13 +59,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
|
||||
func TestMetricsHandler(t *testing.T) {
|
||||
ms := &mockStore{
|
||||
|
||||
@@ -27,14 +27,26 @@ type Site struct {
|
||||
Paused bool
|
||||
Regions string
|
||||
|
||||
FailureCount int
|
||||
Status string
|
||||
StatusCode int
|
||||
Latency time.Duration
|
||||
CertExpiry time.Time
|
||||
HasSSL bool
|
||||
LastCheck time.Time
|
||||
SentSSLWarning bool
|
||||
FailureCount int
|
||||
Status string
|
||||
StatusCode int
|
||||
Latency time.Duration
|
||||
CertExpiry time.Time
|
||||
HasSSL bool
|
||||
LastCheck time.Time
|
||||
SentSSLWarning bool
|
||||
LastError string
|
||||
StatusChangedAt time.Time
|
||||
LastSuccessAt time.Time
|
||||
}
|
||||
|
||||
// StateChange describes a single status transition recorded for a site.
type StateChange struct {
	// ID identifies the record itself; SiteID names the site it belongs to.
	ID, SiteID int
	// FromStatus and ToStatus hold the status before and after the transition.
	FromStatus, ToStatus string
	// ErrorReason carries the reason text stored with the transition
	// (presumably empty for recoveries — confirm against the store).
	ErrorReason string
	// ChangedAt is the time attached to the transition.
	ChangedAt time.Time
}
|
||||
|
||||
type AlertConfig struct {
|
||||
|
||||
@@ -11,10 +11,11 @@ const (
|
||||
)
|
||||
|
||||
type NodeResult struct {
|
||||
NodeID string
|
||||
IsUp bool
|
||||
LatencyNs int64
|
||||
CheckedAt time.Time
|
||||
NodeID string
|
||||
IsUp bool
|
||||
LatencyNs int64
|
||||
CheckedAt time.Time
|
||||
ErrorReason string
|
||||
}
|
||||
|
||||
func AggregateStatus(results []NodeResult, strategy AggregationStrategy) (isUp bool, avgLatencyNs int64) {
|
||||
|
||||
+34
-15
@@ -2,6 +2,7 @@ package monitor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"strconv"
|
||||
@@ -15,12 +16,13 @@ import (
|
||||
)
|
||||
|
||||
type CheckResult struct {
|
||||
SiteID int
|
||||
Status string // "UP", "DOWN", "SSL EXP"
|
||||
StatusCode int
|
||||
LatencyNs int64
|
||||
HasSSL bool
|
||||
CertExpiry time.Time
|
||||
SiteID int
|
||||
Status string // "UP", "DOWN", "SSL EXP"
|
||||
StatusCode int
|
||||
LatencyNs int64
|
||||
HasSSL bool
|
||||
CertExpiry time.Time
|
||||
ErrorReason string
|
||||
}
|
||||
|
||||
func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bool, allowPrivate ...bool) CheckResult {
|
||||
@@ -35,7 +37,7 @@ func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bo
|
||||
if ips, err := net.LookupIP(host); err == nil {
|
||||
for _, ip := range ips {
|
||||
if isPrivateIP(ip) {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "target resolves to private IP"}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -52,7 +54,7 @@ func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bo
|
||||
case "dns":
|
||||
return runDNSCheck(site)
|
||||
default:
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "unsupported monitor type: " + site.Type}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +70,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, method, site.URL, nil)
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "invalid request: " + err.Error()}
|
||||
}
|
||||
|
||||
client := strict
|
||||
@@ -88,6 +90,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
||||
|
||||
if err != nil {
|
||||
result.Status = "DOWN"
|
||||
result.ErrorReason = truncateError(err.Error(), 256)
|
||||
return result
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
@@ -95,6 +98,11 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
||||
result.StatusCode = resp.StatusCode
|
||||
if !isCodeAccepted(resp.StatusCode, site.AcceptedCodes) {
|
||||
result.Status = "DOWN"
|
||||
expected := site.AcceptedCodes
|
||||
if expected == "" {
|
||||
expected = "200-299"
|
||||
}
|
||||
result.ErrorReason = fmt.Sprintf("HTTP %d (expected %s)", resp.StatusCode, expected)
|
||||
}
|
||||
|
||||
if site.CheckSSL && resp.TLS != nil && len(resp.TLS.PeerCertificates) > 0 {
|
||||
@@ -103,6 +111,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
||||
result.CertExpiry = cert.NotAfter
|
||||
if time.Now().After(cert.NotAfter) {
|
||||
result.Status = "SSL EXP"
|
||||
result.ErrorReason = "SSL certificate expired"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,7 +126,7 @@ func runPingCheck(site models.Site) CheckResult {
|
||||
|
||||
pinger, err := probing.NewPinger(host)
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "ping setup: " + err.Error()}
|
||||
}
|
||||
pinger.Count = 1
|
||||
pinger.Timeout = siteTimeout(site)
|
||||
@@ -127,8 +136,11 @@ func runPingCheck(site models.Site) CheckResult {
|
||||
err = pinger.Run()
|
||||
latency := time.Since(start)
|
||||
|
||||
if err != nil || pinger.Statistics().PacketsRecv == 0 {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds()}
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "ping failed: " + err.Error()}
|
||||
}
|
||||
if pinger.Statistics().PacketsRecv == 0 {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "no ICMP response"}
|
||||
}
|
||||
|
||||
stats := pinger.Statistics()
|
||||
@@ -148,7 +160,7 @@ func runPortCheck(site models.Site) CheckResult {
|
||||
latency := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds()}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: truncateError(err.Error(), 256)}
|
||||
}
|
||||
_ = conn.Close()
|
||||
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
|
||||
@@ -199,10 +211,10 @@ func runDNSCheck(site models.Site) CheckResult {
|
||||
latency := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds()}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS query failed: " + err.Error()}
|
||||
}
|
||||
if r.Rcode != dns.RcodeSuccess {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds()}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS RCODE: " + dns.RcodeToString[r.Rcode]}
|
||||
}
|
||||
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
|
||||
}
|
||||
@@ -235,3 +247,10 @@ func isCodeAccepted(code int, accepted string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// truncateError limits s to at most max bytes. A string that already fits is
// returned unchanged; a longer one is cut and suffixed with "...", with the
// suffix counted toward max.
//
// The cut point is moved back to a UTF-8 rune boundary so a multi-byte
// character is never split in half. When max is too small to hold the
// ellipsis (including zero or negative values) a bare prefix of at most max
// bytes is returned instead of panicking on a negative slice bound.
func truncateError(s string, max int) string {
	if len(s) <= max {
		return s
	}

	const ellipsis = "..."
	keep, suffix := max-len(ellipsis), ellipsis
	if keep < 0 {
		// No room for the ellipsis: fall back to a plain prefix.
		keep, suffix = max, ""
		if keep < 0 {
			keep = 0
		}
	}

	// keep < len(s) holds here, so s[keep] is in range. Back up while the
	// byte at the cut is a UTF-8 continuation byte (bit pattern 10xxxxxx).
	for keep > 0 && s[keep]&0xC0 == 0x80 {
		keep--
	}
	return s[:keep] + suffix
}
|
||||
|
||||
+182
-26
@@ -19,10 +19,18 @@ import (
|
||||
const (
|
||||
maxLogEntries = 100
|
||||
pollInterval = 5 * time.Second
|
||||
pushGracePeriod = 5 * time.Second
|
||||
minCheckInterval = 5
|
||||
minPushGrace = 60 * time.Second
|
||||
)
|
||||
|
||||
// AlertHealth is the delivery record kept per alert channel: the time and
// outcome of the latest send plus running totals.
type AlertHealth struct {
	// LastSendAt is when the most recent send attempt was recorded.
	LastSendAt time.Time
	// LastSendOK reports whether that attempt succeeded.
	LastSendOK bool
	// LastError holds the error text of the latest failed attempt; it is
	// cleared again when a send succeeds.
	LastError string
	// SendCount counts every recorded attempt, FailCount only the failures.
	SendCount, FailCount int
}
|
||||
|
||||
type Engine struct {
|
||||
mu sync.RWMutex
|
||||
liveState map[int]models.Site
|
||||
@@ -42,6 +50,9 @@ type Engine struct {
|
||||
probeResults map[int]map[string]NodeResult
|
||||
aggStrategy AggregationStrategy
|
||||
|
||||
alertHealthMu sync.RWMutex
|
||||
alertHealth map[int]AlertHealth
|
||||
|
||||
db store.Store
|
||||
insecureSkipVerify bool
|
||||
allowPrivateTargets bool
|
||||
@@ -64,6 +75,7 @@ func newEngine(s store.Store, allowPrivateTargets bool) *Engine {
|
||||
histories: make(map[int]*SiteHistory),
|
||||
tokenIndex: make(map[string]int),
|
||||
probeResults: make(map[int]map[string]NodeResult),
|
||||
alertHealth: make(map[int]AlertHealth),
|
||||
aggStrategy: AggAnyDown,
|
||||
isActive: true,
|
||||
allowPrivateTargets: allowPrivateTargets,
|
||||
@@ -96,6 +108,19 @@ func sanitizeLog(s string) string {
|
||||
return s
|
||||
}
|
||||
|
||||
// fmtDurationShort renders d compactly using at most two units, picked by
// magnitude: "42s" below a minute, "17m" below an hour, "3h 5m" below a day
// and "2d 4h" from there on. Values are truncated, not rounded.
func fmtDurationShort(d time.Duration) string {
	switch {
	case d < time.Minute:
		secs := int(d.Seconds())
		return fmt.Sprintf("%ds", secs)
	case d < time.Hour:
		mins := int(d.Minutes())
		return fmt.Sprintf("%dm", mins)
	case d < 24*time.Hour:
		hours, mins := int(d.Hours()), int(d.Minutes())%60
		return fmt.Sprintf("%dh %dm", hours, mins)
	default:
		totalHours := int(d.Hours())
		return fmt.Sprintf("%dd %dh", totalHours/24, totalHours%24)
	}
}
|
||||
|
||||
func (e *Engine) AddLog(msg string) {
|
||||
e.logMu.Lock()
|
||||
defer e.logMu.Unlock()
|
||||
@@ -186,17 +211,38 @@ func (e *Engine) RecordHeartbeat(token string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
prevStatus := site.Status
|
||||
site.LastCheck = time.Now()
|
||||
wasDown := site.Status == "DOWN"
|
||||
site.Status = "UP"
|
||||
site.FailureCount = 0
|
||||
site.Latency = 0
|
||||
site.LastError = ""
|
||||
site.LastSuccessAt = time.Now()
|
||||
|
||||
if prevStatus != "UP" {
|
||||
site.StatusChangedAt = time.Now()
|
||||
}
|
||||
|
||||
e.liveState[targetID] = site
|
||||
|
||||
if wasDown {
|
||||
e.AddLog(fmt.Sprintf("Push Monitor '%s' recovered", site.Name))
|
||||
e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.", site.Name))
|
||||
switch prevStatus {
|
||||
case "PENDING":
|
||||
e.AddLog(fmt.Sprintf("Push Monitor '%s' received first heartbeat", site.Name))
|
||||
case "LATE":
|
||||
e.AddLog(fmt.Sprintf("Push Monitor '%s' heartbeat arrived (was late)", site.Name))
|
||||
case "DOWN":
|
||||
downDur := ""
|
||||
if !site.StatusChangedAt.IsZero() {
|
||||
downDur = fmt.Sprintf(" (was down %s)", fmtDurationShort(time.Since(site.StatusChangedAt)))
|
||||
}
|
||||
e.AddLog(fmt.Sprintf("Push Monitor '%s' recovered%s", site.Name, downDur))
|
||||
go e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.%s", site.Name, downDur))
|
||||
}
|
||||
|
||||
if prevStatus != "UP" && prevStatus != "PENDING" {
|
||||
go func() { _ = e.db.SaveStateChange(targetID, prevStatus, "UP", "") }()
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -241,9 +287,6 @@ func (e *Engine) Start(ctx context.Context) {
|
||||
if !exists {
|
||||
e.mu.Lock()
|
||||
s.Status = "PENDING"
|
||||
if s.Type == "push" {
|
||||
s.LastCheck = time.Now()
|
||||
}
|
||||
if h, ok := e.GetHistory(s.ID); ok && len(h.Statuses) > 0 {
|
||||
if h.Statuses[len(h.Statuses)-1] {
|
||||
s.Status = "UP"
|
||||
@@ -283,6 +326,9 @@ func (e *Engine) UpdateSiteConfig(site models.Site) {
|
||||
site.LastCheck = existing.LastCheck
|
||||
site.SentSSLWarning = existing.SentSSLWarning
|
||||
site.FailureCount = existing.FailureCount
|
||||
site.LastError = existing.LastError
|
||||
site.StatusChangedAt = existing.StatusChangedAt
|
||||
site.LastSuccessAt = existing.LastSuccessAt
|
||||
e.liveState[site.ID] = site
|
||||
e.addToTokenIndex(site)
|
||||
}
|
||||
@@ -393,33 +439,62 @@ func (e *Engine) checkByID(id int) {
|
||||
updatedSite.CertExpiry = result.CertExpiry
|
||||
updatedSite.Latency = time.Duration(result.LatencyNs)
|
||||
updatedSite.LastCheck = time.Now()
|
||||
e.handleStatusChange(updatedSite, result.Status, result.StatusCode, time.Duration(result.LatencyNs))
|
||||
e.handleStatusChange(updatedSite, result.Status, result.StatusCode, time.Duration(result.LatencyNs), result.ErrorReason)
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) checkPush(site models.Site) {
|
||||
deadline := site.LastCheck.Add(time.Duration(site.Interval) * time.Second).Add(pushGracePeriod)
|
||||
if time.Now().After(deadline) {
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
} else if site.Status != "UP" {
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
if site.Status == "PENDING" {
|
||||
return
|
||||
}
|
||||
|
||||
interval := time.Duration(site.Interval) * time.Second
|
||||
grace := interval / 2
|
||||
if grace < minPushGrace {
|
||||
grace = minPushGrace
|
||||
}
|
||||
|
||||
overdue := site.LastCheck.Add(interval)
|
||||
graceEnd := overdue.Add(grace)
|
||||
now := time.Now()
|
||||
|
||||
if now.After(graceEnd) {
|
||||
if site.Status != "DOWN" {
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "heartbeat missed")
|
||||
}
|
||||
} else if now.After(overdue) {
|
||||
if site.Status != "LATE" {
|
||||
e.handleStatusChange(site, "LATE", 0, 0, "heartbeat overdue")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int, latency time.Duration) {
|
||||
func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int, latency time.Duration, errorReason string) {
|
||||
if !e.IsActive() {
|
||||
return
|
||||
}
|
||||
|
||||
newState := site
|
||||
newState.StatusCode = code
|
||||
newState.LastError = errorReason
|
||||
|
||||
if rawStatus == "UP" {
|
||||
newState.LastSuccessAt = time.Now()
|
||||
newState.LastError = ""
|
||||
} else {
|
||||
newState.LastSuccessAt = site.LastSuccessAt
|
||||
}
|
||||
|
||||
if site.Status == "UP" && rawStatus != "UP" {
|
||||
newState.FailureCount++
|
||||
if newState.FailureCount > site.MaxRetries {
|
||||
newState.Status = rawStatus
|
||||
newState.FailureCount = site.MaxRetries + 1
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN", site.Name))
|
||||
if errorReason != "" {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN: %s", site.Name, errorReason))
|
||||
} else {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN", site.Name))
|
||||
}
|
||||
} else {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' failed check %d/%d", site.Name, newState.FailureCount, site.MaxRetries))
|
||||
}
|
||||
@@ -431,6 +506,14 @@ func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int
|
||||
newState.FailureCount = site.MaxRetries + 1
|
||||
}
|
||||
|
||||
if newState.Status != site.Status && site.Status != "PENDING" {
|
||||
newState.StatusChangedAt = time.Now()
|
||||
} else if site.StatusChangedAt.IsZero() && newState.Status != "PENDING" {
|
||||
newState.StatusChangedAt = time.Now()
|
||||
} else {
|
||||
newState.StatusChangedAt = site.StatusChangedAt
|
||||
}
|
||||
|
||||
inMaint := e.isInMaintenance(site.ID)
|
||||
|
||||
if site.Type == "http" && site.CheckSSL && site.HasSSL {
|
||||
@@ -455,12 +538,24 @@ func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int
|
||||
|
||||
e.recordCheck(site.ID, latency, rawStatus == "UP")
|
||||
|
||||
if newState.Status != site.Status && site.Status != "PENDING" {
|
||||
go func() { _ = e.db.SaveStateChange(site.ID, site.Status, newState.Status, errorReason) }()
|
||||
}
|
||||
|
||||
isBroken := func(s string) bool { return s == "DOWN" || s == "SSL EXP" }
|
||||
|
||||
if site.Status == "UP" && newState.Status == "LATE" {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' heartbeat overdue", site.Name))
|
||||
}
|
||||
|
||||
if !isBroken(site.Status) && isBroken(newState.Status) && newState.Status != "PENDING" {
|
||||
if inMaint {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' is DOWN (alerts suppressed — maintenance)", site.Name))
|
||||
} else {
|
||||
msg := fmt.Sprintf("Monitor '%s' is DOWN (%s)", site.Name, rawStatus)
|
||||
if errorReason != "" {
|
||||
msg = fmt.Sprintf("Monitor '%s' is DOWN: %s", site.Name, errorReason)
|
||||
}
|
||||
if site.Type == "push" {
|
||||
msg = fmt.Sprintf("Push Monitor '%s' missed heartbeat.", site.Name)
|
||||
}
|
||||
@@ -468,11 +563,17 @@ func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int
|
||||
}
|
||||
}
|
||||
if isBroken(site.Status) && newState.Status == "UP" {
|
||||
if !inMaint {
|
||||
e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Monitor '%s' is UP", site.Name))
|
||||
} else {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' recovered (maintenance active, alert suppressed)", site.Name))
|
||||
downDur := ""
|
||||
if !site.StatusChangedAt.IsZero() {
|
||||
downDur = fmt.Sprintf(" (was down %s)", fmtDurationShort(time.Since(site.StatusChangedAt)))
|
||||
}
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' recovered%s", site.Name, downDur))
|
||||
if !inMaint {
|
||||
e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Monitor '%s' is UP%s", site.Name, downDur))
|
||||
}
|
||||
}
|
||||
if site.Status == "LATE" && newState.Status == "UP" && !isBroken(site.Status) {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' heartbeat arrived (was late)", site.Name))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -489,11 +590,57 @@ func (e *Engine) triggerAlert(alertID int, title, message string) {
|
||||
defer cancel()
|
||||
if err := provider.Send(ctx, title, message); err != nil {
|
||||
e.AddLog(fmt.Sprintf("Alert send failed (%s): %v", cfg.Name, err))
|
||||
e.recordAlertResult(alertID, false, err.Error())
|
||||
} else {
|
||||
e.recordAlertResult(alertID, true, "")
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) recordAlertResult(alertID int, ok bool, errMsg string) {
|
||||
e.alertHealthMu.Lock()
|
||||
defer e.alertHealthMu.Unlock()
|
||||
h := e.alertHealth[alertID]
|
||||
h.LastSendAt = time.Now()
|
||||
h.LastSendOK = ok
|
||||
h.SendCount++
|
||||
if ok {
|
||||
h.LastError = ""
|
||||
} else {
|
||||
h.LastError = errMsg
|
||||
h.FailCount++
|
||||
}
|
||||
e.alertHealth[alertID] = h
|
||||
}
|
||||
|
||||
func (e *Engine) GetAlertHealth(alertID int) AlertHealth {
|
||||
e.alertHealthMu.RLock()
|
||||
defer e.alertHealthMu.RUnlock()
|
||||
return e.alertHealth[alertID]
|
||||
}
|
||||
|
||||
func (e *Engine) TestAlert(alertID int) error {
|
||||
cfg, err := e.db.GetAlert(alertID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load alert: %w", err)
|
||||
}
|
||||
provider := alert.GetProvider(cfg)
|
||||
if provider == nil {
|
||||
return fmt.Errorf("no provider for type %q", cfg.Type)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
err = provider.Send(ctx, "🧪 Test Alert", fmt.Sprintf("Test notification from uptop for channel '%s'.", cfg.Name))
|
||||
if err != nil {
|
||||
e.recordAlertResult(alertID, false, err.Error())
|
||||
return err
|
||||
}
|
||||
e.recordAlertResult(alertID, true, "")
|
||||
e.AddLog(fmt.Sprintf("Test alert sent to '%s'", cfg.Name))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Engine) isInMaintenance(monitorID int) bool {
|
||||
inMaint, err := e.db.IsMonitorInMaintenance(monitorID)
|
||||
if err != nil {
|
||||
@@ -554,16 +701,17 @@ func (e *Engine) SetAggStrategy(strategy AggregationStrategy) {
|
||||
e.aggStrategy = strategy
|
||||
}
|
||||
|
||||
func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, isUp bool) {
|
||||
func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, isUp bool, errorReason string) {
|
||||
e.probeResultsMu.Lock()
|
||||
if e.probeResults[siteID] == nil {
|
||||
e.probeResults[siteID] = make(map[string]NodeResult)
|
||||
}
|
||||
e.probeResults[siteID][nodeID] = NodeResult{
|
||||
NodeID: nodeID,
|
||||
IsUp: isUp,
|
||||
LatencyNs: latencyNs,
|
||||
CheckedAt: time.Now(),
|
||||
NodeID: nodeID,
|
||||
IsUp: isUp,
|
||||
LatencyNs: latencyNs,
|
||||
CheckedAt: time.Now(),
|
||||
ErrorReason: errorReason,
|
||||
}
|
||||
results := make([]NodeResult, 0, len(e.probeResults[siteID]))
|
||||
for _, r := range e.probeResults[siteID] {
|
||||
@@ -588,7 +736,7 @@ func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, i
|
||||
updatedSite := site
|
||||
updatedSite.Latency = time.Duration(avgLatency)
|
||||
updatedSite.LastCheck = time.Now()
|
||||
e.handleStatusChange(updatedSite, rawStatus, 0, time.Duration(avgLatency))
|
||||
e.handleStatusChange(updatedSite, rawStatus, 0, time.Duration(avgLatency), errorReason)
|
||||
}
|
||||
|
||||
func (e *Engine) GetProbeResults(siteID int) map[string]NodeResult {
|
||||
@@ -601,3 +749,11 @@ func (e *Engine) GetProbeResults(siteID int) map[string]NodeResult {
|
||||
}
|
||||
return cp
|
||||
}
|
||||
|
||||
func (e *Engine) GetStateChanges(siteID int, limit int) []models.StateChange {
|
||||
changes, err := e.db.GetStateChanges(siteID, limit)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return changes
|
||||
}
|
||||
|
||||
@@ -2,10 +2,11 @@ package monitor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
)
|
||||
|
||||
// --- Mock Store ---
|
||||
@@ -68,12 +69,14 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
|
||||
func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error) {
|
||||
m.mu.Lock()
|
||||
@@ -174,7 +177,7 @@ func TestHandleStatusChange_PendingToUp(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "PENDING", MaxRetries: 3, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 10*time.Millisecond)
|
||||
e.handleStatusChange(site, "UP", 200, 10*time.Millisecond, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -195,7 +198,7 @@ func TestHandleStatusChange_UpIncrementFailure(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 3, FailureCount: 0}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 500, 0)
|
||||
e.handleStatusChange(site, "DOWN", 500, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -213,7 +216,7 @@ func TestHandleStatusChange_UpToDown_ExceedsRetries(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 2, FailureCount: 2, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 500, 0)
|
||||
e.handleStatusChange(site, "DOWN", 500, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "DOWN" {
|
||||
@@ -236,7 +239,7 @@ func TestHandleStatusChange_UpToDown_ZeroRetries(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, FailureCount: 0, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "DOWN" {
|
||||
@@ -255,7 +258,7 @@ func TestHandleStatusChange_DownToUp_Recovery(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "DOWN", FailureCount: 4, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 5*time.Millisecond)
|
||||
e.handleStatusChange(site, "UP", 200, 5*time.Millisecond, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -276,7 +279,7 @@ func TestHandleStatusChange_DownStaysDown(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "DOWN", MaxRetries: 2, FailureCount: 3}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "DOWN" {
|
||||
@@ -295,7 +298,7 @@ func TestHandleStatusChange_SSLExpired(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "SSL EXP", 0, 0)
|
||||
e.handleStatusChange(site, "SSL EXP", 0, 0, "SSL certificate expired")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "SSL EXP" {
|
||||
@@ -315,7 +318,7 @@ func TestHandleStatusChange_AlertSuppressedMaintenance(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "DOWN" {
|
||||
@@ -346,7 +349,7 @@ func TestHandleStatusChange_RecoverySuppressedMaintenance(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "DOWN", AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -370,7 +373,7 @@ func TestHandleStatusChange_SSLWarning(t *testing.T) {
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if !s.SentSSLWarning {
|
||||
@@ -393,7 +396,7 @@ func TestHandleStatusChange_SSLWarningNotRepeated(t *testing.T) {
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
waitAsync()
|
||||
if len(ms.getAlertCallsSnapshot()) != 0 {
|
||||
@@ -412,7 +415,7 @@ func TestHandleStatusChange_SSLWarningReset(t *testing.T) {
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.SentSSLWarning {
|
||||
@@ -433,7 +436,7 @@ func TestHandleStatusChange_SSLWarningSuppressedMaint(t *testing.T) {
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if !s.SentSSLWarning {
|
||||
@@ -452,7 +455,7 @@ func TestHandleStatusChange_InactiveEngine(t *testing.T) {
|
||||
injectSite(e, site)
|
||||
e.SetActive(false)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -534,7 +537,7 @@ func TestCheckPush_DeadlineMissed(t *testing.T) {
|
||||
site := models.Site{
|
||||
ID: 1, Name: "push", Type: "push", Status: "UP",
|
||||
Interval: 10, MaxRetries: 0,
|
||||
LastCheck: time.Now().Add(-20 * time.Second),
|
||||
LastCheck: time.Now().Add(-120 * time.Second),
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
@@ -546,6 +549,24 @@ func TestCheckPush_DeadlineMissed(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckPush_OverdueBecomesLate(t *testing.T) {
|
||||
ms := newMockStore()
|
||||
e := newTestEngine(ms)
|
||||
site := models.Site{
|
||||
ID: 1, Name: "push", Type: "push", Status: "UP",
|
||||
Interval: 300,
|
||||
LastCheck: time.Now().Add(-310 * time.Second),
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.checkPush(site)
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "LATE" {
|
||||
t.Errorf("expected LATE when overdue but within grace, got %s", s.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckPush_WithinDeadline(t *testing.T) {
|
||||
ms := newMockStore()
|
||||
e := newTestEngine(ms)
|
||||
@@ -563,20 +584,20 @@ func TestCheckPush_WithinDeadline(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckPush_PendingToUp(t *testing.T) {
|
||||
func TestCheckPush_PendingStaysPending(t *testing.T) {
|
||||
ms := newMockStore()
|
||||
e := newTestEngine(ms)
|
||||
site := models.Site{
|
||||
ID: 1, Name: "push", Type: "push", Status: "PENDING",
|
||||
Interval: 60, LastCheck: time.Now(),
|
||||
Interval: 60,
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.checkPush(site)
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
t.Errorf("expected UP, got %s", s.Status)
|
||||
if s.Status != "PENDING" {
|
||||
t.Errorf("expected PENDING to stay until first heartbeat, got %s", s.Status)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -991,7 +1012,7 @@ func TestConcurrent_HandleStatusChangeAndGetState(t *testing.T) {
|
||||
wg.Add(2)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
e.handleStatusChange(site, "DOWN", 500, 0)
|
||||
e.handleStatusChange(site, "DOWN", 500, 0, "test error")
|
||||
}()
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
@@ -67,6 +67,7 @@ var statusTpl = template.Must(template.New("status").Parse(`
|
||||
.UP { background: #9ece6a; color: #1a1b26; }
|
||||
.DOWN { background: #f7768e; color: #1a1b26; }
|
||||
.PENDING { background: #e0af68; color: #1a1b26; }
|
||||
.LATE { background: #e0af68; color: #1a1b26; }
|
||||
.SSL-EXP { background: #e0af68; color: #1a1b26; }
|
||||
.PAUSED { background: #565f89; color: #c0caf5; }
|
||||
.MAINT { background: #bb9af7; color: #1a1b26; }
|
||||
@@ -403,9 +404,10 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server {
|
||||
var req struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Results []struct {
|
||||
SiteID int `json:"site_id"`
|
||||
LatencyNs int64 `json:"latency_ns"`
|
||||
IsUp bool `json:"is_up"`
|
||||
SiteID int `json:"site_id"`
|
||||
LatencyNs int64 `json:"latency_ns"`
|
||||
IsUp bool `json:"is_up"`
|
||||
ErrorReason string `json:"error_reason"`
|
||||
} `json:"results"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
@@ -420,7 +422,7 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server {
|
||||
if err := s.SaveCheckFromNode(result.SiteID, req.NodeID, result.LatencyNs, result.IsUp); err != nil {
|
||||
log.Printf("Failed to save probe result: %v", err)
|
||||
}
|
||||
eng.IngestProbeResult(req.NodeID, result.SiteID, result.LatencyNs, result.IsUp)
|
||||
eng.IngestProbeResult(req.NodeID, result.SiteID, result.LatencyNs, result.IsUp, result.ErrorReason)
|
||||
}
|
||||
if err := s.UpdateNodeLastSeen(req.NodeID); err != nil {
|
||||
log.Printf("Failed to update node last seen: %v", err)
|
||||
|
||||
@@ -4,13 +4,14 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"net"
|
||||
"net/http"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
)
|
||||
|
||||
// --- Mock Store ---
|
||||
@@ -69,13 +70,15 @@ func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
|
||||
func (m *mockStore) ExportData() (models.Backup, error) {
|
||||
return models.Backup{
|
||||
|
||||
@@ -72,6 +72,15 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
)`,
|
||||
`CREATE TABLE IF NOT EXISTS state_changes (
|
||||
id SERIAL PRIMARY KEY,
|
||||
site_id INTEGER NOT NULL,
|
||||
from_status TEXT NOT NULL,
|
||||
to_status TEXT NOT NULL,
|
||||
error_reason TEXT DEFAULT '',
|
||||
changed_at TIMESTAMP DEFAULT NOW()
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -79,6 +79,15 @@ func (d *SQLiteDialect) CreateTablesSQL() []string {
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
)`,
|
||||
`CREATE TABLE IF NOT EXISTS state_changes (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
site_id INTEGER NOT NULL,
|
||||
from_status TEXT NOT NULL,
|
||||
to_status TEXT NOT NULL,
|
||||
error_reason TEXT DEFAULT '',
|
||||
changed_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -347,6 +347,29 @@ func (s *SQLStore) DeleteUser(id int) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error {
|
||||
_, err := s.db.Exec(s.q("INSERT INTO state_changes (site_id, from_status, to_status, error_reason) VALUES (?, ?, ?, ?)"),
|
||||
siteID, fromStatus, toStatus, errorReason)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) GetStateChanges(siteID int, limit int) ([]models.StateChange, error) {
|
||||
rows, err := s.db.Query(s.q("SELECT id, site_id, from_status, to_status, error_reason, changed_at FROM state_changes WHERE site_id = ? ORDER BY changed_at DESC LIMIT ?"), siteID, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var changes []models.StateChange
|
||||
for rows.Next() {
|
||||
var sc models.StateChange
|
||||
if err := rows.Scan(&sc.ID, &sc.SiteID, &sc.FromStatus, &sc.ToStatus, &sc.ErrorReason, &sc.ChangedAt); err != nil {
|
||||
return changes, err
|
||||
}
|
||||
changes = append(changes, sc)
|
||||
}
|
||||
return changes, rows.Err()
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveCheck(siteID int, latencyNs int64, isUp bool) error {
|
||||
return s.SaveCheckFromNode(siteID, "", latencyNs, isUp)
|
||||
}
|
||||
|
||||
@@ -38,6 +38,10 @@ type Store interface {
|
||||
SaveCheckFromNode(siteID int, nodeID string, latencyNs int64, isUp bool) error
|
||||
LoadAllHistory(limit int) (map[int][]models.CheckRecord, error)
|
||||
|
||||
// State Changes
|
||||
SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error
|
||||
GetStateChanges(siteID int, limit int) ([]models.StateChange, error)
|
||||
|
||||
// Nodes
|
||||
RegisterNode(node models.ProbeNode) error
|
||||
GetNode(id string) (models.ProbeNode, error)
|
||||
|
||||
@@ -2,7 +2,10 @@ package tui
|
||||
|
||||
import (
	"fmt"
	"sort"
	"strings"
	"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
"github.com/charmbracelet/huh"
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
@@ -113,34 +116,122 @@ func fmtAlertConfig(alert struct {
|
||||
}
|
||||
}
|
||||
|
||||
func fmtAlertHealth(h monitor.AlertHealth) string {
|
||||
if h.LastSendAt.IsZero() {
|
||||
return subtleStyle.Render("●")
|
||||
}
|
||||
if h.LastSendOK {
|
||||
return specialStyle.Render("●")
|
||||
}
|
||||
return dangerStyle.Render("●")
|
||||
}
|
||||
|
||||
func fmtAlertLastSent(h monitor.AlertHealth) string {
|
||||
if h.LastSendAt.IsZero() {
|
||||
return subtleStyle.Render("never")
|
||||
}
|
||||
d := time.Since(h.LastSendAt)
|
||||
if d < time.Minute {
|
||||
return fmt.Sprintf("%ds ago", int(d.Seconds()))
|
||||
}
|
||||
if d < time.Hour {
|
||||
return fmt.Sprintf("%dm ago", int(d.Minutes()))
|
||||
}
|
||||
if d < 24*time.Hour {
|
||||
return fmt.Sprintf("%dh ago", int(d.Hours()))
|
||||
}
|
||||
return fmt.Sprintf("%dd ago", int(d.Hours())/24)
|
||||
}
|
||||
|
||||
func (m Model) viewAlertsTab() string {
|
||||
if len(m.alerts) == 0 {
|
||||
return "\n No alert channels configured. Press [n] to add one."
|
||||
}
|
||||
|
||||
var headers []string
|
||||
var widths []int
|
||||
if m.isWide() {
|
||||
headers = []string{"#", "", "NAME", "TYPE", "CONFIG", "LAST SENT"}
|
||||
widths = []int{4, 3, 18, 12, 40, 12}
|
||||
} else {
|
||||
headers = []string{"#", "", "NAME", "TYPE", "CONFIG", "SENT"}
|
||||
widths = []int{4, 3, 14, 10, 24, 8}
|
||||
}
|
||||
nameW := widths[2]
|
||||
cfgW := widths[4]
|
||||
|
||||
return m.renderTable(
|
||||
[]string{"#", "NAME", "TYPE", "CONFIG"},
|
||||
headers,
|
||||
len(m.alerts),
|
||||
func(start, end int) [][]string {
|
||||
var rows [][]string
|
||||
for i := start; i < end; i++ {
|
||||
a := m.alerts[i]
|
||||
h := m.engine.GetAlertHealth(a.ID)
|
||||
rows = append(rows, []string{
|
||||
fmt.Sprintf("%d", i+1),
|
||||
m.zones.Mark(fmt.Sprintf("alert-%d", i), limitStr(a.Name, 15)),
|
||||
fmtAlertHealth(h),
|
||||
m.zones.Mark(fmt.Sprintf("alert-%d", i), limitStr(a.Name, nameW-2)),
|
||||
fmtAlertType(a.Type),
|
||||
fmtAlertConfig(struct {
|
||||
limitStr(fmtAlertConfig(struct {
|
||||
Type string
|
||||
Settings map[string]string
|
||||
}{a.Type, a.Settings}),
|
||||
}{a.Type, a.Settings}), cfgW-2),
|
||||
fmtAlertLastSent(h),
|
||||
})
|
||||
}
|
||||
return rows
|
||||
},
|
||||
nil, nil,
|
||||
widths, nil,
|
||||
)
|
||||
}
|
||||
|
||||
func (m Model) viewAlertDetailPanel() string {
|
||||
if m.cursor >= len(m.alerts) {
|
||||
return ""
|
||||
}
|
||||
a := m.alerts[m.cursor]
|
||||
h := m.engine.GetAlertHealth(a.ID)
|
||||
|
||||
var b strings.Builder
|
||||
|
||||
b.WriteString(subtleStyle.Render(" Alerts > ") + titleStyle.Render(a.Name) + "\n\n")
|
||||
|
||||
row := func(label, value string) {
|
||||
fmt.Fprintf(&b, " %-16s %s\n", subtleStyle.Render(label), value)
|
||||
}
|
||||
|
||||
row("Type", fmtAlertType(a.Type))
|
||||
|
||||
if h.LastSendAt.IsZero() {
|
||||
row("Health", subtleStyle.Render("never sent"))
|
||||
} else if h.LastSendOK {
|
||||
row("Health", specialStyle.Render("OK"))
|
||||
} else {
|
||||
row("Health", dangerStyle.Render("FAILED"))
|
||||
}
|
||||
|
||||
if !h.LastSendAt.IsZero() {
|
||||
row("Last Sent", h.LastSendAt.Format("2006-01-02 15:04:05")+" ("+fmtAlertLastSent(h)+")")
|
||||
}
|
||||
if h.SendCount > 0 {
|
||||
row("Sends", fmt.Sprintf("%d sent, %d failed", h.SendCount, h.FailCount))
|
||||
}
|
||||
if h.LastError != "" {
|
||||
row("Last Error", dangerStyle.Render(limitStr(h.LastError, 60)))
|
||||
}
|
||||
|
||||
b.WriteString("\n" + subtleStyle.Render(" CONFIGURATION") + "\n")
|
||||
for k, v := range a.Settings {
|
||||
row(k, v)
|
||||
}
|
||||
|
||||
b.WriteString("\n\n")
|
||||
b.WriteString(subtleStyle.Render(" [i/Esc] Back [e] Edit [t] Test [q] Quit"))
|
||||
|
||||
return lipgloss.NewStyle().Padding(1, 2).Render(b.String())
|
||||
}
|
||||
|
||||
func (m *Model) initAlertHuhForm() tea.Cmd {
|
||||
m.alertFormData = &alertFormData{
|
||||
AlertType: "discord",
|
||||
|
||||
+89
-21
@@ -5,27 +5,83 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func colorizeLog(line string) string {
|
||||
type logSeverity int
|
||||
|
||||
const (
|
||||
severityInfo logSeverity = iota
|
||||
severityWarn
|
||||
severityDown
|
||||
severityUp
|
||||
severitySystem
|
||||
)
|
||||
|
||||
func classifyLog(line string) logSeverity {
|
||||
lower := strings.ToLower(line)
|
||||
switch {
|
||||
case strings.Contains(lower, "confirmed down"),
|
||||
strings.Contains(lower, "is down"),
|
||||
strings.Contains(lower, "missed heartbeat"),
|
||||
strings.Contains(lower, "failed check"),
|
||||
strings.Contains(lower, "ssl warning"):
|
||||
return dangerStyle.Render(line)
|
||||
strings.Contains(lower, "alert send failed"):
|
||||
return severityDown
|
||||
case strings.Contains(lower, "recovered"),
|
||||
strings.Contains(lower, "is up"),
|
||||
strings.Contains(lower, "recovery"):
|
||||
return specialStyle.Render(line)
|
||||
strings.Contains(lower, "recovery"),
|
||||
strings.Contains(lower, "first heartbeat"):
|
||||
return severityUp
|
||||
case strings.Contains(lower, "failed check"),
|
||||
strings.Contains(lower, "ssl warning"),
|
||||
strings.Contains(lower, "overdue"),
|
||||
strings.Contains(lower, "was late"):
|
||||
return severityWarn
|
||||
case strings.Contains(lower, "engine"),
|
||||
strings.Contains(lower, "cluster"):
|
||||
return titleStyle.Render(line)
|
||||
strings.Contains(lower, "cluster"),
|
||||
strings.Contains(lower, "loaded"),
|
||||
strings.Contains(lower, "paused"),
|
||||
strings.Contains(lower, "resumed"):
|
||||
return severitySystem
|
||||
default:
|
||||
return line
|
||||
return severityInfo
|
||||
}
|
||||
}
|
||||
|
||||
func isImportantLog(sev logSeverity) bool {
|
||||
return sev == severityDown || sev == severityUp || sev == severitySystem
|
||||
}
|
||||
|
||||
func renderLogTag(sev logSeverity) string {
|
||||
switch sev {
|
||||
case severityDown:
|
||||
return dangerStyle.Render(" DOWN ")
|
||||
case severityUp:
|
||||
return specialStyle.Render(" UP ")
|
||||
case severityWarn:
|
||||
return warnStyle.Render(" WARN ")
|
||||
case severitySystem:
|
||||
return titleStyle.Render(" SYS ")
|
||||
default:
|
||||
return subtleStyle.Render(" info ")
|
||||
}
|
||||
}
|
||||
|
||||
func renderLogLine(line string) string {
|
||||
sev := classifyLog(line)
|
||||
tag := renderLogTag(sev)
|
||||
|
||||
ts := ""
|
||||
msg := line
|
||||
if len(line) > 10 && line[0] == '[' {
|
||||
if idx := strings.Index(line, "]"); idx > 0 && idx < 12 {
|
||||
ts = subtleStyle.Render(line[1:idx])
|
||||
msg = strings.TrimSpace(line[idx+1:])
|
||||
}
|
||||
}
|
||||
|
||||
if ts != "" {
|
||||
return fmt.Sprintf(" %s %s %s", ts, tag, msg)
|
||||
}
|
||||
return fmt.Sprintf(" %s %s", tag, msg)
|
||||
}
|
||||
|
||||
func (m Model) viewLogsTab() string {
|
||||
content := m.logViewport.View()
|
||||
if strings.TrimSpace(content) == "" || content == "Waiting for logs..." {
|
||||
@@ -33,22 +89,34 @@ func (m Model) viewLogsTab() string {
|
||||
}
|
||||
|
||||
lines := strings.Split(content, "\n")
|
||||
var colored []string
|
||||
var rendered []string
|
||||
total := 0
|
||||
shown := 0
|
||||
|
||||
for _, line := range lines {
|
||||
if line == "" {
|
||||
colored = append(colored, line)
|
||||
if strings.TrimSpace(line) == "" {
|
||||
continue
|
||||
}
|
||||
colored = append(colored, colorizeLog(line))
|
||||
}
|
||||
|
||||
count := 0
|
||||
for _, l := range lines {
|
||||
if strings.TrimSpace(l) != "" {
|
||||
count++
|
||||
total++
|
||||
sev := classifyLog(line)
|
||||
if m.logFilterImportant && !isImportantLog(sev) {
|
||||
continue
|
||||
}
|
||||
shown++
|
||||
rendered = append(rendered, renderLogLine(line))
|
||||
}
|
||||
|
||||
header := subtleStyle.Render(fmt.Sprintf(" %d entries [↑/↓] Scroll [PgUp/PgDn] Page", count))
|
||||
return "\n" + header + "\n\n" + strings.Join(colored, "\n")
|
||||
filterLabel := "All"
|
||||
if m.logFilterImportant {
|
||||
filterLabel = "Important"
|
||||
}
|
||||
|
||||
header := subtleStyle.Render(fmt.Sprintf(
|
||||
" %d entries [↑/↓] Scroll [PgUp/PgDn] Page [f] Filter: %s", shown, filterLabel))
|
||||
|
||||
if m.logFilterImportant && shown < total {
|
||||
header += subtleStyle.Render(fmt.Sprintf(" (%d hidden)", total-shown))
|
||||
}
|
||||
|
||||
return "\n" + header + "\n\n" + strings.Join(rendered, "\n")
|
||||
}
|
||||
|
||||
+28
-11
@@ -2,10 +2,11 @@ package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
"github.com/charmbracelet/huh"
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
@@ -40,19 +41,19 @@ func fmtMaintType(t string) string {
|
||||
return maintStyle.Render("maintenance")
|
||||
}
|
||||
|
||||
func fmtMaintMonitor(monitorID int, sites []models.Site) string {
|
||||
func fmtMaintMonitorW(monitorID int, sites []models.Site, maxW int) string {
|
||||
if monitorID == 0 {
|
||||
return "All"
|
||||
}
|
||||
for _, s := range sites {
|
||||
if s.ID == monitorID {
|
||||
return limitStr(s.Name, 18)
|
||||
return limitStr(s.Name, maxW)
|
||||
}
|
||||
}
|
||||
return fmt.Sprintf("#%d", monitorID)
|
||||
}
|
||||
|
||||
func fmtMaintTime(t time.Time) string {
|
||||
func fmtMaintTime(t time.Time, colW int) string {
|
||||
if t.IsZero() {
|
||||
return subtleStyle.Render("—")
|
||||
}
|
||||
@@ -60,7 +61,10 @@ func fmtMaintTime(t time.Time) string {
|
||||
if t.Year() == now.Year() && t.YearDay() == now.YearDay() {
|
||||
return t.Format("15:04")
|
||||
}
|
||||
return t.Format("15:04 Jan 02")
|
||||
if colW >= 14 {
|
||||
return t.Format("15:04 Jan 02")
|
||||
}
|
||||
return t.Format("Jan 02")
|
||||
}
|
||||
|
||||
func (m Model) isMonitorInMaintenance(monitorID int) bool {
|
||||
@@ -92,8 +96,21 @@ func (m Model) viewMaintTab() string {
|
||||
return "\n No maintenance windows or incidents. Press [n] to create one."
|
||||
}
|
||||
|
||||
var headers []string
|
||||
var widths []int
|
||||
if m.isWide() {
|
||||
headers = []string{"#", "TITLE", "TYPE", "MONITORS", "STATUS", "STARTED", "ENDS"}
|
||||
widths = []int{4, 24, 14, 22, 12, 16, 16}
|
||||
} else {
|
||||
headers = []string{"#", "TITLE", "TYPE", "MON", "ST", "START", "ENDS"}
|
||||
widths = []int{4, 14, 13, 14, 11, 14, 14}
|
||||
}
|
||||
titleW := widths[1]
|
||||
monW := widths[3]
|
||||
timeW := widths[5]
|
||||
|
||||
return m.renderTable(
|
||||
[]string{"#", "TITLE", "TYPE", "MONITORS", "STATUS", "STARTED", "ENDS"},
|
||||
headers,
|
||||
len(m.maintenanceWindows),
|
||||
func(start, end int) [][]string {
|
||||
var rows [][]string
|
||||
@@ -102,17 +119,17 @@ func (m Model) viewMaintTab() string {
|
||||
mw := m.maintenanceWindows[i]
|
||||
rows = append(rows, []string{
|
||||
strconv.Itoa(i + 1),
|
||||
m.zones.Mark(fmt.Sprintf("maint-%d", i), limitStr(mw.Title, 24)),
|
||||
m.zones.Mark(fmt.Sprintf("maint-%d", i), limitStr(mw.Title, titleW-2)),
|
||||
fmtMaintType(mw.Type),
|
||||
fmtMaintMonitor(mw.MonitorID, allSites),
|
||||
fmtMaintMonitorW(mw.MonitorID, allSites, monW-2),
|
||||
fmtMaintStatus(mw),
|
||||
fmtMaintTime(mw.StartTime),
|
||||
fmtMaintTime(mw.EndTime),
|
||||
fmtMaintTime(mw.StartTime, timeW),
|
||||
fmtMaintTime(mw.EndTime, timeW),
|
||||
})
|
||||
}
|
||||
return rows
|
||||
},
|
||||
[]int{6, 0, 14, 20, 12, 16, 16},
|
||||
widths,
|
||||
nil,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -10,16 +10,25 @@ func (m Model) viewNodesTab() string {
|
||||
return "\n No probe nodes connected."
|
||||
}
|
||||
|
||||
colWidths := []int{0, 12, 20, 10, 8}
|
||||
var headers []string
|
||||
var widths []int
|
||||
if m.isWide() {
|
||||
headers = []string{"NAME", "REGION", "LAST SEEN", "VERSION", "STATUS"}
|
||||
widths = []int{24, 14, 16, 12, 10}
|
||||
} else {
|
||||
headers = []string{"NAME", "REGION", "SEEN", "VER", "STATUS"}
|
||||
widths = []int{16, 10, 10, 8, 8}
|
||||
}
|
||||
nameW := widths[0]
|
||||
|
||||
return m.renderTable(
|
||||
[]string{"NAME", "REGION", "LAST SEEN", "VERSION", "STATUS"},
|
||||
headers,
|
||||
len(m.nodes),
|
||||
func(start, end int) [][]string {
|
||||
var rows [][]string
|
||||
for i := start; i < end; i++ {
|
||||
node := m.nodes[i]
|
||||
name := limitStr(node.Name, 20)
|
||||
name := limitStr(node.Name, nameW-2)
|
||||
if name == "" {
|
||||
name = node.ID
|
||||
}
|
||||
@@ -37,7 +46,7 @@ func (m Model) viewNodesTab() string {
|
||||
}
|
||||
return rows
|
||||
},
|
||||
colWidths,
|
||||
widths,
|
||||
nil,
|
||||
)
|
||||
}
|
||||
|
||||
+194
-43
@@ -60,14 +60,18 @@ type siteFormData struct {
|
||||
Regions string
|
||||
}
|
||||
|
||||
func latencySparkline(latencies []time.Duration, width int) string {
|
||||
func latencySparkline(latencies []time.Duration, statuses []bool, width int) string {
|
||||
if len(latencies) == 0 {
|
||||
return subtleStyle.Render(strings.Repeat("·", width))
|
||||
}
|
||||
|
||||
samples := latencies
|
||||
sampledStatuses := statuses
|
||||
if len(samples) > width {
|
||||
samples = samples[len(samples)-width:]
|
||||
if len(sampledStatuses) > width {
|
||||
sampledStatuses = sampledStatuses[len(sampledStatuses)-width:]
|
||||
}
|
||||
}
|
||||
|
||||
minL, maxL := samples[0], samples[0]
|
||||
@@ -85,7 +89,7 @@ func latencySparkline(latencies []time.Duration, width int) string {
|
||||
sb.WriteString(subtleStyle.Render(strings.Repeat("·", remaining)))
|
||||
}
|
||||
spread := maxL - minL
|
||||
for _, l := range samples {
|
||||
for i, l := range samples {
|
||||
idx := 0
|
||||
if spread > 0 {
|
||||
idx = int(float64(l-minL) / float64(spread) * 7)
|
||||
@@ -94,13 +98,18 @@ func latencySparkline(latencies []time.Duration, width int) string {
|
||||
}
|
||||
}
|
||||
ch := string(sparkChars[idx])
|
||||
ms := l.Milliseconds()
|
||||
if ms < 200 {
|
||||
sb.WriteString(specialStyle.Render(ch))
|
||||
} else if ms < 500 {
|
||||
sb.WriteString(warnStyle.Render(ch))
|
||||
} else {
|
||||
isDown := i < len(sampledStatuses) && !sampledStatuses[i]
|
||||
if isDown {
|
||||
sb.WriteString(dangerStyle.Render(ch))
|
||||
} else {
|
||||
ms := l.Milliseconds()
|
||||
if ms < 200 {
|
||||
sb.WriteString(specialStyle.Render(ch))
|
||||
} else if ms < 500 {
|
||||
sb.WriteString(warnStyle.Render(ch))
|
||||
} else {
|
||||
sb.WriteString(dangerStyle.Render(ch))
|
||||
}
|
||||
}
|
||||
}
|
||||
return sb.String()
|
||||
@@ -302,6 +311,8 @@ func fmtStatus(status string, paused bool, inMaint bool) string {
|
||||
switch status {
|
||||
case "DOWN", "SSL EXP":
|
||||
return dangerStyle.Render(status)
|
||||
case "LATE":
|
||||
return warnStyle.Render(status)
|
||||
case "PENDING":
|
||||
return subtleStyle.Render(status)
|
||||
default:
|
||||
@@ -309,28 +320,94 @@ func fmtStatus(status string, paused bool, inMaint bool) string {
|
||||
}
|
||||
}
|
||||
|
||||
func (m Model) dynamicWidths() (nameW, sparkW int) {
|
||||
fixed := 6 + 10 + 10 + 8 + 8 + 7 + 9 // #, TYPE, STATUS, LATENCY, UPTIME, SSL, RETRY
|
||||
overhead := 30 // cell padding + borders
|
||||
avail := m.termWidth - chromePadH - 2 - fixed - overhead
|
||||
if avail < 30 {
|
||||
avail = 30
|
||||
// fmtDuration renders d compactly using at most two adjacent units:
//
//	under a minute  -> "45s"
//	under an hour   -> "12m"
//	under a day     -> "3h" or "3h 20m"
//	a day or more   -> "2d" or "2d 5h"
//
// Values are truncated, not rounded, and a zero minor unit is omitted.
func fmtDuration(d time.Duration) string {
	switch {
	case d < time.Minute:
		return fmt.Sprintf("%ds", int(d.Seconds()))
	case d < time.Hour:
		return fmt.Sprintf("%dm", int(d.Minutes()))
	case d < 24*time.Hour:
		wholeHours := int(d.Hours())
		if rem := int(d.Minutes()) % 60; rem > 0 {
			return fmt.Sprintf("%dh %dm", wholeHours, rem)
		}
		return fmt.Sprintf("%dh", wholeHours)
	}

	totalHours := int(d.Hours())
	days, leftover := totalHours/24, totalHours%24
	if leftover > 0 {
		return fmt.Sprintf("%dd %dh", days, leftover)
	}
	return fmt.Sprintf("%dd", days)
}
|
||||
|
||||
// tableLayout bundles the column sizing produced by computeLayout for the
// sites table.
type tableLayout struct {
	// nameW is the width assigned to the NAME column.
	nameW int
	// sparkW is the width assigned to the HISTORY (sparkline) column.
	sparkW int
	// headers holds the column titles, in display order.
	headers []string
	// colWidths holds one width per entry in headers.
	colWidths []int
}
|
||||
|
||||
func (m Model) computeLayout() tableLayout {
|
||||
wide := m.isWide()
|
||||
|
||||
var fixed int
|
||||
var headers []string
|
||||
var widths []int
|
||||
|
||||
if wide {
|
||||
// # NAME TYPE STATUS LATENCY UPTIME HISTORY SSL RETRIES
|
||||
headers = []string{"#", "NAME", "TYPE", "STATUS", "LATENCY", "UPTIME", "HISTORY", "SSL", "RETRIES"}
|
||||
widths = []int{4, 0, 10, 10, 10, 8, 0, 7, 9}
|
||||
fixed = 4 + 10 + 10 + 10 + 8 + 7 + 9
|
||||
} else {
|
||||
// # NAME TYPE STATUS LAT UP% HISTORY SSL RT
|
||||
headers = []string{"#", "NAME", "TYPE", "STATUS", "LAT", "UP%", "HISTORY", "SSL", "RT"}
|
||||
widths = []int{4, 0, 8, 8, 7, 8, 0, 5, 5}
|
||||
fixed = 4 + 8 + 8 + 7 + 8 + 5 + 5
|
||||
}
|
||||
|
||||
numCols := len(headers)
|
||||
borderOverhead := 2 + (numCols - 1)
|
||||
avail := m.termWidth - chromePadH - 2 - borderOverhead - fixed
|
||||
if avail < 20 {
|
||||
avail = 20
|
||||
}
|
||||
|
||||
maxName := 0
|
||||
for _, s := range m.sites {
|
||||
if n := len([]rune(s.Name)); n > maxName {
|
||||
maxName = n
|
||||
}
|
||||
}
|
||||
maxName += 4
|
||||
|
||||
nameW := avail / 2
|
||||
if nameW > maxName {
|
||||
nameW = maxName
|
||||
}
|
||||
nameW = avail / 2
|
||||
sparkW = avail - nameW - 2 // -2 for spark column padding
|
||||
if nameW < 13 {
|
||||
nameW = 13
|
||||
}
|
||||
if nameW > 40 {
|
||||
nameW = 40
|
||||
}
|
||||
|
||||
sparkW := avail - nameW
|
||||
if sparkW < 10 {
|
||||
sparkW = 10
|
||||
}
|
||||
if sparkW > 60 {
|
||||
sparkW = 60
|
||||
|
||||
widths[1] = nameW
|
||||
widths[6] = sparkW
|
||||
|
||||
return tableLayout{
|
||||
nameW: nameW,
|
||||
sparkW: sparkW,
|
||||
headers: headers,
|
||||
colWidths: widths,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (m Model) viewSitesTab() string {
|
||||
@@ -348,12 +425,16 @@ func (m Model) viewSitesTab() string {
|
||||
return "\n" + welcome
|
||||
}
|
||||
|
||||
nameW, sparkWidth := m.dynamicWidths()
|
||||
colWidths := []int{6, 0, 10, 10, 8, 8, sparkWidth + 2, 7, 9}
|
||||
layout := m.computeLayout()
|
||||
nameW := layout.nameW
|
||||
sparkWidth := layout.sparkW - 2
|
||||
if sparkWidth < 8 {
|
||||
sparkWidth = 8
|
||||
}
|
||||
|
||||
var groupRows map[int]bool
|
||||
return m.renderTable(
|
||||
[]string{"#", "NAME", "TYPE", "STATUS", "LATENCY", "UPTIME", "HISTORY", "SSL", "RETRY"},
|
||||
layout.headers,
|
||||
len(m.sites),
|
||||
func(start, end int) [][]string {
|
||||
groupRows = make(map[int]bool)
|
||||
@@ -366,7 +447,7 @@ func (m Model) viewSitesTab() string {
|
||||
icon := typeIcon("group", m.collapsed[site.ID])
|
||||
rows = append(rows, []string{
|
||||
strconv.Itoa(i + 1),
|
||||
m.zones.Mark(fmt.Sprintf("site-%d", i), icon+" "+limitStr(site.Name, nameW-2)),
|
||||
m.zones.Mark(fmt.Sprintf("site-%d", i), icon+" "+limitStr(site.Name, nameW-4)),
|
||||
"group",
|
||||
fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID)),
|
||||
subtleStyle.Render("—"),
|
||||
@@ -384,9 +465,17 @@ func (m Model) viewSitesTab() string {
|
||||
if i+1 >= len(m.sites) || m.sites[i+1].ParentID != site.ParentID {
|
||||
prefix = "└"
|
||||
}
|
||||
name = prefix + " " + limitStr(name, nameW-2)
|
||||
name = prefix + " " + limitStr(name, nameW-4)
|
||||
} else {
|
||||
name = limitStr(name, nameW)
|
||||
name = limitStr(name, nameW-2)
|
||||
}
|
||||
|
||||
if (site.Status == "DOWN" || site.Status == "SSL EXP" || site.Status == "LATE") && site.LastError != "" {
|
||||
nameLen := len([]rune(name))
|
||||
errSpace := nameW - nameLen - 3
|
||||
if errSpace > 10 {
|
||||
name = name + " " + subtleStyle.Render(limitStr(site.LastError, errSpace))
|
||||
}
|
||||
}
|
||||
|
||||
hist, _ := m.engine.GetHistory(site.ID)
|
||||
@@ -394,7 +483,7 @@ func (m Model) viewSitesTab() string {
|
||||
if site.Type == "push" {
|
||||
spark = heartbeatSparkline(hist.Statuses, sparkWidth)
|
||||
} else {
|
||||
spark = latencySparkline(hist.Latencies, sparkWidth)
|
||||
spark = latencySparkline(hist.Latencies, hist.Statuses, sparkWidth)
|
||||
}
|
||||
|
||||
rows = append(rows, []string{
|
||||
@@ -411,7 +500,7 @@ func (m Model) viewSitesTab() string {
|
||||
}
|
||||
return rows
|
||||
},
|
||||
colWidths,
|
||||
layout.colWidths,
|
||||
func(row, col int) *lipgloss.Style {
|
||||
if groupRows[row] {
|
||||
s := siteGroupStyle
|
||||
@@ -731,7 +820,30 @@ func (m Model) viewDetailPanel() string {
|
||||
fmt.Fprintf(&b, " %-16s %s\n", subtleStyle.Render(label), value)
|
||||
}
|
||||
|
||||
section := func(label string) {
|
||||
b.WriteString("\n" + subtleStyle.Render(" "+label) + "\n")
|
||||
}
|
||||
|
||||
row("Status", fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID)))
|
||||
|
||||
if (site.Status == "DOWN" || site.Status == "SSL EXP" || site.Status == "LATE") && site.LastError != "" {
|
||||
row("Error", dangerStyle.Render(limitStr(site.LastError, 60)))
|
||||
}
|
||||
|
||||
if site.Type == "http" && site.StatusCode > 0 {
|
||||
row("HTTP Code", strconv.Itoa(site.StatusCode))
|
||||
}
|
||||
|
||||
if !site.StatusChangedAt.IsZero() {
|
||||
dur := time.Since(site.StatusChangedAt)
|
||||
row("State Since", site.StatusChangedAt.Format("2006-01-02 15:04:05")+" ("+fmtDuration(dur)+")")
|
||||
}
|
||||
|
||||
if !site.LastSuccessAt.IsZero() {
|
||||
ago := time.Since(site.LastSuccessAt)
|
||||
row("Last Success", site.LastSuccessAt.Format("15:04:05")+" ("+fmtDuration(ago)+" ago)")
|
||||
}
|
||||
|
||||
if m.isMonitorInMaintenance(site.ID) {
|
||||
for _, mw := range m.maintenanceWindows {
|
||||
if mw.Type == "maintenance" && (mw.MonitorID == 0 || mw.MonitorID == site.ID || mw.MonitorID == site.ParentID) {
|
||||
@@ -740,6 +852,8 @@ func (m Model) viewDetailPanel() string {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
section("ENDPOINT")
|
||||
row("Type", site.Type)
|
||||
if site.URL != "" {
|
||||
row("URL", site.URL)
|
||||
@@ -750,31 +864,45 @@ func (m Model) viewDetailPanel() string {
|
||||
if site.Port > 0 {
|
||||
row("Port", strconv.Itoa(site.Port))
|
||||
}
|
||||
|
||||
section("TIMING")
|
||||
row("Interval", fmt.Sprintf("%ds", site.Interval))
|
||||
row("Timeout", fmt.Sprintf("%ds", site.Timeout))
|
||||
if site.Timeout > 0 {
|
||||
row("Timeout", fmt.Sprintf("%ds", site.Timeout))
|
||||
}
|
||||
row("Latency", fmtLatency(site.Latency))
|
||||
row("Uptime", fmtUptime(hist.Statuses))
|
||||
if !site.LastCheck.IsZero() {
|
||||
row("Last Check", site.LastCheck.Format("15:04:05"))
|
||||
}
|
||||
|
||||
if site.Type == "http" {
|
||||
row("Method", site.Method)
|
||||
row("Codes", site.AcceptedCodes)
|
||||
section("HTTP")
|
||||
if site.Method != "" && site.Method != "GET" {
|
||||
row("Method", site.Method)
|
||||
}
|
||||
codes := site.AcceptedCodes
|
||||
if codes == "" {
|
||||
codes = "200-299"
|
||||
}
|
||||
row("Codes", codes)
|
||||
row("SSL", fmtSSL(site))
|
||||
if site.IgnoreTLS {
|
||||
row("TLS Verify", dangerStyle.Render("disabled"))
|
||||
}
|
||||
}
|
||||
|
||||
if site.MaxRetries > 0 {
|
||||
row("Retries", fmtRetries(site))
|
||||
}
|
||||
if site.Regions != "" {
|
||||
row("Regions", site.Regions)
|
||||
}
|
||||
if site.Description != "" {
|
||||
row("Description", site.Description)
|
||||
}
|
||||
if !site.LastCheck.IsZero() {
|
||||
row("Last Check", site.LastCheck.Format("15:04:05"))
|
||||
if site.MaxRetries > 0 || site.Regions != "" || site.Description != "" {
|
||||
section("CONFIG")
|
||||
if site.MaxRetries > 0 {
|
||||
row("Retries", fmtRetries(site))
|
||||
}
|
||||
if site.Regions != "" {
|
||||
row("Regions", site.Regions)
|
||||
}
|
||||
if site.Description != "" {
|
||||
row("Description", site.Description)
|
||||
}
|
||||
}
|
||||
|
||||
probeResults := m.engine.GetProbeResults(site.ID)
|
||||
@@ -787,7 +915,30 @@ func (m Model) viewDetailPanel() string {
|
||||
}
|
||||
latency := time.Duration(result.LatencyNs).Milliseconds()
|
||||
ago := time.Since(result.CheckedAt).Truncate(time.Second)
|
||||
fmt.Fprintf(&b, " %-14s %s %dms %s ago\n", nodeID, status, latency, ago)
|
||||
line := fmt.Sprintf(" %-14s %s %dms %s ago", nodeID, status, latency, ago)
|
||||
if !result.IsUp && result.ErrorReason != "" {
|
||||
line += " " + dangerStyle.Render(limitStr(result.ErrorReason, 30))
|
||||
}
|
||||
b.WriteString(line + "\n")
|
||||
}
|
||||
}
|
||||
|
||||
stateChanges := m.engine.GetStateChanges(site.ID, 5)
|
||||
if len(stateChanges) > 0 {
|
||||
b.WriteString("\n" + subtleStyle.Render(" STATE CHANGES") + "\n")
|
||||
for _, sc := range stateChanges {
|
||||
ago := fmtDuration(time.Since(sc.ChangedAt))
|
||||
arrow := subtleStyle.Render(sc.FromStatus) + " → "
|
||||
if sc.ToStatus == "UP" {
|
||||
arrow += specialStyle.Render(sc.ToStatus)
|
||||
} else {
|
||||
arrow += dangerStyle.Render(sc.ToStatus)
|
||||
}
|
||||
line := fmt.Sprintf(" %s %s", arrow, subtleStyle.Render(ago+" ago"))
|
||||
if sc.ErrorReason != "" && sc.ToStatus != "UP" {
|
||||
line += " " + dangerStyle.Render(limitStr(sc.ErrorReason, 40))
|
||||
}
|
||||
b.WriteString(line + "\n")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -807,7 +958,7 @@ func (m Model) viewDetailPanel() string {
|
||||
up, len(hist.Statuses))
|
||||
}
|
||||
} else {
|
||||
b.WriteString(" " + latencySparkline(hist.Latencies, sparkWidth))
|
||||
b.WriteString(" " + latencySparkline(hist.Latencies, hist.Statuses, sparkWidth))
|
||||
if len(hist.Latencies) > 0 {
|
||||
minL, maxL := hist.Latencies[0], hist.Latencies[0]
|
||||
var total time.Duration
|
||||
|
||||
@@ -32,8 +32,19 @@ func (m Model) viewUsersTab() string {
|
||||
return "\n No users configured. Press [n] to add one."
|
||||
}
|
||||
|
||||
var headers []string
|
||||
var widths []int
|
||||
if m.isWide() {
|
||||
headers = []string{"#", "USERNAME", "ROLE", "PUBLIC KEY"}
|
||||
widths = []int{4, 18, 10, 50}
|
||||
} else {
|
||||
headers = []string{"#", "USER", "ROLE", "KEY"}
|
||||
widths = []int{4, 14, 8, 30}
|
||||
}
|
||||
userW := widths[1]
|
||||
|
||||
return m.renderTable(
|
||||
[]string{"#", "USERNAME", "ROLE", "PUBLIC KEY"},
|
||||
headers,
|
||||
len(m.users),
|
||||
func(start, end int) [][]string {
|
||||
var rows [][]string
|
||||
@@ -41,14 +52,14 @@ func (m Model) viewUsersTab() string {
|
||||
u := m.users[i]
|
||||
rows = append(rows, []string{
|
||||
fmt.Sprintf("%d", i+1),
|
||||
m.zones.Mark(fmt.Sprintf("user-%d", i), limitStr(u.Username, 15)),
|
||||
m.zones.Mark(fmt.Sprintf("user-%d", i), limitStr(u.Username, userW-2)),
|
||||
fmtRole(u.Role),
|
||||
fmtKey(u.PublicKey),
|
||||
})
|
||||
}
|
||||
return rows
|
||||
},
|
||||
nil, nil,
|
||||
widths, nil,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -15,6 +15,12 @@ var (
|
||||
|
||||
type StyleOverride func(row, col int) *lipgloss.Style
|
||||
|
||||
const wideBreakpoint = 120
|
||||
|
||||
// isWide reports whether the model's terminal width (m.termWidth) is at
// least wideBreakpoint columns.
func (m Model) isWide() bool {
|
||||
return m.termWidth >= wideBreakpoint
|
||||
}
|
||||
|
||||
func (m Model) renderTable(headers []string, items int, buildRows func(start, end int) [][]string, colWidths []int, styleOverride StyleOverride) string {
|
||||
if items == 0 {
|
||||
return ""
|
||||
@@ -28,7 +34,16 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
|
||||
selectedVisual := m.cursor - m.tableOffset
|
||||
rows := buildRows(m.tableOffset, end)
|
||||
|
||||
tableWidth := m.termWidth - chromePadH - 2
|
||||
colTotal := 0
|
||||
for _, w := range colWidths {
|
||||
colTotal += w
|
||||
}
|
||||
borderOverhead := 2 + len(colWidths) - 1
|
||||
tableWidth := colTotal + borderOverhead
|
||||
maxWidth := m.termWidth - chromePadH - 2
|
||||
if tableWidth > maxWidth {
|
||||
tableWidth = maxWidth
|
||||
}
|
||||
if tableWidth < 40 {
|
||||
tableWidth = 40
|
||||
}
|
||||
@@ -41,7 +56,11 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
|
||||
Rows(rows...).
|
||||
StyleFunc(func(row, col int) lipgloss.Style {
|
||||
if row == table.HeaderRow {
|
||||
return tableHeaderStyle
|
||||
h := tableHeaderStyle
|
||||
if col < len(colWidths) && colWidths[col] > 0 {
|
||||
h = h.Width(colWidths[col]).MaxWidth(colWidths[col])
|
||||
}
|
||||
return h
|
||||
}
|
||||
isSelected := row == selectedVisual
|
||||
if styleOverride != nil {
|
||||
@@ -51,7 +70,7 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
|
||||
style = tableSelectedStyle.Foreground(s.GetForeground())
|
||||
}
|
||||
if col < len(colWidths) && colWidths[col] > 0 {
|
||||
style = style.Width(colWidths[col])
|
||||
style = style.Width(colWidths[col]).MaxWidth(colWidths[col])
|
||||
}
|
||||
return style
|
||||
}
|
||||
@@ -64,7 +83,7 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
|
||||
base = tableSelectedStyle
|
||||
}
|
||||
if col < len(colWidths) && colWidths[col] > 0 {
|
||||
base = base.Width(colWidths[col])
|
||||
base = base.Width(colWidths[col]).MaxWidth(colWidths[col])
|
||||
}
|
||||
return base
|
||||
})
|
||||
|
||||
+57
-8
@@ -68,6 +68,7 @@ const (
|
||||
stateLogs
|
||||
stateUsers
|
||||
stateDetail
|
||||
stateAlertDetail
|
||||
stateFormSite
|
||||
stateFormAlert
|
||||
stateFormUser
|
||||
@@ -92,9 +93,10 @@ type Model struct {
|
||||
userFormData *userFormData
|
||||
maintFormData *maintFormData
|
||||
|
||||
logViewport viewport.Model
|
||||
isAdmin bool
|
||||
zones *zone.Manager
|
||||
logViewport viewport.Model
|
||||
logFilterImportant bool
|
||||
isAdmin bool
|
||||
zones *zone.Manager
|
||||
|
||||
deleteID int
|
||||
deleteName string
|
||||
@@ -383,6 +385,14 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
return m, tea.Quit
|
||||
}
|
||||
return m, nil
|
||||
case stateAlertDetail:
|
||||
switch msg.String() {
|
||||
case "i", "esc":
|
||||
m.state = stateDashboard
|
||||
case "q":
|
||||
return m, tea.Quit
|
||||
}
|
||||
return m, nil
|
||||
case stateDashboard, stateLogs, stateUsers:
|
||||
switch msg.String() {
|
||||
case "q":
|
||||
@@ -392,6 +402,11 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.filterMode = true
|
||||
return m, nil
|
||||
}
|
||||
case "f":
|
||||
if m.state == stateLogs {
|
||||
m.logFilterImportant = !m.logFilterImportant
|
||||
return m, nil
|
||||
}
|
||||
case "tab":
|
||||
m.switchTab(m.currentTab + 1)
|
||||
case "pgup", "pgdown":
|
||||
@@ -463,6 +478,16 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.state = stateFormUser
|
||||
return m, m.initUserHuhForm()
|
||||
}
|
||||
case "t":
|
||||
if m.currentTab == 1 && len(m.alerts) > 0 {
|
||||
a := m.alerts[m.cursor]
|
||||
go func() {
|
||||
if err := m.engine.TestAlert(a.ID); err != nil {
|
||||
m.engine.AddLog(fmt.Sprintf("Test alert failed (%s): %v", a.Name, err))
|
||||
}
|
||||
}()
|
||||
return m, nil
|
||||
}
|
||||
case " ":
|
||||
if m.currentTab == 0 && len(m.sites) > 0 && m.sites[m.cursor].Type == "group" {
|
||||
gid := m.sites[m.cursor].ID
|
||||
@@ -481,6 +506,8 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
case "i":
|
||||
if m.currentTab == 0 && len(m.sites) > 0 {
|
||||
m.state = stateDetail
|
||||
} else if m.currentTab == 1 && len(m.alerts) > 0 {
|
||||
m.state = stateAlertDetail
|
||||
}
|
||||
case "x":
|
||||
if m.currentTab == 4 && len(m.maintenanceWindows) > 0 {
|
||||
@@ -802,6 +829,8 @@ func (m Model) View() string {
|
||||
return ""
|
||||
case stateDetail:
|
||||
return m.viewDetailPanel()
|
||||
case stateAlertDetail:
|
||||
return m.viewAlertDetailPanel()
|
||||
default:
|
||||
return m.zones.Scan(m.viewDashboard())
|
||||
}
|
||||
@@ -811,13 +840,20 @@ func (m Model) viewDashboard() string {
|
||||
allSites := m.engine.GetAllSites()
|
||||
totalMonitors := 0
|
||||
downCount := 0
|
||||
lateCount := 0
|
||||
for _, s := range allSites {
|
||||
if s.Type == "group" {
|
||||
continue
|
||||
}
|
||||
totalMonitors++
|
||||
if !s.Paused && !m.isMonitorInMaintenance(s.ID) && (s.Status == "DOWN" || s.Status == "SSL EXP") {
|
||||
if s.Paused || m.isMonitorInMaintenance(s.ID) {
|
||||
continue
|
||||
}
|
||||
switch s.Status {
|
||||
case "DOWN", "SSL EXP":
|
||||
downCount++
|
||||
case "LATE":
|
||||
lateCount++
|
||||
}
|
||||
}
|
||||
offlineNodes := 0
|
||||
@@ -830,6 +866,8 @@ func (m Model) viewDashboard() string {
|
||||
var sitesLabel string
|
||||
if downCount > 0 {
|
||||
sitesLabel = fmt.Sprintf("Sites (%d↓)", downCount)
|
||||
} else if lateCount > 0 {
|
||||
sitesLabel = fmt.Sprintf("Sites (%d⚠)", lateCount)
|
||||
} else if totalMonitors > 0 {
|
||||
sitesLabel = fmt.Sprintf("Sites (%d)", totalMonitors)
|
||||
} else {
|
||||
@@ -895,14 +933,19 @@ func (m Model) viewDashboard() string {
|
||||
}
|
||||
}
|
||||
|
||||
upCount := totalMonitors - downCount
|
||||
upCount := totalMonitors - downCount - lateCount
|
||||
var upStr string
|
||||
if downCount > 0 {
|
||||
upStr = dangerStyle.Render(fmt.Sprintf("%d/%d UP", upCount, totalMonitors))
|
||||
} else if lateCount > 0 {
|
||||
upStr = warnStyle.Render(fmt.Sprintf("%d/%d UP", upCount, totalMonitors))
|
||||
} else {
|
||||
upStr = specialStyle.Render(fmt.Sprintf("%d/%d UP", upCount, totalMonitors))
|
||||
}
|
||||
statusParts := []string{upStr}
|
||||
if lateCount > 0 {
|
||||
statusParts = append(statusParts, warnStyle.Render(fmt.Sprintf("%d LATE", lateCount)))
|
||||
}
|
||||
if len(m.nodes) > 0 {
|
||||
online := 0
|
||||
for _, n := range m.nodes {
|
||||
@@ -923,6 +966,10 @@ func (m Model) viewDashboard() string {
|
||||
switch m.currentTab {
|
||||
case 0:
|
||||
keys = "[/]Filter [n]New [e]Edit [i]Info [d]Del [p]Pause [T]Theme [Tab]Switch [q]Quit"
|
||||
case 1:
|
||||
keys = "[n]New [e]Edit [i]Info [d]Del [t]Test [T]Theme [Tab]Switch [q]Quit"
|
||||
case 2:
|
||||
keys = "[f]Filter [T]Theme [Tab]Switch [q]Quit"
|
||||
case 4:
|
||||
keys = "[n]New [x]End [d]Del [T]Theme [Tab]Switch [q]Quit"
|
||||
case 5:
|
||||
@@ -949,10 +996,12 @@ func siteOrder(s models.Site) int {
|
||||
switch s.Status {
|
||||
case "DOWN", "SSL EXP":
|
||||
return 0
|
||||
case "PENDING":
|
||||
return 2
|
||||
default:
|
||||
case "LATE":
|
||||
return 1
|
||||
case "PENDING":
|
||||
return 3
|
||||
default:
|
||||
return 2
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,274 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 2 {
|
||||
fmt.Fprintln(os.Stderr, "usage: backfill <db-path>")
|
||||
os.Exit(1)
|
||||
}
|
||||
db, err := sql.Open("sqlite3", os.Args[1])
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "open: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
ids, err := loadSiteIDs(db)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "load site IDs: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
rng := rand.New(rand.NewSource(42))
|
||||
now := time.Now().UTC()
|
||||
|
||||
if err := backfillHistory(db, rng, now, ids); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "history: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillStateChanges(db, now, ids); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "state changes: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillLogs(db, now); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "logs: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillNodes(db, now); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "nodes: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillMaintenance(db, now, ids); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "maintenance: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var count int
|
||||
db.QueryRow("SELECT COUNT(*) FROM check_history").Scan(&count)
|
||||
fmt.Printf("Backfill complete: %d check records\n", count)
|
||||
|
||||
var token string
|
||||
if err := db.QueryRow("SELECT token FROM sites WHERE name='Nightly Backup'").Scan(&token); err == nil {
|
||||
fmt.Printf("PUSH_TOKEN=%s\n", token)
|
||||
}
|
||||
}
|
||||
|
||||
// loadSiteIDs reads every row of the sites table and returns a map from site
// name to site id. A query or scan failure is returned as-is; any iteration
// error reported by rows.Err() is returned together with the map built so far.
func loadSiteIDs(db *sql.DB) (map[string]int, error) {
	rows, err := db.Query("SELECT id, name FROM sites")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	byName := map[string]int{}
	for rows.Next() {
		var (
			siteID   int
			siteName string
		)
		if scanErr := rows.Scan(&siteID, &siteName); scanErr != nil {
			return nil, scanErr
		}
		byName[siteName] = siteID
	}
	return byName, rows.Err()
}
|
||||
|
||||
// monitorProfile describes the synthetic check history generated for one
// seeded monitor.
type monitorProfile struct {
	name         string // site name, looked up in the name -> id map
	minMs, maxMs int    // base latency range in milliseconds
	downFrom     int    // check index where DOWN starts (-1 = never)
}
|
||||
|
||||
func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]int) error {
|
||||
profiles := []monitorProfile{
|
||||
{"Nextcloud", 40, 80, -1},
|
||||
{"Jellyfin", 80, 200, -1},
|
||||
{"Home Assistant", 15, 45, -1},
|
||||
{"Gitea", 40, 90, -1},
|
||||
{"Traefik Dashboard", 5, 25, -1},
|
||||
{"Vaultwarden", 50, 130, -1},
|
||||
{"Personal Blog", 25, 65, -1},
|
||||
{"Immich", 100, 280, -1}, // spikes handled below
|
||||
{"Auth Portal", 30, 70, 40}, // DOWN after check 40
|
||||
{"Edge Router", 5, 15, -1}, // ping
|
||||
{"Postgres", 1, 5, -1}, // port
|
||||
{"DNS Primary", 10, 30, -1},
|
||||
}
|
||||
|
||||
tx, err := db.Begin()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
stmt, err := tx.Prepare("INSERT INTO check_history (site_id, latency_ns, is_up, checked_at) VALUES (?, ?, ?, ?)")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer stmt.Close()
|
||||
|
||||
const total = 60
|
||||
for _, p := range profiles {
|
||||
siteID, ok := ids[p.name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for i := 0; i < total; i++ {
|
||||
minutesAgo := (total - i) * 24
|
||||
checkedAt := now.Add(-time.Duration(minutesAgo) * time.Minute)
|
||||
|
||||
var latencyNs int64
|
||||
isUp := true
|
||||
|
||||
if p.downFrom >= 0 && i >= p.downFrom {
|
||||
latencyNs = 0
|
||||
isUp = false
|
||||
} else {
|
||||
ms := p.minMs + rng.Intn(p.maxMs-p.minMs)
|
||||
if p.name == "Immich" && i%17 == 0 {
|
||||
ms = 250 + rng.Intn(100)
|
||||
}
|
||||
latencyNs = int64(ms) * 1_000_000
|
||||
}
|
||||
|
||||
if _, err := stmt.Exec(siteID, latencyNs, isUp, checkedAt.Format("2006-01-02 15:04:05")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
// backfillStateChanges inserts a fixed set of demo status transitions into
// the state_changes table, with timestamps expressed relative to now.
// Transitions for site names missing from ids are skipped. All rows are
// written in one transaction.
func backfillStateChanges(db *sql.DB, now time.Time, ids map[string]int) error {
	const tsLayout = "2006-01-02 15:04:05"

	type transition struct {
		site       string
		prev, next string
		cause      string
		when       time.Time
	}
	// ago returns the instant d before now.
	ago := func(d time.Duration) time.Time { return now.Add(-d) }

	transitions := []transition{
		{"Nextcloud", "UP", "DOWN", "read timeout", ago(3*24*time.Hour + 5*time.Minute)},
		{"Nextcloud", "DOWN", "UP", "", ago(3 * 24 * time.Hour)},
		{"Jellyfin", "UP", "DOWN", "connection reset", ago(18*time.Hour + 3*time.Minute)},
		{"Jellyfin", "DOWN", "UP", "", ago(18 * time.Hour)},
		{"Auth Portal", "UP", "DOWN", "connection refused", ago(8 * time.Hour)},
		{"Immich", "UP", "DOWN", "502 Bad Gateway", ago(12*time.Hour + 8*time.Minute)},
		{"Immich", "DOWN", "UP", "", ago(12 * time.Hour)},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit this Rollback is a no-op.
	defer tx.Rollback()

	insert, err := tx.Prepare("INSERT INTO state_changes (site_id, from_status, to_status, error_reason, changed_at) VALUES (?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, tr := range transitions {
		siteID, found := ids[tr.site]
		if !found {
			continue
		}
		stamp := tr.when.Format(tsLayout)
		if _, execErr := insert.Exec(siteID, tr.prev, tr.next, tr.cause, stamp); execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
|
||||
|
||||
// backfillLogs inserts a fixed list of demo log messages into the logs
// table. Each row's created_at is computed relative to now; the bracketed
// clock prefix inside each message is fixed text. All rows are written in
// one transaction.
func backfillLogs(db *sql.DB, now time.Time) error {
	const tsLayout = "2006-01-02 15:04:05"

	type record struct {
		text string
		when time.Time
	}
	// ago returns the instant d before now.
	ago := func(d time.Duration) time.Time { return now.Add(-d) }

	records := []record{
		{"[06:12] Monitor 'Auth Portal' confirmed DOWN: connection refused", ago(8 * time.Hour)},
		{"[06:12] Monitor 'Auth Portal' failed check 2/2", ago(8*time.Hour + 30*time.Second)},
		{"[06:11] Monitor 'Auth Portal' failed check 1/2", ago(8*time.Hour + 60*time.Second)},
		{"[12:33] Monitor 'Immich' recovered (was down 8m)", ago(12 * time.Hour)},
		{"[12:25] Monitor 'Immich' confirmed DOWN: 502 Bad Gateway", ago(12*time.Hour + 8*time.Minute)},
		{"[12:25] Monitor 'Immich' failed check 3/3", ago(12*time.Hour + 8*time.Minute + 30*time.Second)},
		{"[12:25] Monitor 'Immich' failed check 2/3", ago(12*time.Hour + 8*time.Minute + 60*time.Second)},
		{"[12:24] Monitor 'Immich' failed check 1/3", ago(12*time.Hour + 9*time.Minute)},
		{"[06:14] Monitor 'Jellyfin' recovered (was down 3m)", ago(18 * time.Hour)},
		{"[06:11] Monitor 'Jellyfin' confirmed DOWN: connection reset", ago(18*time.Hour + 3*time.Minute)},
		{"[06:11] Monitor 'Jellyfin' failed check 2/2", ago(18*time.Hour + 3*time.Minute + 30*time.Second)},
		{"[06:10] Monitor 'Jellyfin' failed check 1/2", ago(18*time.Hour + 4*time.Minute)},
		{"[23:45] SSL certificate for 'Personal Blog' expires in 42 days", ago(28 * time.Hour)},
		{"[08:00] Loaded check history from database", ago(32*time.Hour + 30*time.Minute)},
		{"[08:00] Engine RESUMED (Active)", ago(32*time.Hour + 30*time.Minute + 5*time.Second)},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit this Rollback is a no-op.
	defer tx.Rollback()

	insert, err := tx.Prepare("INSERT INTO logs (message, created_at) VALUES (?, ?)")
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, rec := range records {
		if _, execErr := insert.Exec(rec.text, rec.when.Format(tsLayout)); execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
|
||||
|
||||
// backfillNodes upserts a single demo node row ("node-1", named "leader", in
// region "us-east") whose last_seen is set to now.
func backfillNodes(db *sql.DB, now time.Time) error {
	const upsert = "INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, ?, ?)"

	lastSeen := now.Format("2006-01-02 15:04:05")
	if _, err := db.Exec(upsert, "node-1", "leader", "us-east", lastSeen, "2026.05.1"); err != nil {
		return err
	}
	return nil
}
|
||||
|
||||
// backfillMaintenance inserts demo rows into maintenance_windows inside one
// transaction:
//
//   - a finished two-hour "Jellyfin upgrade" window that started three days
//     before now, attached to the Jellyfin site (skipped when ids has no
//     "Jellyfin" entry);
//   - an upcoming four-hour "Network switch replacement" window starting two
//     days after now, stored with monitor_id 0.
//
// It returns the first database error encountered.
func backfillMaintenance(db *sql.DB, now time.Time, ids map[string]int) error {
	const tsLayout = "2006-01-02 15:04:05"

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit this Rollback is a no-op.
	defer tx.Rollback()

	stmt, err := tx.Prepare("INSERT INTO maintenance_windows (monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer stmt.Close()

	// Only insert the Jellyfin window when the site exists. A plain map read
	// would yield 0 for a missing name, and monitor_id 0 is the value used by
	// the non-site-specific window below, so the row would be mis-attributed.
	if jellyfinID, ok := ids["Jellyfin"]; ok {
		past := now.Add(-3 * 24 * time.Hour)
		if _, err := stmt.Exec(jellyfinID, "Jellyfin upgrade", "Upgrade to v10.10 + plugin updates", "maintenance",
			past.Format(tsLayout),
			past.Add(2*time.Hour).Format(tsLayout),
			"admin"); err != nil {
			return err
		}
	}

	future := now.Add(2 * 24 * time.Hour)
	if _, err := stmt.Exec(0, "Network switch replacement", "Replacing core switch in rack 2", "maintenance",
		future.Format(tsLayout),
		future.Add(4*time.Hour).Format(tsLayout),
		"admin"); err != nil {
		return err
	}

	return tx.Commit()
}
|
||||
@@ -0,0 +1,54 @@
|
||||
Set Shell "bash"
|
||||
Set Width 1400
|
||||
Set Height 800
|
||||
Set FontSize 14
|
||||
Set Padding 20
|
||||
Set Framerate 15
|
||||
Set TypingSpeed 50ms
|
||||
|
||||
Hide
|
||||
Type "bash vhs/setup.sh /tmp/uptop-vhs.db"
|
||||
Enter
|
||||
Sleep 45s
|
||||
Show
|
||||
Sleep 5s
|
||||
|
||||
# Sites tab — hero shot with mixed monitor states
|
||||
Screenshot vhs/screenshots/monitors.png
|
||||
Sleep 1s
|
||||
|
||||
# Navigate to Nextcloud (row 6: group + 3 children + Auth Portal)
|
||||
Down
|
||||
Sleep 200ms
|
||||
Down
|
||||
Sleep 200ms
|
||||
Down
|
||||
Sleep 200ms
|
||||
Down
|
||||
Sleep 200ms
|
||||
Down
|
||||
Sleep 200ms
|
||||
Type "i"
|
||||
Sleep 3s
|
||||
Screenshot vhs/screenshots/detail.png
|
||||
Sleep 1s
|
||||
|
||||
# Close detail
|
||||
Escape
|
||||
Sleep 1s
|
||||
|
||||
# Tab to Alerts
|
||||
Tab
|
||||
Sleep 2s
|
||||
Screenshot vhs/screenshots/alerts.png
|
||||
Sleep 1s
|
||||
|
||||
# Tab to Logs
|
||||
Tab
|
||||
Sleep 2s
|
||||
Screenshot vhs/screenshots/logs.png
|
||||
Sleep 1s
|
||||
|
||||
# Quit
|
||||
Type "q"
|
||||
Sleep 1s
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 84 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 80 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 160 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 219 KiB |
+141
@@ -0,0 +1,141 @@
|
||||
alerts:
|
||||
- name: Discord Homelab
|
||||
type: discord
|
||||
settings:
|
||||
url: https://discord.com/api/webhooks/1234567890/demo-token
|
||||
|
||||
- name: Ntfy Alerts
|
||||
type: webhook
|
||||
settings:
|
||||
url: https://ntfy.example.com/homelab-alerts
|
||||
|
||||
- name: Email Oncall
|
||||
type: email
|
||||
settings:
|
||||
host: smtp.example.com
|
||||
port: "587"
|
||||
user: alerts@example.com
|
||||
pass: "••••••••"
|
||||
from: alerts@example.com
|
||||
to: oncall@example.com
|
||||
|
||||
- name: Slack Ops
|
||||
type: slack
|
||||
settings:
|
||||
url: https://hooks.slack.com/services/T00000/B00000/demo-token
|
||||
|
||||
monitors:
|
||||
# HTTP — homelab services
|
||||
- name: Nextcloud
|
||||
type: http
|
||||
url: https://example.com
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 14
|
||||
max_retries: 2
|
||||
|
||||
- name: Jellyfin
|
||||
type: http
|
||||
url: https://example.com
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
max_retries: 2
|
||||
|
||||
- name: Home Assistant
|
||||
type: http
|
||||
url: https://example.com
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
max_retries: 3
|
||||
|
||||
- name: Gitea
|
||||
type: http
|
||||
url: https://example.com
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 14
|
||||
max_retries: 2
|
||||
|
||||
- name: Traefik Dashboard
|
||||
type: http
|
||||
url: https://example.com
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
max_retries: 1
|
||||
|
||||
- name: Vaultwarden
|
||||
type: http
|
||||
url: https://example.com
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 14
|
||||
max_retries: 3
|
||||
|
||||
- name: Personal Blog
|
||||
type: http
|
||||
url: https://example.com
|
||||
interval: 120
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 14
|
||||
max_retries: 2
|
||||
|
||||
- name: Immich
|
||||
type: http
|
||||
url: https://example.com
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 7
|
||||
max_retries: 3
|
||||
|
||||
# HTTP — deliberate failure
|
||||
- name: Auth Portal
|
||||
type: http
|
||||
url: http://localhost:1
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
max_retries: 2
|
||||
|
||||
# Push — cron jobs
|
||||
- name: Nightly Backup
|
||||
type: push
|
||||
interval: 300
|
||||
alert: Discord Homelab
|
||||
|
||||
- name: Cert Renewal
|
||||
type: push
|
||||
interval: 300
|
||||
alert: Discord Homelab
|
||||
|
||||
# Infrastructure group
|
||||
- name: Infrastructure
|
||||
type: group
|
||||
alert: Discord Homelab
|
||||
monitors:
|
||||
- name: Edge Router
|
||||
type: ping
|
||||
hostname: 8.8.8.8
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
timeout: 5
|
||||
|
||||
- name: Postgres
|
||||
type: port
|
||||
hostname: localhost
|
||||
port: 18099
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
timeout: 5
|
||||
|
||||
- name: DNS Primary
|
||||
type: dns
|
||||
hostname: google.com
|
||||
dns_server: 8.8.8.8
|
||||
dns_resolve_type: A
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
timeout: 5
|
||||
Executable
+27
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
# VHS screenshot setup: seed monitors, backfill history, start server.
#
# Usage: setup.sh <db-path>
#
# Steps:
#   1. Remove any existing database at <db-path> (plus its -shm/-wal files).
#   2. Apply vhs/seed.yaml with ./uptop to create monitors and alerts.
#   3. Run the Go backfill tool to insert synthetic history.
#   4. If the backfill printed a PUSH_TOKEN, send one push heartbeat in the
#      background 15 seconds from now.
#   5. Replace this shell with the uptop server process.
set -e
DB="${1:?usage: setup.sh <db-path>}"

rm -f "$DB" "$DB-shm" "$DB-wal"

echo "==> Seeding monitors and alerts..."
UPTOP_DB_DSN="$DB" ./uptop apply -f vhs/seed.yaml 2>&1

echo "==> Backfilling check history..."
BACKFILL_OUT=$(go run ./vhs/backfill/ "$DB")
echo "$BACKFILL_OUT"

# -f2- keeps everything after the first '=', so a token that itself contains
# '=' characters is not truncated.
PUSH_TOKEN=$(echo "$BACKFILL_OUT" | grep '^PUSH_TOKEN=' | cut -d= -f2-)
if [ -n "$PUSH_TOKEN" ]; then
  echo "==> Sending push heartbeat in 15s (background)..."
  # The delay leaves time for the server started below to come up.
  (sleep 15 && curl -s "http://localhost:18099/api/push" -H "Authorization: Bearer $PUSH_TOKEN" > /dev/null 2>&1) &
fi

echo "==> Starting uptop server..."
# exec replaces the shell, so the server becomes the script's process;
# its stderr is discarded.
exec env \
  UPTOP_DB_DSN="$DB" \
  UPTOP_PORT=23299 \
  UPTOP_HTTP_PORT=18099 \
  UPTOP_ALLOW_PRIVATE_TARGETS=true \
  ./uptop serve 2>/dev/null
|
||||
Reference in New Issue
Block a user