feat: show error reason when monitors go DOWN
CI / test (pull_request) Successful in 2m42s
CI / lint (pull_request) Successful in 1m11s
CI / vulncheck (pull_request) Successful in 51s

Propagate check failure reasons through the entire stack:
- Checker captures specific errors (DNS, timeout, HTTP status, SSL, etc.)
- Engine tracks LastError, StatusChangedAt, LastSuccessAt per monitor
- State transitions persisted to new state_changes table
- Detail panel shows error reason, HTTP code, state duration, last
  success time, and last 5 state change events
- Monitor table shows inline error preview for DOWN services
- Alert messages include error reason
- Probe nodes forward error reasons to leader

15 files changed across models, checker, engine, store, TUI, and probes.
This commit is contained in:
2026-05-27 19:32:30 -04:00
parent d8a2cab90f
commit bc3a44beac
15 changed files with 299 additions and 96 deletions
+9 -7
View File
@@ -61,13 +61,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
return nil, nil
}
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
func (m *mockStore) SetPreference(string, string) error { return nil }
func (m *mockStore) Close() error { return nil }
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
func (m *mockStore) SetPreference(string, string) error { return nil }
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
func (m *mockStore) Close() error { return nil }
// --- Cluster Start Tests ---
+8 -6
View File
@@ -127,9 +127,10 @@ func probeFetchAssignments(ctx context.Context, client *http.Client, cfg ProbeCo
}
type probeResultItem struct {
SiteID int `json:"site_id"`
LatencyNs int64 `json:"latency_ns"`
IsUp bool `json:"is_up"`
SiteID int `json:"site_id"`
LatencyNs int64 `json:"latency_ns"`
IsUp bool `json:"is_up"`
ErrorReason string `json:"error_reason,omitempty"`
}
func probeExecuteChecks(ctx context.Context, sites []models.Site, strict, insecure *http.Client, allowPrivate bool) []probeResultItem {
@@ -154,9 +155,10 @@ loop:
cr := monitor.RunCheck(s, strict, insecure, false, allowPrivate)
mu.Lock()
results = append(results, probeResultItem{
SiteID: s.ID,
LatencyNs: cr.LatencyNs,
IsUp: cr.Status == "UP",
SiteID: s.ID,
LatencyNs: cr.LatencyNs,
IsUp: cr.Status == "UP",
ErrorReason: cr.ErrorReason,
})
mu.Unlock()
}(site)