feat: show error reason when monitors go DOWN
Propagate check failure reasons through the entire stack:
- Checker captures specific errors (DNS, timeout, HTTP status, SSL, etc.)
- Engine tracks LastError, StatusChangedAt, LastSuccessAt per monitor
- State transitions are persisted to the new state_changes table
- Detail panel shows error reason, HTTP code, state duration, last success time, and last 5 state change events
- Monitor table shows inline error preview for DOWN services
- Alert messages include error reason
- Probe nodes forward error reasons to leader

15 files changed across models, checker, engine, store, TUI, and probes.
This commit is contained in:
@@ -72,6 +72,15 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
)`,
|
||||
`CREATE TABLE IF NOT EXISTS state_changes (
|
||||
id SERIAL PRIMARY KEY,
|
||||
site_id INTEGER NOT NULL,
|
||||
from_status TEXT NOT NULL,
|
||||
to_status TEXT NOT NULL,
|
||||
error_reason TEXT DEFAULT '',
|
||||
changed_at TIMESTAMP DEFAULT NOW()
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -79,6 +79,15 @@ func (d *SQLiteDialect) CreateTablesSQL() []string {
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
)`,
|
||||
`CREATE TABLE IF NOT EXISTS state_changes (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
site_id INTEGER NOT NULL,
|
||||
from_status TEXT NOT NULL,
|
||||
to_status TEXT NOT NULL,
|
||||
error_reason TEXT DEFAULT '',
|
||||
changed_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -347,6 +347,29 @@ func (s *SQLStore) DeleteUser(id int) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error {
|
||||
_, err := s.db.Exec(s.q("INSERT INTO state_changes (site_id, from_status, to_status, error_reason) VALUES (?, ?, ?, ?)"),
|
||||
siteID, fromStatus, toStatus, errorReason)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) GetStateChanges(siteID int, limit int) ([]models.StateChange, error) {
|
||||
rows, err := s.db.Query(s.q("SELECT id, site_id, from_status, to_status, error_reason, changed_at FROM state_changes WHERE site_id = ? ORDER BY changed_at DESC LIMIT ?"), siteID, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var changes []models.StateChange
|
||||
for rows.Next() {
|
||||
var sc models.StateChange
|
||||
if err := rows.Scan(&sc.ID, &sc.SiteID, &sc.FromStatus, &sc.ToStatus, &sc.ErrorReason, &sc.ChangedAt); err != nil {
|
||||
return changes, err
|
||||
}
|
||||
changes = append(changes, sc)
|
||||
}
|
||||
return changes, rows.Err()
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveCheck(siteID int, latencyNs int64, isUp bool) error {
|
||||
return s.SaveCheckFromNode(siteID, "", latencyNs, isUp)
|
||||
}
|
||||
|
||||
@@ -38,6 +38,10 @@ type Store interface {
|
||||
SaveCheckFromNode(siteID int, nodeID string, latencyNs int64, isUp bool) error
|
||||
LoadAllHistory(limit int) (map[int][]models.CheckRecord, error)
|
||||
|
||||
// State Changes
|
||||
SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error
|
||||
GetStateChanges(siteID int, limit int) ([]models.StateChange, error)
|
||||
|
||||
// Nodes
|
||||
RegisterNode(node models.ProbeNode) error
|
||||
GetNode(id string) (models.ProbeNode, error)
|
||||
|
||||
Reference in New Issue
Block a user