feat: alert channel health indicator + test alerts
CI / test (pull_request) Successful in 2m46s
CI / lint (pull_request) Successful in 1m1s
CI / vulncheck (pull_request) Successful in 51s

Track alert delivery health at runtime:
- AlertHealth struct: LastSendAt, LastSendOK, LastError, SendCount, FailCount
- triggerAlert records success/failure after each Send()
- Health data exposed via GetAlertHealth() for TUI

Alerts tab enriched:
- Health dot column: green (OK), red (failed), gray (never sent)
- LAST SENT column: relative time ("2m ago", "never")
- [t] key sends test notification through selected channel

Inspired by Grafana's contact point health columns.
This commit is contained in:
2026-05-27 21:23:06 -04:00
parent f17f06a1c6
commit 0aa2f9cd8a
3 changed files with 103 additions and 1 deletions
+58
View File
@@ -23,6 +23,14 @@ const (
minPushGrace = 60 * time.Second
)
// AlertHealth is a snapshot of the delivery outcomes recorded for a single
// alert channel. Its zero value describes a channel with no recorded attempt.
type AlertHealth struct {
	// LastSendAt is the time the most recent attempt was recorded.
	LastSendAt time.Time
	// LastSendOK reports whether the most recent attempt succeeded.
	LastSendOK bool
	// LastError is the error text of the most recent attempt.
	LastError string
	// SendCount is the number of recorded attempts; FailCount is how many of
	// those failed.
	SendCount, FailCount int
}
type Engine struct {
mu sync.RWMutex
liveState map[int]models.Site
@@ -42,6 +50,9 @@ type Engine struct {
probeResults map[int]map[string]NodeResult
aggStrategy AggregationStrategy
alertHealthMu sync.RWMutex
alertHealth map[int]AlertHealth
db store.Store
insecureSkipVerify bool
allowPrivateTargets bool
@@ -64,6 +75,7 @@ func newEngine(s store.Store, allowPrivateTargets bool) *Engine {
histories: make(map[int]*SiteHistory),
tokenIndex: make(map[string]int),
probeResults: make(map[int]map[string]NodeResult),
alertHealth: make(map[int]AlertHealth),
aggStrategy: AggAnyDown,
isActive: true,
allowPrivateTargets: allowPrivateTargets,
@@ -578,11 +590,57 @@ func (e *Engine) triggerAlert(alertID int, title, message string) {
defer cancel()
if err := provider.Send(ctx, title, message); err != nil {
e.AddLog(fmt.Sprintf("Alert send failed (%s): %v", cfg.Name, err))
e.recordAlertResult(alertID, false, err.Error())
} else {
e.recordAlertResult(alertID, true, "")
}
}()
}
}
// recordAlertResult folds the outcome of one send attempt into the health
// entry stored for alertID.
//
// Every call stamps LastSendAt with the current time, sets LastSendOK to ok and
// increments SendCount. A failed attempt (ok == false) additionally stores
// errMsg in LastError and increments FailCount; a successful one clears
// LastError. The write lock is held across the whole read-modify-write.
func (e *Engine) recordAlertResult(alertID int, ok bool, errMsg string) {
	e.alertHealthMu.Lock()
	defer e.alertHealthMu.Unlock()

	// Map values are not addressable, so update a copy and store it back.
	entry := e.alertHealth[alertID]
	entry.SendCount++
	entry.LastSendOK = ok
	entry.LastSendAt = time.Now()
	entry.LastError = ""
	if !ok {
		entry.LastError = errMsg
		entry.FailCount++
	}
	e.alertHealth[alertID] = entry
}
// GetAlertHealth returns a copy of the health entry stored for alertID, read
// under the read lock. An alertID with no stored entry yields the zero
// AlertHealth.
func (e *Engine) GetAlertHealth(alertID int) AlertHealth {
	e.alertHealthMu.RLock()
	snapshot := e.alertHealth[alertID]
	e.alertHealthMu.RUnlock()
	return snapshot
}
// TestAlert sends a fixed test notification through the alert channel
// identified by alertID and records the outcome of that send in the channel's
// health entry.
//
// It returns a wrapped error when the alert cannot be loaded from the store, an
// error naming the alert type when no provider is available for it, or the
// provider's send error unchanged. Only the send itself is recorded via
// recordAlertResult; the two earlier failures return before anything is
// recorded. A successful send is also written to the engine log.
func (e *Engine) TestAlert(alertID int) error {
	cfg, err := e.db.GetAlert(alertID)
	if err != nil {
		return fmt.Errorf("failed to load alert: %w", err)
	}

	provider := alert.GetProvider(cfg)
	if provider == nil {
		return fmt.Errorf("no provider for type %q", cfg.Type)
	}

	// The send gets its own 30-second deadline.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	body := fmt.Sprintf("Test notification from uptop for channel '%s'.", cfg.Name)
	if err = provider.Send(ctx, "🧪 Test Alert", body); err != nil {
		e.recordAlertResult(alertID, false, err.Error())
		return err
	}

	e.recordAlertResult(alertID, true, "")
	e.AddLog(fmt.Sprintf("Test alert sent to '%s'", cfg.Name))
	return nil
}
func (e *Engine) isInMaintenance(monitorID int) bool {
inMaint, err := e.db.IsMonitorInMaintenance(monitorID)
if err != nil {
+33 -1
View File
@@ -2,7 +2,9 @@ package tui
import (
"fmt"
"time"
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
tea "github.com/charmbracelet/bubbletea"
"github.com/charmbracelet/huh"
"github.com/charmbracelet/lipgloss"
@@ -113,26 +115,56 @@ func fmtAlertConfig(alert struct {
}
}
// fmtAlertHealth renders the health dot for an alert channel. The dot uses
// subtleStyle when no send has been recorded (zero LastSendAt), specialStyle
// when the last send succeeded, and dangerStyle otherwise.
func fmtAlertHealth(h monitor.AlertHealth) string {
	const dot = "●"

	switch {
	case h.LastSendAt.IsZero():
		return subtleStyle.Render(dot)
	case h.LastSendOK:
		return specialStyle.Render(dot)
	default:
		return dangerStyle.Render(dot)
	}
}
// fmtAlertLastSent renders how long ago the last send attempt was recorded.
//
// A zero LastSendAt yields "never" rendered with subtleStyle. Otherwise the
// elapsed time is truncated to the largest fitting unit and returned as plain
// text: seconds under a minute, minutes under an hour, hours under a day, and
// whole days beyond that (for example "2m ago").
func fmtAlertLastSent(h monitor.AlertHealth) string {
	if h.LastSendAt.IsZero() {
		return subtleStyle.Render("never")
	}

	elapsed := time.Since(h.LastSendAt)
	switch {
	case elapsed < time.Minute:
		return fmt.Sprintf("%ds ago", int(elapsed.Seconds()))
	case elapsed < time.Hour:
		return fmt.Sprintf("%dm ago", int(elapsed.Minutes()))
	case elapsed < 24*time.Hour:
		return fmt.Sprintf("%dh ago", int(elapsed.Hours()))
	default:
		// Truncate to whole hours first, then integer-divide into days.
		return fmt.Sprintf("%dd ago", int(elapsed.Hours())/24)
	}
}
func (m Model) viewAlertsTab() string {
if len(m.alerts) == 0 {
return "\n No alert channels configured. Press [n] to add one."
}
return m.renderTable(
[]string{"#", "NAME", "TYPE", "CONFIG"},
[]string{"#", "", "NAME", "TYPE", "CONFIG", "LAST SENT"},
len(m.alerts),
func(start, end int) [][]string {
var rows [][]string
for i := start; i < end; i++ {
a := m.alerts[i]
h := m.engine.GetAlertHealth(a.ID)
rows = append(rows, []string{
fmt.Sprintf("%d", i+1),
fmtAlertHealth(h),
m.zones.Mark(fmt.Sprintf("alert-%d", i), limitStr(a.Name, 15)),
fmtAlertType(a.Type),
fmtAlertConfig(struct {
Type string
Settings map[string]string
}{a.Type, a.Settings}),
fmtAlertLastSent(h),
})
}
return rows
+12
View File
@@ -469,6 +469,16 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.state = stateFormUser
return m, m.initUserHuhForm()
}
case "t":
if m.currentTab == 1 && len(m.alerts) > 0 {
a := m.alerts[m.cursor]
go func() {
if err := m.engine.TestAlert(a.ID); err != nil {
m.engine.AddLog(fmt.Sprintf("Test alert failed (%s): %v", a.Name, err))
}
}()
return m, nil
}
case " ":
if m.currentTab == 0 && len(m.sites) > 0 && m.sites[m.cursor].Type == "group" {
gid := m.sites[m.cursor].ID
@@ -943,6 +953,8 @@ func (m Model) viewDashboard() string {
switch m.currentTab {
case 0:
keys = "[/]Filter [n]New [e]Edit [i]Info [d]Del [p]Pause [T]Theme [Tab]Switch [q]Quit"
case 1:
keys = "[n]New [e]Edit [d]Del [t]Test [T]Theme [Tab]Switch [q]Quit"
case 2:
keys = "[f]Filter [T]Theme [Tab]Switch [q]Quit"
case 4: