feat: alert channel health indicator + test alerts
Track alert delivery health at runtime:
- AlertHealth struct: LastSendAt, LastSendOK, LastError, SendCount, FailCount
- triggerAlert records success/failure after each Send()
- Health data exposed via GetAlertHealth() for TUI
Alerts tab enriched:
- Health dot column: green (OK), red (failed), gray (never sent)
- LAST SENT column: relative time ("2m ago", "never")
- [t] key sends test notification through selected channel
Inspired by Grafana's contact point health columns.
This commit is contained in:
@@ -23,6 +23,14 @@ const (
|
||||
minPushGrace = 60 * time.Second
|
||||
)
|
||||
|
||||
type AlertHealth struct {
|
||||
LastSendAt time.Time
|
||||
LastSendOK bool
|
||||
LastError string
|
||||
SendCount int
|
||||
FailCount int
|
||||
}
|
||||
|
||||
type Engine struct {
|
||||
mu sync.RWMutex
|
||||
liveState map[int]models.Site
|
||||
@@ -42,6 +50,9 @@ type Engine struct {
|
||||
probeResults map[int]map[string]NodeResult
|
||||
aggStrategy AggregationStrategy
|
||||
|
||||
alertHealthMu sync.RWMutex
|
||||
alertHealth map[int]AlertHealth
|
||||
|
||||
db store.Store
|
||||
insecureSkipVerify bool
|
||||
allowPrivateTargets bool
|
||||
@@ -64,6 +75,7 @@ func newEngine(s store.Store, allowPrivateTargets bool) *Engine {
|
||||
histories: make(map[int]*SiteHistory),
|
||||
tokenIndex: make(map[string]int),
|
||||
probeResults: make(map[int]map[string]NodeResult),
|
||||
alertHealth: make(map[int]AlertHealth),
|
||||
aggStrategy: AggAnyDown,
|
||||
isActive: true,
|
||||
allowPrivateTargets: allowPrivateTargets,
|
||||
@@ -578,11 +590,57 @@ func (e *Engine) triggerAlert(alertID int, title, message string) {
|
||||
defer cancel()
|
||||
if err := provider.Send(ctx, title, message); err != nil {
|
||||
e.AddLog(fmt.Sprintf("Alert send failed (%s): %v", cfg.Name, err))
|
||||
e.recordAlertResult(alertID, false, err.Error())
|
||||
} else {
|
||||
e.recordAlertResult(alertID, true, "")
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) recordAlertResult(alertID int, ok bool, errMsg string) {
|
||||
e.alertHealthMu.Lock()
|
||||
defer e.alertHealthMu.Unlock()
|
||||
h := e.alertHealth[alertID]
|
||||
h.LastSendAt = time.Now()
|
||||
h.LastSendOK = ok
|
||||
h.SendCount++
|
||||
if ok {
|
||||
h.LastError = ""
|
||||
} else {
|
||||
h.LastError = errMsg
|
||||
h.FailCount++
|
||||
}
|
||||
e.alertHealth[alertID] = h
|
||||
}
|
||||
|
||||
func (e *Engine) GetAlertHealth(alertID int) AlertHealth {
|
||||
e.alertHealthMu.RLock()
|
||||
defer e.alertHealthMu.RUnlock()
|
||||
return e.alertHealth[alertID]
|
||||
}
|
||||
|
||||
func (e *Engine) TestAlert(alertID int) error {
|
||||
cfg, err := e.db.GetAlert(alertID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load alert: %w", err)
|
||||
}
|
||||
provider := alert.GetProvider(cfg)
|
||||
if provider == nil {
|
||||
return fmt.Errorf("no provider for type %q", cfg.Type)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
err = provider.Send(ctx, "🧪 Test Alert", fmt.Sprintf("Test notification from uptop for channel '%s'.", cfg.Name))
|
||||
if err != nil {
|
||||
e.recordAlertResult(alertID, false, err.Error())
|
||||
return err
|
||||
}
|
||||
e.recordAlertResult(alertID, true, "")
|
||||
e.AddLog(fmt.Sprintf("Test alert sent to '%s'", cfg.Name))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Engine) isInMaintenance(monitorID int) bool {
|
||||
inMaint, err := e.db.IsMonitorInMaintenance(monitorID)
|
||||
if err != nil {
|
||||
|
||||
@@ -2,7 +2,9 @@ package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
"github.com/charmbracelet/huh"
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
@@ -113,26 +115,56 @@ func fmtAlertConfig(alert struct {
|
||||
}
|
||||
}
|
||||
|
||||
func fmtAlertHealth(h monitor.AlertHealth) string {
|
||||
if h.LastSendAt.IsZero() {
|
||||
return subtleStyle.Render("●")
|
||||
}
|
||||
if h.LastSendOK {
|
||||
return specialStyle.Render("●")
|
||||
}
|
||||
return dangerStyle.Render("●")
|
||||
}
|
||||
|
||||
func fmtAlertLastSent(h monitor.AlertHealth) string {
|
||||
if h.LastSendAt.IsZero() {
|
||||
return subtleStyle.Render("never")
|
||||
}
|
||||
d := time.Since(h.LastSendAt)
|
||||
if d < time.Minute {
|
||||
return fmt.Sprintf("%ds ago", int(d.Seconds()))
|
||||
}
|
||||
if d < time.Hour {
|
||||
return fmt.Sprintf("%dm ago", int(d.Minutes()))
|
||||
}
|
||||
if d < 24*time.Hour {
|
||||
return fmt.Sprintf("%dh ago", int(d.Hours()))
|
||||
}
|
||||
return fmt.Sprintf("%dd ago", int(d.Hours())/24)
|
||||
}
|
||||
|
||||
func (m Model) viewAlertsTab() string {
|
||||
if len(m.alerts) == 0 {
|
||||
return "\n No alert channels configured. Press [n] to add one."
|
||||
}
|
||||
|
||||
return m.renderTable(
|
||||
[]string{"#", "NAME", "TYPE", "CONFIG"},
|
||||
[]string{"#", "", "NAME", "TYPE", "CONFIG", "LAST SENT"},
|
||||
len(m.alerts),
|
||||
func(start, end int) [][]string {
|
||||
var rows [][]string
|
||||
for i := start; i < end; i++ {
|
||||
a := m.alerts[i]
|
||||
h := m.engine.GetAlertHealth(a.ID)
|
||||
rows = append(rows, []string{
|
||||
fmt.Sprintf("%d", i+1),
|
||||
fmtAlertHealth(h),
|
||||
m.zones.Mark(fmt.Sprintf("alert-%d", i), limitStr(a.Name, 15)),
|
||||
fmtAlertType(a.Type),
|
||||
fmtAlertConfig(struct {
|
||||
Type string
|
||||
Settings map[string]string
|
||||
}{a.Type, a.Settings}),
|
||||
fmtAlertLastSent(h),
|
||||
})
|
||||
}
|
||||
return rows
|
||||
|
||||
@@ -469,6 +469,16 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.state = stateFormUser
|
||||
return m, m.initUserHuhForm()
|
||||
}
|
||||
case "t":
|
||||
if m.currentTab == 1 && len(m.alerts) > 0 {
|
||||
a := m.alerts[m.cursor]
|
||||
go func() {
|
||||
if err := m.engine.TestAlert(a.ID); err != nil {
|
||||
m.engine.AddLog(fmt.Sprintf("Test alert failed (%s): %v", a.Name, err))
|
||||
}
|
||||
}()
|
||||
return m, nil
|
||||
}
|
||||
case " ":
|
||||
if m.currentTab == 0 && len(m.sites) > 0 && m.sites[m.cursor].Type == "group" {
|
||||
gid := m.sites[m.cursor].ID
|
||||
@@ -943,6 +953,8 @@ func (m Model) viewDashboard() string {
|
||||
switch m.currentTab {
|
||||
case 0:
|
||||
keys = "[/]Filter [n]New [e]Edit [i]Info [d]Del [p]Pause [T]Theme [Tab]Switch [q]Quit"
|
||||
case 1:
|
||||
keys = "[n]New [e]Edit [d]Del [t]Test [T]Theme [Tab]Switch [q]Quit"
|
||||
case 2:
|
||||
keys = "[f]Filter [T]Theme [Tab]Switch [q]Quit"
|
||||
case 4:
|
||||
|
||||
Reference in New Issue
Block a user