diff --git a/internal/monitor/monitor.go b/internal/monitor/monitor.go index 997fe77..37d9f03 100644 --- a/internal/monitor/monitor.go +++ b/internal/monitor/monitor.go @@ -19,8 +19,8 @@ import ( const ( maxLogEntries = 100 pollInterval = 5 * time.Second - pushGracePeriod = 5 * time.Second minCheckInterval = 5 + minPushGrace = 60 * time.Second ) type Engine struct { @@ -186,17 +186,34 @@ func (e *Engine) RecordHeartbeat(token string) bool { return false } + prevStatus := site.Status site.LastCheck = time.Now() - wasDown := site.Status == "DOWN" site.Status = "UP" site.FailureCount = 0 site.Latency = 0 + site.LastError = "" + site.LastSuccessAt = time.Now() + + if prevStatus != "UP" { + site.StatusChangedAt = time.Now() + } + e.liveState[targetID] = site - if wasDown { + switch prevStatus { + case "PENDING": + e.AddLog(fmt.Sprintf("Push Monitor '%s' received first heartbeat", site.Name)) + case "LATE": + e.AddLog(fmt.Sprintf("Push Monitor '%s' heartbeat arrived (was late)", site.Name)) + case "DOWN": e.AddLog(fmt.Sprintf("Push Monitor '%s' recovered", site.Name)) - e.triggerAlert(site.AlertID, "āœ… RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.", site.Name)) + go e.triggerAlert(site.AlertID, "āœ… RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.", site.Name)) } + + if prevStatus != "UP" && prevStatus != "PENDING" { + go func() { _ = e.db.SaveStateChange(targetID, prevStatus, "UP", "") }() + } + return true } @@ -241,9 +258,6 @@ func (e *Engine) Start(ctx context.Context) { if !exists { e.mu.Lock() s.Status = "PENDING" - if s.Type == "push" { - s.LastCheck = time.Now() - } if h, ok := e.GetHistory(s.ID); ok && len(h.Statuses) > 0 { if h.Statuses[len(h.Statuses)-1] { s.Status = "UP" @@ -401,11 +415,28 @@ func (e *Engine) checkByID(id int) { } func (e *Engine) checkPush(site models.Site) { - deadline := site.LastCheck.Add(time.Duration(site.Interval) * time.Second).Add(pushGracePeriod) - if time.Now().After(deadline) { - e.handleStatusChange(site, "DOWN", 0, 0, "heartbeat missed") - } else if site.Status != "UP" { - e.handleStatusChange(site, "UP", 200, 0, "") + if site.Status == "PENDING" { + return + } + + interval := time.Duration(site.Interval) * time.Second + grace := interval / 2 + if grace < minPushGrace { + grace = minPushGrace + } + + overdue := site.LastCheck.Add(interval) + graceEnd := overdue.Add(grace) + now := time.Now() + + if now.After(graceEnd) { + if site.Status != "DOWN" { + e.handleStatusChange(site, "DOWN", 0, 0, "heartbeat missed") + } + } else if now.After(overdue) { + if site.Status != "LATE" { + e.handleStatusChange(site, "LATE", 0, 0, "heartbeat overdue") + } } } diff --git a/internal/monitor/monitor_test.go b/internal/monitor/monitor_test.go index 148cf6d..4792bf2 100644 --- a/internal/monitor/monitor_test.go +++ b/internal/monitor/monitor_test.go @@ -537,7 +537,7 @@ func TestCheckPush_DeadlineMissed(t *testing.T) { site := models.Site{ ID: 1, Name: "push", Type: "push", Status: "UP", Interval: 10, MaxRetries: 0, - LastCheck: time.Now().Add(-20 * time.Second), + LastCheck: time.Now().Add(-120 * time.Second), } injectSite(e, site) @@ -549,6 +549,24 @@ func TestCheckPush_DeadlineMissed(t *testing.T) { } } +func TestCheckPush_OverdueBecomesLate(t *testing.T) { + ms := newMockStore() + e := newTestEngine(ms) + site := models.Site{ + ID: 1, Name: "push", Type: "push", Status: "UP", + Interval: 300, + LastCheck: time.Now().Add(-310 * time.Second), + } + injectSite(e, site) + + e.checkPush(site) + + s, _ := getSite(e, 1) + if s.Status != "LATE" { + t.Errorf("expected LATE when overdue but within grace, got %s", s.Status) + } +} + func TestCheckPush_WithinDeadline(t *testing.T) { ms := newMockStore() e := newTestEngine(ms) @@ -566,20 +584,20 @@ func TestCheckPush_WithinDeadline(t *testing.T) { } } -func TestCheckPush_PendingToUp(t *testing.T) { +func TestCheckPush_PendingStaysPending(t *testing.T) { ms := newMockStore() e := newTestEngine(ms) site := models.Site{ ID: 1, Name: "push", Type: "push", Status: "PENDING", - Interval: 60, LastCheck: time.Now(), + Interval: 60, } injectSite(e, site) e.checkPush(site) s, _ := getSite(e, 1) - if s.Status != "UP" { - t.Errorf("expected UP, got %s", s.Status) + if s.Status != "PENDING" { + t.Errorf("expected PENDING to stay until first heartbeat, got %s", s.Status) } } diff --git a/internal/server/server.go b/internal/server/server.go index 463aba9..d4aad9e 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -67,6 +67,7 @@ var statusTpl = template.Must(template.New("status").Parse(` .UP { background: #9ece6a; color: #1a1b26; } .DOWN { background: #f7768e; color: #1a1b26; } .PENDING { background: #e0af68; color: #1a1b26; } + .LATE { background: #e0af68; color: #1a1b26; } .SSL-EXP { background: #e0af68; color: #1a1b26; } .PAUSED { background: #565f89; color: #c0caf5; } .MAINT { background: #bb9af7; color: #1a1b26; } diff --git a/internal/tui/tab_sites.go b/internal/tui/tab_sites.go index e07eff1..838ea00 100644 --- a/internal/tui/tab_sites.go +++ b/internal/tui/tab_sites.go @@ -302,6 +302,8 @@ func fmtStatus(status string, paused bool, inMaint bool) string { switch status { case "DOWN", "SSL EXP": return dangerStyle.Render(status) + case "LATE": + return warnStyle.Render(status) case "PENDING": return subtleStyle.Render(status) default: @@ -412,7 +414,7 @@ func (m Model) viewSitesTab() string { name = limitStr(name, nameW) } - if (site.Status == "DOWN" || site.Status == "SSL EXP") && site.LastError != "" { + if (site.Status == "DOWN" || site.Status == "SSL EXP" || site.Status == "LATE") && site.LastError != "" { nameLen := len([]rune(name)) errSpace := nameW - nameLen - 1 if errSpace > 10 { @@ -764,7 +766,7 @@ func (m Model) viewDetailPanel() string { row("Status", fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID))) - if (site.Status == "DOWN" || site.Status == "SSL EXP") && site.LastError != "" { + if (site.Status == "DOWN" || site.Status == "SSL EXP" || site.Status == "LATE") && site.LastError != "" { row("Error", dangerStyle.Render(limitStr(site.LastError, 60))) } diff --git a/internal/tui/tui.go b/internal/tui/tui.go index 7fbbfd2..664a729 100644 --- a/internal/tui/tui.go +++ b/internal/tui/tui.go @@ -811,13 +811,20 @@ func (m Model) viewDashboard() string { allSites := m.engine.GetAllSites() totalMonitors := 0 downCount := 0 + lateCount := 0 for _, s := range allSites { if s.Type == "group" { continue } totalMonitors++ - if !s.Paused && !m.isMonitorInMaintenance(s.ID) && (s.Status == "DOWN" || s.Status == "SSL EXP") { + if s.Paused || m.isMonitorInMaintenance(s.ID) { + continue + } + switch s.Status { + case "DOWN", "SSL EXP": downCount++ + case "LATE": + lateCount++ } } offlineNodes := 0 @@ -830,6 +837,8 @@ func (m Model) viewDashboard() string { var sitesLabel string if downCount > 0 { sitesLabel = fmt.Sprintf("Sites (%d↓)", downCount) + } else if lateCount > 0 { + sitesLabel = fmt.Sprintf("Sites (%d⚠)", lateCount) } else if totalMonitors > 0 { sitesLabel = fmt.Sprintf("Sites (%d)", totalMonitors) } else { @@ -895,14 +904,19 @@ func (m Model) viewDashboard() string { } } - upCount := totalMonitors - downCount + upCount := totalMonitors - downCount - lateCount var upStr string if downCount > 0 { upStr = dangerStyle.Render(fmt.Sprintf("%d/%d UP", upCount, totalMonitors)) + } else if lateCount > 0 { + upStr = warnStyle.Render(fmt.Sprintf("%d/%d UP", upCount, totalMonitors)) } else { upStr = specialStyle.Render(fmt.Sprintf("%d/%d UP", upCount, totalMonitors)) } statusParts := []string{upStr} + if lateCount > 0 { + statusParts = append(statusParts, warnStyle.Render(fmt.Sprintf("%d LATE", lateCount))) + } if len(m.nodes) > 0 { online := 0 for _, n := range m.nodes { @@ -949,10 +963,12 @@ func siteOrder(s models.Site) int { switch s.Status { case "DOWN", "SSL EXP": return 0 - case "PENDING": - return 2 - default: + case "LATE": return 1 + case "PENDING": + return 3 + default: + return 2 } }