From 916c963663d3daa53edd99614a906582c5af29f5 Mon Sep 17 00:00:00 2001 From: Tyler Koenig Date: Thu, 11 Jun 2026 20:45:30 -0400 Subject: [PATCH] fix(engine): apply convergence + push/group check history MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Poll loop now fully converges with the DB: updated site configs are refreshed via UpdateSiteConfig, and sites removed from the DB are evicted from liveState. Previously the loop only added new sites — config edits via apply were ignored until restart, and pruned sites kept being checked and alerting. 2. Push monitors now record check history on each heartbeat via recordCheck. Previously RecordHeartbeat updated state but never wrote to check_history — push uptime % and sparklines were empty. 3. Groups record a synthetic check per evaluation tick so they get uptime history and sparklines instead of blank displays. --- internal/monitor/monitor.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/internal/monitor/monitor.go b/internal/monitor/monitor.go index c02af39..63a12bd 100644 --- a/internal/monitor/monitor.go +++ b/internal/monitor/monitor.go @@ -375,6 +375,8 @@ func (e *Engine) RecordHeartbeat(token string) bool { go e.triggerAlert(alertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.%s", name, downDur)) } + e.recordCheck(targetID, 0, true) + if prevStatus != models.StatusUp && prevStatus != models.StatusPending { e.enqueueWrite(writeStateChange{siteID: targetID, fromStatus: string(prevStatus), toStatus: string(models.StatusUp)}) } @@ -428,9 +430,11 @@ func (e *Engine) Start(ctx context.Context) { } continue } + dbIDs := make(map[int]bool, len(configs)) for _, cfg := range configs { + dbIDs[cfg.ID] = true e.mu.RLock() - _, exists := e.liveState[cfg.ID] + existing, exists := e.liveState[cfg.ID] e.mu.RUnlock() if !exists { e.mu.Lock() @@ -453,9 +457,24 @@ func (e *Engine) Start(ctx context.Context) { defer e.checkerWG.Done() e.monitorRoutine(ctx, id) }(cfg.ID) + } else if existing.SiteConfig != cfg { + e.UpdateSiteConfig(cfg) } } + e.mu.RLock() + var vanished []int + for id := range e.liveState { + if !dbIDs[id] { + vanished = append(vanished, id) + } + } + e.mu.RUnlock() + for _, id := range vanished { + e.RemoveSite(id) + e.AddLog(fmt.Sprintf("Monitor removed (no longer in DB): ID %d", id)) + } + select { case <-time.After(pollInterval): case <-ctx.Done(): @@ -1017,6 +1036,7 @@ func (e *Engine) checkGroup(_ context.Context, site models.Site) { e.applyState(site.ID, func(s *models.Site) { s.Status = status }) + e.recordCheck(site.ID, 0, !status.IsBroken()) } func (e *Engine) EnqueueProbeCheck(siteID int, nodeID string, latencyNs int64, isUp bool) {