fix(engine): apply convergence + push/group check history

1. Poll loop now fully converges with the DB: updated site configs are refreshed via UpdateSiteConfig, and sites removed from the DB are evicted from liveState. Previously the loop only added new sites — config edits via apply were ignored until restart, and pruned sites kept being checked and alerting.
2. Push monitors now record check history on each heartbeat via recordCheck. Previously RecordHeartbeat updated state but never wrote to check_history, so push uptime % and sparklines were empty.
3. Groups now record a synthetic check per evaluation tick, so they get uptime history and sparklines instead of blank displays.
2026-06-11 20:45:30 -04:00
parent fa56f47f96
commit 916c963663
1 changed file with 21 additions and 1 deletion
@@ -375,6 +375,8 @@ func (e *Engine) RecordHeartbeat(token string) bool {
 		go e.triggerAlert(alertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.%s", name, downDur))
 	}

+	e.recordCheck(targetID, 0, true)
+
 	if prevStatus != models.StatusUp && prevStatus != models.StatusPending {
 		e.enqueueWrite(writeStateChange{siteID: targetID, fromStatus: string(prevStatus), toStatus: string(models.StatusUp)})
 	}
@@ -428,9 +430,11 @@ func (e *Engine) Start(ctx context.Context) {
 				}
 				continue
 			}
+			dbIDs := make(map[int]bool, len(configs))
 			for _, cfg := range configs {
+				dbIDs[cfg.ID] = true
 				e.mu.RLock()
-				_, exists := e.liveState[cfg.ID]
+				existing, exists := e.liveState[cfg.ID]
 				e.mu.RUnlock()
 				if !exists {
 					e.mu.Lock()
@@ -453,9 +457,24 @@ func (e *Engine) Start(ctx context.Context) {
 						defer e.checkerWG.Done()
 						e.monitorRoutine(ctx, id)
 					}(cfg.ID)
+				} else if existing.SiteConfig != cfg {
+					e.UpdateSiteConfig(cfg)
 				}
 			}

+			e.mu.RLock()
+			var vanished []int
+			for id := range e.liveState {
+				if !dbIDs[id] {
+					vanished = append(vanished, id)
+				}
+			}
+			e.mu.RUnlock()
+			for _, id := range vanished {
+				e.RemoveSite(id)
+				e.AddLog(fmt.Sprintf("Monitor removed (no longer in DB): ID %d", id))
+			}
+
 			select {
 			case <-time.After(pollInterval):
 			case <-ctx.Done():
@@ -1017,6 +1036,7 @@ func (e *Engine) checkGroup(_ context.Context, site models.Site) {
 	e.applyState(site.ID, func(s *models.Site) {
 		s.Status = status
 	})
+	e.recordCheck(site.ID, 0, !status.IsBroken())
 }

 func (e *Engine) EnqueueProbeCheck(siteID int, nodeID string, latencyNs int64, isUp bool) {