fix(engine): apply convergence + push/group check history
CI / test (pull_request) Successful in 1m54s
CI / lint (pull_request) Successful in 1m27s
CI / vulncheck (pull_request) Successful in 1m1s

1. Poll loop now fully converges with the DB: updated site configs
   are refreshed via UpdateSiteConfig, and sites removed from the DB
   are evicted from liveState. Previously the loop only added new
   sites — config edits via apply were ignored until restart, and
   pruned sites kept being checked and kept triggering alerts.

2. Push monitors now record check history on each heartbeat via
   recordCheck. Previously RecordHeartbeat updated state but never
   wrote to check_history — push uptime % and sparklines were empty.

3. Groups record a synthetic check per evaluation tick so they get
   uptime history and sparklines instead of blank displays.
This commit was merged in pull request #114.
This commit is contained in:
2026-06-11 20:45:30 -04:00
parent fa56f47f96
commit 916c963663
+21 -1
View File
@@ -375,6 +375,8 @@ func (e *Engine) RecordHeartbeat(token string) bool {
go e.triggerAlert(alertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.%s", name, downDur))
}
e.recordCheck(targetID, 0, true)
if prevStatus != models.StatusUp && prevStatus != models.StatusPending {
e.enqueueWrite(writeStateChange{siteID: targetID, fromStatus: string(prevStatus), toStatus: string(models.StatusUp)})
}
@@ -428,9 +430,11 @@ func (e *Engine) Start(ctx context.Context) {
}
continue
}
dbIDs := make(map[int]bool, len(configs))
for _, cfg := range configs {
dbIDs[cfg.ID] = true
e.mu.RLock()
_, exists := e.liveState[cfg.ID]
existing, exists := e.liveState[cfg.ID]
e.mu.RUnlock()
if !exists {
e.mu.Lock()
@@ -453,9 +457,24 @@ func (e *Engine) Start(ctx context.Context) {
defer e.checkerWG.Done()
e.monitorRoutine(ctx, id)
}(cfg.ID)
} else if existing.SiteConfig != cfg {
e.UpdateSiteConfig(cfg)
}
}
e.mu.RLock()
var vanished []int
for id := range e.liveState {
if !dbIDs[id] {
vanished = append(vanished, id)
}
}
e.mu.RUnlock()
for _, id := range vanished {
e.RemoveSite(id)
e.AddLog(fmt.Sprintf("Monitor removed (no longer in DB): ID %d", id))
}
select {
case <-time.After(pollInterval):
case <-ctx.Done():
@@ -1017,6 +1036,7 @@ func (e *Engine) checkGroup(_ context.Context, site models.Site) {
e.applyState(site.ID, func(s *models.Site) {
s.Status = status
})
e.recordCheck(site.ID, 0, !status.IsBroken())
}
func (e *Engine) EnqueueProbeCheck(siteID int, nodeID string, latencyNs int64, isUp bool) {