fix(engine): six correctness fixes for the state machine
1. Group auto-pause trap: remove the one-way Paused=true mutation from checkGroup — monitorRoutine skipped paused groups, so they could never re-evaluate or auto-unpause.
2. Retry logic: apply MaxRetries to all →DOWN transitions, not just UP→DOWN. New monitors (PENDING) no longer alert on the first transient failure when retries are configured.
3. Shutdown drain hole: track checker goroutines with checkerWG so Stop() waits for in-flight checks before draining the write queue. A final drainWrites() catches any writes enqueued after the writer's own drain.
4. Probe-ingest writer bypass: route SaveCheckFromNode through the engine's serialized dbWriter instead of writing directly to the store from the HTTP handler.
5. Dead-probe expiry: expire stale probe results (>3× site interval) before aggregation so a dead probe can't poison status forever. Also clean probeResults in RemoveSite.
6. Maintenance-cache N+1: replace the per-check DB query with a fully resolved in-memory cache refreshed every poll cycle — one GetActiveMaintenanceWindows() call instead of N IsMonitorInMaintenance calls.

In addition, ImportData now wipes check_history, state_changes, and alert_health so re-inserted IDs don't inherit stale history from prior occupants.
This commit was merged in pull request #105.
This commit is contained in:
@@ -235,6 +235,46 @@ func TestImportExport(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestImportData_WipesHistory(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
|
||||
if err := s.AddSite(models.Site{Name: "OldSite", URL: "https://old.com", Type: "http", Interval: 30}); err != nil {
|
||||
t.Fatalf("AddSite: %v", err)
|
||||
}
|
||||
if err := s.SaveCheck(1, 5000, true); err != nil {
|
||||
t.Fatalf("SaveCheck: %v", err)
|
||||
}
|
||||
if err := s.SaveStateChange(1, "UP", "DOWN", "timeout"); err != nil {
|
||||
t.Fatalf("SaveStateChange: %v", err)
|
||||
}
|
||||
if err := s.SaveAlertHealth(models.AlertHealthRecord{AlertID: 1, LastSendOK: true, SendCount: 1}); err != nil {
|
||||
t.Fatalf("SaveAlertHealth: %v", err)
|
||||
}
|
||||
|
||||
backup := models.Backup{
|
||||
Sites: []models.Site{{ID: 1, Name: "NewSite", URL: "https://new.com", Type: "http", Interval: 60}},
|
||||
}
|
||||
if err := s.ImportData(backup); err != nil {
|
||||
t.Fatalf("ImportData: %v", err)
|
||||
}
|
||||
|
||||
history, err := s.LoadAllHistory(100)
|
||||
if err != nil {
|
||||
t.Fatalf("LoadAllHistory: %v", err)
|
||||
}
|
||||
if len(history) != 0 {
|
||||
t.Errorf("expected empty check_history after import, got %d sites with history", len(history))
|
||||
}
|
||||
|
||||
changes, err := s.GetStateChanges(1, 100)
|
||||
if err != nil {
|
||||
t.Fatalf("GetStateChanges: %v", err)
|
||||
}
|
||||
if len(changes) != 0 {
|
||||
t.Errorf("expected empty state_changes after import, got %d", len(changes))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckHistory(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user