From 21a1563e5335c9c16350c27f2856cded7780c8c5 Mon Sep 17 00:00:00 2001
From: Tyler Koenig
Date: Fri, 5 Jun 2026 18:27:42 -0400
Subject: [PATCH] feat(monitor): auto-prune expired maintenance windows

A background goroutine prunes once when the engine starts and then every
15 minutes, deleting maintenance windows whose end time is older than the
retention period (default 7 days). Windows without an end time are never
pruned.

The retention is configurable via the UPTOP_MAINT_RETENTION env var (Go
duration format, e.g. "72h"). Invalid or non-positive values are logged
and ignored, leaving the default in place.

Closes #72
---
Review notes: compared with the original commit, this revision logs
rejected UPTOP_MAINT_RETENTION values instead of dropping them silently,
makes SetMaintRetention ignore non-positive retentions, clamps negative
retentions in the SQL store, and adds doc comments. Hunk headers and the
diffstat were updated by hand; the commit ID above and the blob IDs on
the "index" lines are still those of the original commit.

 cmd/uptop/main.go                   | 14 +++++++
 internal/cluster/cluster_test.go    | 17 ++++----
 internal/metrics/prometheus_test.go | 17 ++++----
 internal/monitor/monitor.go         | 60 ++++++++++++++++++++++++++--
 internal/monitor/monitor_test.go    | 15 +++----
 internal/server/server_test.go      | 17 ++++----
 internal/store/sqlstore.go          | 20 ++++++++++
 internal/store/sqlstore_test.go     | 62 +++++++++++++++++++++++++++++
 internal/store/store.go             |  1 +
 9 files changed, 188 insertions(+), 35 deletions(-)

diff --git a/cmd/uptop/main.go b/cmd/uptop/main.go
index 3163463..da7302a 100644
--- a/cmd/uptop/main.go
+++ b/cmd/uptop/main.go
@@ -379,6 +379,20 @@ func runServe(args []string) {
 	if aggStrategy != "" {
 		eng.SetAggStrategy(monitor.AggregationStrategy(aggStrategy))
 	}
+	// UPTOP_MAINT_RETENTION overrides how long ended maintenance windows are
+	// kept before pruning. Rejected values are reported rather than dropped
+	// silently, so a typo does not quietly leave the default in place.
+	if v := os.Getenv("UPTOP_MAINT_RETENTION"); v != "" {
+		d, err := time.ParseDuration(v)
+		switch {
+		case err != nil:
+			eng.AddLog("Ignoring UPTOP_MAINT_RETENTION: " + err.Error())
+		case d <= 0:
+			eng.AddLog("Ignoring UPTOP_MAINT_RETENTION: retention must be positive")
+		default:
+			eng.SetMaintRetention(d)
+		}
+	}
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
diff --git a/internal/cluster/cluster_test.go b/internal/cluster/cluster_test.go
index 26ef9b1..58ced9b 100644
--- a/internal/cluster/cluster_test.go
+++ b/internal/cluster/cluster_test.go
@@ -65,14 +65,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
 func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
 	return nil, nil
 }
-func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error    { return nil }
-func (m *mockStore) EndMaintenanceWindow(int) error                         { return nil }
-func (m *mockStore) DeleteMaintenanceWindow(int) error                      { return nil }
-func (m *mockStore) IsMonitorInMaintenance(int) (bool, error)               { return false, nil }
-func (m *mockStore) GetPreference(string) (string, error)                   { return "", nil }
-func (m *mockStore) SetPreference(string, string) error                     { return nil }
-func (m *mockStore) SaveStateChange(int, string, string, string) error      { return nil }
-func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
+func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error         { return nil }
+func (m *mockStore) EndMaintenanceWindow(int) error                              { return nil }
+func (m *mockStore) DeleteMaintenanceWindow(int) error                           { return nil }
+func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
+func (m *mockStore) IsMonitorInMaintenance(int) (bool, error)                    { return false, nil }
+func (m *mockStore) GetPreference(string) (string, error)                        { return "", nil }
+func (m *mockStore) SetPreference(string, string) error                          { return nil }
+func (m *mockStore) SaveStateChange(int, string, string, string) error           { return nil }
+func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error)      { return nil, nil }
 func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
 	return nil, nil
 }
diff --git a/internal/metrics/prometheus_test.go b/internal/metrics/prometheus_test.go
index 52409a3..0a857c7 100644
--- a/internal/metrics/prometheus_test.go
+++ b/internal/metrics/prometheus_test.go
@@ -63,14 +63,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
 func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
 	return nil, nil
 }
-func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error    { return nil }
-func (m *mockStore) EndMaintenanceWindow(int) error                         { return nil }
-func (m *mockStore) DeleteMaintenanceWindow(int) error                      { return nil }
-func (m *mockStore) IsMonitorInMaintenance(int) (bool, error)               { return false, nil }
-func (m *mockStore) GetPreference(string) (string, error)                   { return "", nil }
-func (m *mockStore) SetPreference(string, string) error                     { return nil }
-func (m *mockStore) SaveStateChange(int, string, string, string) error      { return nil }
-func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
+func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error         { return nil }
+func (m *mockStore) EndMaintenanceWindow(int) error                              { return nil }
+func (m *mockStore) DeleteMaintenanceWindow(int) error                           { return nil }
+func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
+func (m *mockStore) IsMonitorInMaintenance(int) (bool, error)                    { return false, nil }
+func (m *mockStore) GetPreference(string) (string, error)                        { return "", nil }
+func (m *mockStore) SetPreference(string, string) error                          { return nil }
+func (m *mockStore) SaveStateChange(int, string, string, string) error           { return nil }
+func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error)      { return nil, nil }
 func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
 	return nil, nil
 }
diff --git a/internal/monitor/monitor.go b/internal/monitor/monitor.go
index ac20972..4702b9e 100644
--- a/internal/monitor/monitor.go
+++ b/internal/monitor/monitor.go
@@ -17,10 +17,12 @@ import (
 )
 
 const (
-	maxLogEntries    = 100
-	pollInterval     = 5 * time.Second
-	minCheckInterval = 5
-	minPushGrace     = 60 * time.Second
+	maxLogEntries         = 100
+	pollInterval          = 5 * time.Second
+	minCheckInterval      = 5
+	minPushGrace          = 60 * time.Second
+	maintPruneInterval    = 15 * time.Minute
+	defaultMaintRetention = 7 * 24 * time.Hour
 )
 
 type AlertHealth struct {
@@ -59,6 +61,7 @@ type Engine struct {
 	db                  store.Store
 	insecureSkipVerify  bool
 	allowPrivateTargets bool
+	maintRetention      time.Duration
 	strictClient        *http.Client
 	insecureClient      *http.Client
 }
@@ -83,6 +86,7 @@ func newEngine(s store.Store, allowPrivateTargets bool) *Engine {
 		aggStrategy:         AggAnyDown,
 		isActive:            true,
 		allowPrivateTargets: allowPrivateTargets,
+		maintRetention:      defaultMaintRetention,
 		db:                  s,
 		strictClient: &http.Client{
 			Transport: &http.Transport{
@@ -103,6 +107,19 @@ func (e *Engine) SetInsecureSkipVerify(skip bool) {
 	e.insecureSkipVerify = skip
 }
 
+// SetMaintRetention sets how long an ended maintenance window is kept
+// before the pruner deletes it. Non-positive durations are ignored and the
+// current retention stays in effect: the retention is subtracted from the
+// current time to form the prune cutoff, so a value <= 0 would also match
+// windows that have only just ended or have not ended yet. The field is
+// written without locking, so call this before Start.
+func (e *Engine) SetMaintRetention(d time.Duration) {
+	if d <= 0 {
+		return
+	}
+	e.maintRetention = d
+}
+
 var ansiRe = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]`)
 
 func sanitizeLog(s string) string {
@@ -337,6 +354,41 @@ func (e *Engine) Start(ctx context.Context) {
 			}
 		}
 	}()
+
+	go e.maintenancePruner(ctx)
+}
+
+// maintenancePruner prunes expired maintenance windows once immediately and
+// then every maintPruneInterval until ctx is cancelled.
+func (e *Engine) maintenancePruner(ctx context.Context) {
+	ticker := time.NewTicker(maintPruneInterval)
+	defer ticker.Stop()
+
+	// Run once up front; the ticker's first tick only arrives after a full interval.
+	e.pruneMaintenanceWindows()
+
+	for {
+		select {
+		case <-ticker.C:
+			e.pruneMaintenanceWindows()
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// pruneMaintenanceWindows asks the store to delete windows that ended more
+// than e.maintRetention ago and records the outcome in the engine log. A
+// run that deletes nothing is not logged.
+func (e *Engine) pruneMaintenanceWindows() {
+	pruned, err := e.db.PruneExpiredMaintenanceWindows(e.maintRetention)
+	if err != nil {
+		e.AddLog(fmt.Sprintf("Maintenance prune error: %v", err))
+		return
+	}
+	if pruned > 0 {
+		e.AddLog(fmt.Sprintf("Pruned %d expired maintenance window(s)", pruned))
+	}
 }
 
 func (e *Engine) UpdateSiteConfig(site models.Site) {
diff --git a/internal/monitor/monitor_test.go b/internal/monitor/monitor_test.go
index 585a1ab..45d5a96 100644
--- a/internal/monitor/monitor_test.go
+++ b/internal/monitor/monitor_test.go
@@ -73,13 +73,14 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
 func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
 	return nil, nil
 }
-func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error    { return nil }
-func (m *mockStore) EndMaintenanceWindow(int) error                         { return nil }
-func (m *mockStore) DeleteMaintenanceWindow(int) error                      { return nil }
-func (m *mockStore) GetPreference(string) (string, error)                   { return "", nil }
-func (m *mockStore) SetPreference(string, string) error                     { return nil }
-func (m *mockStore) SaveStateChange(int, string, string, string) error      { return nil }
-func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
+func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error         { return nil }
+func (m *mockStore) EndMaintenanceWindow(int) error                              { return nil }
+func (m *mockStore) DeleteMaintenanceWindow(int) error                           { return nil }
+func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
+func (m *mockStore) GetPreference(string) (string, error)                        { return "", nil }
+func (m *mockStore) SetPreference(string, string) error                          { return nil }
+func (m *mockStore) SaveStateChange(int, string, string, string) error           { return nil }
+func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error)      { return nil, nil }
 func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
 	return nil, nil
 }
diff --git a/internal/server/server_test.go b/internal/server/server_test.go
index 73f8b84..77b60b4 100644
--- a/internal/server/server_test.go
+++ b/internal/server/server_test.go
@@ -74,14 +74,15 @@ func (m *mockStore) LoadLogs(int) ([]string, error) { return nil
 func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
 	return nil, nil
 }
-func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error    { return nil }
-func (m *mockStore) EndMaintenanceWindow(int) error                         { return nil }
-func (m *mockStore) DeleteMaintenanceWindow(int) error                      { return nil }
-func (m *mockStore) IsMonitorInMaintenance(int) (bool, error)               { return false, nil }
-func (m *mockStore) GetPreference(string) (string, error)                   { return "", nil }
-func (m *mockStore) SetPreference(string, string) error                     { return nil }
-func (m *mockStore) SaveStateChange(int, string, string, string) error      { return nil }
-func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
+func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error         { return nil }
+func (m *mockStore) EndMaintenanceWindow(int) error                              { return nil }
+func (m *mockStore) DeleteMaintenanceWindow(int) error                           { return nil }
+func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
+func (m *mockStore) IsMonitorInMaintenance(int) (bool, error)                    { return false, nil }
+func (m *mockStore) GetPreference(string) (string, error)                        { return "", nil }
+func (m *mockStore) SetPreference(string, string) error                          { return nil }
+func (m *mockStore) SaveStateChange(int, string, string, string) error           { return nil }
+func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error)      { return nil, nil }
 func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
 	return nil, nil
 }
diff --git a/internal/store/sqlstore.go b/internal/store/sqlstore.go
index 3b866ef..fb9f27d 100644
--- a/internal/store/sqlstore.go
+++ b/internal/store/sqlstore.go
@@ -619,6 +619,26 @@ func (s *SQLStore) DeleteMaintenanceWindow(id int) error {
 	return nil
 }
 
+// PruneExpiredMaintenanceWindows deletes maintenance windows whose end_time
+// is set and lies more than retention in the past, and returns the number of
+// rows removed. Windows without an end_time are never deleted. A negative
+// retention is treated as zero so the cutoff can never move into the future
+// and match windows that have not ended yet.
+func (s *SQLStore) PruneExpiredMaintenanceWindows(retention time.Duration) (int64, error) {
+	if retention < 0 {
+		retention = 0
+	}
+	cutoff := time.Now().Add(-retention)
+	result, err := s.db.Exec(
+		s.q("DELETE FROM maintenance_windows WHERE end_time IS NOT NULL AND end_time < ?"),
+		cutoff,
+	)
+	if err != nil {
+		return 0, err
+	}
+	return result.RowsAffected()
+}
+
 func (s *SQLStore) IsMonitorInMaintenance(monitorID int) (bool, error) {
 	var count int
 	err := s.db.QueryRow(s.q(`SELECT COUNT(*) FROM maintenance_windows
diff --git a/internal/store/sqlstore_test.go b/internal/store/sqlstore_test.go
index e1d9b50..9c124be 100644
--- a/internal/store/sqlstore_test.go
+++ b/internal/store/sqlstore_test.go
@@ -315,3 +315,65 @@ func TestDeleteSiteCascade(t *testing.T) {
 		}
 	}
 }
+
+func TestPruneExpiredMaintenanceWindows(t *testing.T) {
+	s := newTestStore(t)
+
+	now := time.Now()
+
+	// Expired 10 days ago — should be pruned with 7d retention.
+	old := models.MaintenanceWindow{
+		MonitorID: 0,
+		Title:     "Old Window",
+		Type:      "maintenance",
+		StartTime: now.Add(-11 * 24 * time.Hour),
+		EndTime:   now.Add(-10 * 24 * time.Hour),
+	}
+	if err := s.AddMaintenanceWindow(old); err != nil {
+		t.Fatalf("AddMaintenanceWindow (old): %v", err)
+	}
+
+	// Expired 1 day ago — within 7d retention, should survive.
+	recent := models.MaintenanceWindow{
+		MonitorID: 0,
+		Title:     "Recent Window",
+		Type:      "maintenance",
+		StartTime: now.Add(-2 * 24 * time.Hour),
+		EndTime:   now.Add(-1 * 24 * time.Hour),
+	}
+	if err := s.AddMaintenanceWindow(recent); err != nil {
+		t.Fatalf("AddMaintenanceWindow (recent): %v", err)
+	}
+
+	// Ongoing — no end time, should survive.
+	ongoing := models.MaintenanceWindow{
+		MonitorID: 0,
+		Title:     "Ongoing Window",
+		Type:      "maintenance",
+		StartTime: now.Add(-1 * time.Hour),
+	}
+	if err := s.AddMaintenanceWindow(ongoing); err != nil {
+		t.Fatalf("AddMaintenanceWindow (ongoing): %v", err)
+	}
+
+	pruned, err := s.PruneExpiredMaintenanceWindows(7 * 24 * time.Hour)
+	if err != nil {
+		t.Fatalf("PruneExpiredMaintenanceWindows: %v", err)
+	}
+	if pruned != 1 {
+		t.Errorf("expected 1 pruned, got %d", pruned)
+	}
+
+	all, err := s.GetAllMaintenanceWindows(100)
+	if err != nil {
+		t.Fatalf("GetAllMaintenanceWindows: %v", err)
+	}
+	if len(all) != 2 {
+		t.Fatalf("expected 2 remaining windows, got %d", len(all))
+	}
+	for _, w := range all {
+		if w.Title == "Old Window" {
+			t.Error("old window should have been pruned")
+		}
+	}
+}
diff --git a/internal/store/store.go b/internal/store/store.go
index 0389a1e..d1242b5 100644
--- a/internal/store/store.go
+++ b/internal/store/store.go
@@ -66,6 +66,7 @@ type Store interface {
 	AddMaintenanceWindow(mw models.MaintenanceWindow) error
 	EndMaintenanceWindow(id int) error
 	DeleteMaintenanceWindow(id int) error
+	PruneExpiredMaintenanceWindows(retention time.Duration) (int64, error)
 	IsMonitorInMaintenance(monitorID int) (bool, error)
 
 	// Preferences
-- 
2.52.0