feat(monitor): auto-prune expired maintenance windows
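A background goroutine runs every 15 minutes and deletes maintenance windows whose end time is older than the retention period (default 7 days). The retention period is configurable via the UPTOP_MAINT_RETENTION environment variable, which takes a Go duration string (for example `168h`; Go durations have no day unit, so `7d` is not valid). Closes #72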
This commit was merged in pull request #96.
This commit is contained in:
@@ -65,14 +65,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -63,14 +63,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -17,10 +17,12 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
maxLogEntries = 100
|
||||
pollInterval = 5 * time.Second
|
||||
minCheckInterval = 5
|
||||
minPushGrace = 60 * time.Second
|
||||
maxLogEntries = 100
|
||||
pollInterval = 5 * time.Second
|
||||
minCheckInterval = 5
|
||||
minPushGrace = 60 * time.Second
|
||||
maintPruneInterval = 15 * time.Minute
|
||||
defaultMaintRetention = 7 * 24 * time.Hour
|
||||
)
|
||||
|
||||
type AlertHealth struct {
|
||||
@@ -59,6 +61,7 @@ type Engine struct {
|
||||
db store.Store
|
||||
insecureSkipVerify bool
|
||||
allowPrivateTargets bool
|
||||
maintRetention time.Duration
|
||||
strictClient *http.Client
|
||||
insecureClient *http.Client
|
||||
}
|
||||
@@ -83,6 +86,7 @@ func newEngine(s store.Store, allowPrivateTargets bool) *Engine {
|
||||
aggStrategy: AggAnyDown,
|
||||
isActive: true,
|
||||
allowPrivateTargets: allowPrivateTargets,
|
||||
maintRetention: defaultMaintRetention,
|
||||
db: s,
|
||||
strictClient: &http.Client{
|
||||
Transport: &http.Transport{
|
||||
@@ -103,6 +107,10 @@ func (e *Engine) SetInsecureSkipVerify(skip bool) {
|
||||
e.insecureSkipVerify = skip
|
||||
}
|
||||
|
||||
func (e *Engine) SetMaintRetention(d time.Duration) {
|
||||
e.maintRetention = d
|
||||
}
|
||||
|
||||
var ansiRe = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]`)
|
||||
|
||||
func sanitizeLog(s string) string {
|
||||
@@ -337,6 +345,35 @@ func (e *Engine) Start(ctx context.Context) {
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
go e.maintenancePruner(ctx)
|
||||
}
|
||||
|
||||
func (e *Engine) maintenancePruner(ctx context.Context) {
|
||||
ticker := time.NewTicker(maintPruneInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
e.pruneMaintenanceWindows()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
e.pruneMaintenanceWindows()
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) pruneMaintenanceWindows() {
|
||||
pruned, err := e.db.PruneExpiredMaintenanceWindows(e.maintRetention)
|
||||
if err != nil {
|
||||
e.AddLog(fmt.Sprintf("Maintenance prune error: %v", err))
|
||||
return
|
||||
}
|
||||
if pruned > 0 {
|
||||
e.AddLog(fmt.Sprintf("Pruned %d expired maintenance window(s)", pruned))
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) UpdateSiteConfig(site models.Site) {
|
||||
|
||||
@@ -73,13 +73,14 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -74,14 +74,15 @@ func (m *mockStore) LoadLogs(int) ([]string, error) { return nil
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -619,6 +619,18 @@ func (s *SQLStore) DeleteMaintenanceWindow(id int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SQLStore) PruneExpiredMaintenanceWindows(retention time.Duration) (int64, error) {
|
||||
cutoff := time.Now().Add(-retention)
|
||||
result, err := s.db.Exec(
|
||||
s.q("DELETE FROM maintenance_windows WHERE end_time IS NOT NULL AND end_time < ?"),
|
||||
cutoff,
|
||||
)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return result.RowsAffected()
|
||||
}
|
||||
|
||||
func (s *SQLStore) IsMonitorInMaintenance(monitorID int) (bool, error) {
|
||||
var count int
|
||||
err := s.db.QueryRow(s.q(`SELECT COUNT(*) FROM maintenance_windows
|
||||
|
||||
@@ -315,3 +315,65 @@ func TestDeleteSiteCascade(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPruneExpiredMaintenanceWindows(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
|
||||
now := time.Now()
|
||||
|
||||
// Expired 10 days ago — should be pruned with 7d retention.
|
||||
old := models.MaintenanceWindow{
|
||||
MonitorID: 0,
|
||||
Title: "Old Window",
|
||||
Type: "maintenance",
|
||||
StartTime: now.Add(-11 * 24 * time.Hour),
|
||||
EndTime: now.Add(-10 * 24 * time.Hour),
|
||||
}
|
||||
if err := s.AddMaintenanceWindow(old); err != nil {
|
||||
t.Fatalf("AddMaintenanceWindow (old): %v", err)
|
||||
}
|
||||
|
||||
// Expired 1 day ago — within 7d retention, should survive.
|
||||
recent := models.MaintenanceWindow{
|
||||
MonitorID: 0,
|
||||
Title: "Recent Window",
|
||||
Type: "maintenance",
|
||||
StartTime: now.Add(-2 * 24 * time.Hour),
|
||||
EndTime: now.Add(-1 * 24 * time.Hour),
|
||||
}
|
||||
if err := s.AddMaintenanceWindow(recent); err != nil {
|
||||
t.Fatalf("AddMaintenanceWindow (recent): %v", err)
|
||||
}
|
||||
|
||||
// Ongoing — no end time, should survive.
|
||||
ongoing := models.MaintenanceWindow{
|
||||
MonitorID: 0,
|
||||
Title: "Ongoing Window",
|
||||
Type: "maintenance",
|
||||
StartTime: now.Add(-1 * time.Hour),
|
||||
}
|
||||
if err := s.AddMaintenanceWindow(ongoing); err != nil {
|
||||
t.Fatalf("AddMaintenanceWindow (ongoing): %v", err)
|
||||
}
|
||||
|
||||
pruned, err := s.PruneExpiredMaintenanceWindows(7 * 24 * time.Hour)
|
||||
if err != nil {
|
||||
t.Fatalf("PruneExpiredMaintenanceWindows: %v", err)
|
||||
}
|
||||
if pruned != 1 {
|
||||
t.Errorf("expected 1 pruned, got %d", pruned)
|
||||
}
|
||||
|
||||
all, err := s.GetAllMaintenanceWindows(100)
|
||||
if err != nil {
|
||||
t.Fatalf("GetAllMaintenanceWindows: %v", err)
|
||||
}
|
||||
if len(all) != 2 {
|
||||
t.Fatalf("expected 2 remaining windows, got %d", len(all))
|
||||
}
|
||||
for _, w := range all {
|
||||
if w.Title == "Old Window" {
|
||||
t.Error("old window should have been pruned")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,6 +66,7 @@ type Store interface {
|
||||
AddMaintenanceWindow(mw models.MaintenanceWindow) error
|
||||
EndMaintenanceWindow(id int) error
|
||||
DeleteMaintenanceWindow(id int) error
|
||||
PruneExpiredMaintenanceWindows(retention time.Duration) (int64, error)
|
||||
IsMonitorInMaintenance(monitorID int) (bool, error)
|
||||
|
||||
// Preferences
|
||||
|
||||
Reference in New Issue
Block a user