refactor(models): typed Status constants with IsBroken() predicate

Replace ~150 bare status string comparisons with typed models.Status
constants (StatusUp, StatusDown, StatusPending, StatusLate, StatusStale,
StatusSSLExp). Single IsBroken() method replaces the duplicated
isBroken lambda in monitor.go and isDown function in sla.go.

Adding a new status value (e.g. DEGRADED) now requires one constant
definition instead of grep-and-pray across 16 files.

CheckResult.Status stays string — the checker is the boundary between
raw protocol results and typed status. Cast happens at the edge in
handleStatusChange.
This commit is contained in:
2026-06-11 15:56:51 -04:00
parent c3ae0bd80a
commit f00acbc280
16 changed files with 152 additions and 137 deletions
+16 -16
View File
@@ -47,7 +47,7 @@ func RunCheck(ctx context.Context, site models.Site, strict, insecure *http.Clie
if ips, err := net.LookupIP(host); err == nil {
for _, ip := range ips {
if isPrivateIP(ip) {
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "target resolves to private IP"}
return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "target resolves to private IP"}
}
}
}
@@ -64,7 +64,7 @@ func RunCheck(ctx context.Context, site models.Site, strict, insecure *http.Clie
case "dns":
return runDNSCheck(ctx, site)
default:
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "unsupported monitor type: " + site.Type}
return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "unsupported monitor type: " + site.Type}
}
}
@@ -80,7 +80,7 @@ func runHTTPCheck(ctx context.Context, site models.Site, strict, insecure *http.
req, err := http.NewRequestWithContext(ctx, method, site.URL, nil)
if err != nil {
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "invalid request: " + err.Error()}
return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "invalid request: " + err.Error()}
}
client := strict
@@ -94,12 +94,12 @@ func runHTTPCheck(ctx context.Context, site models.Site, strict, insecure *http.
result := CheckResult{
SiteID: site.ID,
Status: "UP",
Status: string(models.StatusUp),
LatencyNs: latency.Nanoseconds(),
}
if err != nil {
result.Status = "DOWN"
result.Status = string(models.StatusDown)
result.ErrorReason = truncateError(err.Error(), maxErrorLength)
return result
}
@@ -107,7 +107,7 @@ func runHTTPCheck(ctx context.Context, site models.Site, strict, insecure *http.
result.StatusCode = resp.StatusCode
if !isCodeAccepted(resp.StatusCode, site.AcceptedCodes) {
result.Status = "DOWN"
result.Status = string(models.StatusDown)
expected := site.AcceptedCodes
if expected == "" {
expected = defaultAcceptedCodes
@@ -120,7 +120,7 @@ func runHTTPCheck(ctx context.Context, site models.Site, strict, insecure *http.
cert := resp.TLS.PeerCertificates[0]
result.CertExpiry = cert.NotAfter
if time.Now().After(cert.NotAfter) {
result.Status = "SSL EXP"
result.Status = string(models.StatusSSLExp)
result.ErrorReason = "SSL certificate expired"
}
}
@@ -136,7 +136,7 @@ func runPingCheck(_ context.Context, site models.Site) CheckResult {
pinger, err := probing.NewPinger(host)
if err != nil {
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "ping setup: " + err.Error()}
return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "ping setup: " + err.Error()}
}
pinger.Count = 1
pinger.Timeout = siteTimeout(site)
@@ -147,14 +147,14 @@ func runPingCheck(_ context.Context, site models.Site) CheckResult {
latency := time.Since(start)
if err != nil {
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "ping failed: " + err.Error()}
return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), LatencyNs: latency.Nanoseconds(), ErrorReason: "ping failed: " + err.Error()}
}
if pinger.Statistics().PacketsRecv == 0 {
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "no ICMP response"}
return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), LatencyNs: latency.Nanoseconds(), ErrorReason: "no ICMP response"}
}
stats := pinger.Statistics()
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: stats.AvgRtt.Nanoseconds()}
return CheckResult{SiteID: site.ID, Status: string(models.StatusUp), LatencyNs: stats.AvgRtt.Nanoseconds()}
}
func runPortCheck(_ context.Context, site models.Site) CheckResult {
@@ -170,10 +170,10 @@ func runPortCheck(_ context.Context, site models.Site) CheckResult {
latency := time.Since(start)
if err != nil {
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: truncateError(err.Error(), maxErrorLength)}
return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), LatencyNs: latency.Nanoseconds(), ErrorReason: truncateError(err.Error(), maxErrorLength)}
}
_ = conn.Close()
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
return CheckResult{SiteID: site.ID, Status: string(models.StatusUp), LatencyNs: latency.Nanoseconds()}
}
func runDNSCheck(_ context.Context, site models.Site) CheckResult {
@@ -221,12 +221,12 @@ func runDNSCheck(_ context.Context, site models.Site) CheckResult {
latency := time.Since(start)
if err != nil {
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS query failed: " + err.Error()}
return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS query failed: " + err.Error()}
}
if r.Rcode != dns.RcodeSuccess {
return CheckResult{SiteID: site.ID, Status: "DOWN", StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS RCODE: " + dns.RcodeToString[r.Rcode]}
return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS RCODE: " + dns.RcodeToString[r.Rcode]}
}
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
return CheckResult{SiteID: site.ID, Status: string(models.StatusUp), LatencyNs: latency.Nanoseconds()}
}
func siteTimeout(site models.Site) time.Duration {
+51 -52
View File
@@ -334,7 +334,7 @@ func (e *Engine) RecordHeartbeat(token string) bool {
}
var (
prevStatus string
prevStatus models.Status
name string
alertID int
downSince time.Time
@@ -346,12 +346,12 @@ func (e *Engine) RecordHeartbeat(token string) bool {
downSince = s.StatusChangedAt // captured before mutation = when it went down
s.LastCheck = time.Now()
s.Status = "UP"
s.Status = models.StatusUp
s.FailureCount = 0
s.Latency = 0
s.LastError = ""
s.LastSuccessAt = time.Now()
if prevStatus != "UP" {
if prevStatus != models.StatusUp {
s.StatusChangedAt = time.Now()
}
})
@@ -360,13 +360,13 @@ func (e *Engine) RecordHeartbeat(token string) bool {
}
switch prevStatus {
case "PENDING":
case models.StatusPending:
e.AddLog(fmt.Sprintf("Push Monitor '%s' received first heartbeat", name))
case "LATE":
case models.StatusLate:
e.AddLog(fmt.Sprintf("Push Monitor '%s' heartbeat arrived (was late)", name))
case "STALE":
case models.StatusStale:
e.AddLog(fmt.Sprintf("Push Monitor '%s' heartbeat arrived (was stale)", name))
case "DOWN":
case models.StatusDown:
downDur := ""
if !downSince.IsZero() {
downDur = fmt.Sprintf(" (was down %s)", fmtDurationShort(time.Since(downSince)))
@@ -375,8 +375,8 @@ func (e *Engine) RecordHeartbeat(token string) bool {
go e.triggerAlert(alertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.%s", name, downDur))
}
if prevStatus != "UP" && prevStatus != "PENDING" {
e.enqueueWrite(writeStateChange{siteID: targetID, fromStatus: prevStatus, toStatus: "UP"})
if prevStatus != models.StatusUp && prevStatus != models.StatusPending {
e.enqueueWrite(writeStateChange{siteID: targetID, fromStatus: string(prevStatus), toStatus: string(models.StatusUp)})
}
return true
@@ -434,12 +434,12 @@ func (e *Engine) Start(ctx context.Context) {
e.mu.RUnlock()
if !exists {
e.mu.Lock()
s.Status = "PENDING"
s.Status = models.StatusPending
if h, ok := e.GetHistory(s.ID); ok && len(h.Statuses) > 0 {
if h.Statuses[len(h.Statuses)-1] {
s.Status = "UP"
s.Status = models.StatusUp
} else {
s.Status = "DOWN"
s.Status = models.StatusDown
}
if len(h.Latencies) > 0 {
s.Latency = h.Latencies[len(h.Latencies)-1]
@@ -686,7 +686,7 @@ func (e *Engine) checkByID(ctx context.Context, id int) {
}
func (e *Engine) checkPush(_ context.Context, site models.Site) {
if site.Status == "PENDING" {
if site.Status == models.StatusPending {
return
}
@@ -702,16 +702,16 @@ func (e *Engine) checkPush(_ context.Context, site models.Site) {
now := time.Now()
if now.After(graceEnd) {
if site.Status != "DOWN" {
e.handleStatusChange(site, "DOWN", 0, 0, "heartbeat missed")
if site.Status != models.StatusDown {
e.handleStatusChange(site, string(models.StatusDown), 0, 0, "heartbeat missed")
}
} else if now.After(staleMark) {
if site.Status != "STALE" {
e.handleStatusChange(site, "STALE", 0, 0, "heartbeat stale")
if site.Status != models.StatusStale {
e.handleStatusChange(site, string(models.StatusStale), 0, 0, "heartbeat stale")
}
} else if now.After(overdue) {
if site.Status != "LATE" {
e.handleStatusChange(site, "LATE", 0, 0, "heartbeat overdue")
if site.Status != models.StatusLate {
e.handleStatusChange(site, string(models.StatusLate), 0, 0, "heartbeat overdue")
}
}
}
@@ -727,9 +727,10 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
}
inMaint := e.isInMaintenance(snap.ID)
status := models.Status(rawStatus)
var (
prev, next string
prev, next models.Status
name, typ string
alertID int
failCount, maxRetries int
@@ -745,7 +746,7 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
_, exists := e.applyState(snap.ID, func(s *models.Site) {
// A non-UP result computed from a stale snapshot must not override a
// heartbeat (or newer check) that landed while we were evaluating.
if rawStatus != "UP" && s.LastCheck.After(snap.LastCheck) {
if status != models.StatusUp && s.LastCheck.After(snap.LastCheck) {
skipped = true
return
}
@@ -764,24 +765,24 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
s.HasSSL = snap.HasSSL
s.CertExpiry = snap.CertExpiry
s.LastError = errorReason
if rawStatus == "UP" {
if status == models.StatusUp {
s.LastSuccessAt = time.Now()
s.LastError = ""
}
// Status + failure-count transition, based on the CURRENT live status.
if rawStatus == "UP" {
if status == models.StatusUp {
s.FailureCount = 0
s.Status = "UP"
s.Status = models.StatusUp
} else {
if s.FailureCount <= s.MaxRetries {
s.FailureCount++
}
if s.FailureCount > s.MaxRetries {
if s.Status != rawStatus {
if s.Status != status {
confirmedDown = true
}
s.Status = rawStatus
s.Status = status
s.FailureCount = s.MaxRetries + 1
} else {
failedCheck = true
@@ -789,16 +790,16 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
}
failCount = s.FailureCount
if s.Status != prev && prev != "PENDING" {
if s.Status != prev && prev != models.StatusPending {
s.StatusChangedAt = time.Now()
} else if s.StatusChangedAt.IsZero() && s.Status != "PENDING" {
} else if s.StatusChangedAt.IsZero() && s.Status != models.StatusPending {
s.StatusChangedAt = time.Now()
}
// SSL expiry warning (fresh HasSSL/CertExpiry + config threshold).
if typ == "http" && s.CheckSSL && s.HasSSL {
days := int(time.Until(s.CertExpiry).Hours() / 24)
if days <= s.ExpiryThreshold && !s.SentSSLWarning && rawStatus != "SSL EXP" {
if days <= s.ExpiryThreshold && !s.SentSSLWarning && status != models.StatusSSLExp {
sslWarnFire = true
sslDays = days
s.SentSSLWarning = true
@@ -815,7 +816,7 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
return
}
e.recordCheck(snap.ID, latency, rawStatus == "UP")
e.recordCheck(snap.ID, latency, status == models.StatusUp)
if confirmedDown {
if errorReason != "" {
@@ -827,8 +828,8 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
e.AddLog(fmt.Sprintf("Monitor '%s' failed check %d/%d", name, failCount, maxRetries))
}
if changed && prev != "PENDING" {
e.enqueueWrite(writeStateChange{siteID: snap.ID, fromStatus: prev, toStatus: next, reason: errorReason})
if changed && prev != models.StatusPending {
e.enqueueWrite(writeStateChange{siteID: snap.ID, fromStatus: string(prev), toStatus: string(next), reason: errorReason})
}
if sslWarnFire {
@@ -839,13 +840,11 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
}
}
isBroken := func(s string) bool { return s == "DOWN" || s == "SSL EXP" }
if prev == "UP" && next == "LATE" {
if prev == models.StatusUp && next == models.StatusLate {
e.AddLog(fmt.Sprintf("Monitor '%s' heartbeat overdue", name))
}
if !isBroken(prev) && isBroken(next) && next != "PENDING" {
if !prev.IsBroken() && next.IsBroken() && next != models.StatusPending {
if inMaint {
e.AddLog(fmt.Sprintf("Monitor '%s' is DOWN (alerts suppressed — maintenance)", name))
} else {
@@ -859,7 +858,7 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
e.triggerAlert(alertID, "🚨 ALERT", msg)
}
}
if isBroken(prev) && next == "UP" {
if prev.IsBroken() && next == models.StatusUp {
downDur := ""
if !downSince.IsZero() {
downDur = fmt.Sprintf(" (was down %s)", fmtDurationShort(time.Since(downSince)))
@@ -869,7 +868,7 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
e.triggerAlert(alertID, "✅ RECOVERY", fmt.Sprintf("Monitor '%s' is UP%s", name, downDur))
}
}
if prev == "LATE" && next == "UP" && !isBroken(prev) {
if prev == models.StatusLate && next == models.StatusUp && !prev.IsBroken() {
e.AddLog(fmt.Sprintf("Monitor '%s' heartbeat arrived (was late)", name))
}
}
@@ -991,12 +990,12 @@ func (e *Engine) GetDisplayStatus(site models.Site) string {
if e.isInMaintenance(site.ID) {
return "MAINT"
}
return site.Status
return string(site.Status)
}
func (e *Engine) checkGroup(_ context.Context, site models.Site) {
e.mu.RLock()
status := "UP"
status := models.StatusUp
hasChildren := false
for _, child := range e.liveState {
if child.ParentID != site.ID || child.Type == "group" {
@@ -1006,20 +1005,20 @@ func (e *Engine) checkGroup(_ context.Context, site models.Site) {
if child.Paused || e.isInMaintenance(child.ID) {
continue
}
if child.Status == "DOWN" || child.Status == "SSL EXP" {
status = "DOWN"
} else if child.Status == "STALE" && status != "DOWN" {
status = "STALE"
} else if child.Status == "LATE" && status != "DOWN" && status != "STALE" {
status = "LATE"
} else if child.Status == "PENDING" && status != "DOWN" && status != "STALE" && status != "LATE" {
status = "PENDING"
if child.Status == models.StatusDown || child.Status == models.StatusSSLExp {
status = models.StatusDown
} else if child.Status == models.StatusStale && status != models.StatusDown {
status = models.StatusStale
} else if child.Status == models.StatusLate && status != models.StatusDown && status != models.StatusStale {
status = models.StatusLate
} else if child.Status == models.StatusPending && status != models.StatusDown && status != models.StatusStale && status != models.StatusLate {
status = models.StatusPending
}
}
e.mu.RUnlock()
if !hasChildren {
status = "PENDING"
status = models.StatusPending
}
e.applyState(site.ID, func(s *models.Site) {
@@ -1072,15 +1071,15 @@ func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, i
aggUp, avgLatency := AggregateStatus(results, e.aggStrategy)
rawStatus := "UP"
probeStatus := models.StatusUp
if !aggUp {
rawStatus = "DOWN"
probeStatus = models.StatusDown
}
updatedSite := site
updatedSite.Latency = time.Duration(avgLatency)
updatedSite.LastCheck = time.Now()
e.handleStatusChange(updatedSite, rawStatus, 0, time.Duration(avgLatency), errorReason)
e.handleStatusChange(updatedSite, string(probeStatus), 0, time.Duration(avgLatency), errorReason)
}
func (e *Engine) GetProbeResults(siteID int) map[string]NodeResult {
+9 -13
View File
@@ -16,14 +16,14 @@ type SLAReport struct {
MTBF time.Duration
}
func ComputeSLA(changes []models.StateChange, currentStatus string, window time.Duration) SLAReport {
func ComputeSLA(changes []models.StateChange, currentStatus models.Status, window time.Duration) SLAReport {
now := time.Now()
windowStart := now.Add(-window)
report := SLAReport{Window: window}
if len(changes) == 0 {
if isDown(currentStatus) {
if models.Status(currentStatus).IsBroken() {
report.UptimePct = 0
report.Downtime = window
} else {
@@ -40,7 +40,7 @@ func ComputeSLA(changes []models.StateChange, currentStatus string, window time.
}
// Determine status at window start: last transition before or at windowStart.
statusAtStart := "UP"
statusAtStart := string(models.StatusUp)
for i := len(sorted) - 1; i >= 0; i-- {
if !sorted[i].ChangedAt.After(windowStart) {
statusAtStart = sorted[i].ToStatus
@@ -51,7 +51,7 @@ func ComputeSLA(changes []models.StateChange, currentStatus string, window time.
var upTime, downTime time.Duration
var outages []time.Duration
cursor := windowStart
wasDown := isDown(statusAtStart)
wasDown := models.Status(statusAtStart).IsBroken()
if wasDown {
report.OutageCount = 1
@@ -77,7 +77,7 @@ func ComputeSLA(changes []models.StateChange, currentStatus string, window time.
upTime += seg
}
newDown := isDown(sc.ToStatus)
newDown := models.Status(sc.ToStatus).IsBroken()
if !wasDown && newDown {
report.OutageCount++
outageStart = sc.ChangedAt
@@ -127,7 +127,7 @@ func ComputeSLA(changes []models.StateChange, currentStatus string, window time.
return report
}
func ComputeDailyBreakdown(changes []models.StateChange, currentStatus string, days int, now time.Time) []DayReport {
func ComputeDailyBreakdown(changes []models.StateChange, currentStatus models.Status, days int, now time.Time) []DayReport {
reports := make([]DayReport, days)
for i := 0; i < days; i++ {
@@ -159,10 +159,6 @@ type DayReport struct {
UptimePct float64
}
func isDown(status string) bool {
return status == "DOWN" || status == "SSL EXP"
}
func filterChangesForWindow(changes []models.StateChange, start, end time.Time) []models.StateChange {
var filtered []models.StateChange
for _, sc := range changes {
@@ -180,7 +176,7 @@ func inferStatusAt(changes []models.StateChange, at time.Time) string {
return sc.ToStatus
}
}
return "UP"
return string(models.StatusUp)
}
func computeSLAForWindow(changes []models.StateChange, statusAtStart string, start, end time.Time) float64 {
@@ -193,7 +189,7 @@ func computeSLAForWindow(changes []models.StateChange, statusAtStart string, sta
var upTime, downTime time.Duration
cursor := start
wasDown := isDown(statusAtStart)
wasDown := models.Status(statusAtStart).IsBroken()
for _, sc := range sorted {
if sc.ChangedAt.Before(start) || !sc.ChangedAt.Before(end) {
@@ -205,7 +201,7 @@ func computeSLAForWindow(changes []models.StateChange, statusAtStart string, sta
} else {
upTime += seg
}
wasDown = isDown(sc.ToStatus)
wasDown = models.Status(sc.ToStatus).IsBroken()
cursor = sc.ChangedAt
}
+13 -13
View File
@@ -137,24 +137,24 @@ func TestComputeDailyBreakdown(t *testing.T) {
}
}
func TestIsDown(t *testing.T) {
if !isDown("DOWN") {
t.Error("DOWN should be down")
func TestIsBroken(t *testing.T) {
if !models.StatusDown.IsBroken() {
t.Error("DOWN should be broken")
}
if !isDown("SSL EXP") {
t.Error("SSL EXP should be down")
if !models.StatusSSLExp.IsBroken() {
t.Error("SSL EXP should be broken")
}
if isDown("UP") {
t.Error("UP should not be down")
if models.StatusUp.IsBroken() {
t.Error("UP should not be broken")
}
if isDown("LATE") {
t.Error("LATE should not be down")
if models.StatusLate.IsBroken() {
t.Error("LATE should not be broken")
}
if isDown("STALE") {
t.Error("STALE should not be down")
if models.StatusStale.IsBroken() {
t.Error("STALE should not be broken")
}
if isDown("PENDING") {
t.Error("PENDING should not be down")
if models.StatusPending.IsBroken() {
t.Error("PENDING should not be broken")
}
}