diff --git a/internal/metrics/prometheus.go b/internal/metrics/prometheus.go
new file mode 100644
index 0000000..24f4faa
--- /dev/null
+++ b/internal/metrics/prometheus.go
@@ -0,0 +1,132 @@
+// Package metrics exposes monitor state in the Prometheus text exposition
+// format.
+package metrics
+
+import (
+	"fmt"
+	"go-upkeep/internal/models"
+	"go-upkeep/internal/monitor"
+	"net/http"
+	"sort"
+	"strings"
+)
+
+// Handler returns an http.HandlerFunc that renders the engine's sites as
+// Prometheus text-format metrics (version 0.0.4).
+//
+// Sites are sorted by ID so the output order is stable between scrapes. Each
+// metric family is written as one contiguous group (HELP, TYPE, then all of
+// its samples), as the exposition format requires.
+func Handler(eng *monitor.Engine) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		sites := eng.GetAllSites()
+		sort.Slice(sites, func(i, j int) bool { return sites[i].ID < sites[j].ID })
+
+		var b strings.Builder
+
+		writeHelp(&b, "upkeep_monitor_up", "gauge", "Whether the monitor is up (1) or down (0).")
+		for _, s := range sites {
+			writeGauge(&b, "upkeep_monitor_up", labels(s), boolValue(s.Status == "UP"))
+		}
+
+		writeHelp(&b, "upkeep_monitor_latency_seconds", "gauge", "Last check latency in seconds.")
+		for _, s := range sites {
+			writeGauge(&b, "upkeep_monitor_latency_seconds", labels(s), s.Latency.Seconds())
+		}
+
+		writeHelp(&b, "upkeep_monitor_status_code", "gauge", "HTTP response status code of the last check.")
+		for _, s := range sites {
+			// Emitted for HTTP monitors only.
+			if s.Type != "http" {
+				continue
+			}
+			writeGauge(&b, "upkeep_monitor_status_code", labels(s), float64(s.StatusCode))
+		}
+
+		writeHelp(&b, "upkeep_monitor_check_timestamp_seconds", "gauge", "Unix timestamp of the last check.")
+		for _, s := range sites {
+			// Sites with a zero LastCheck are omitted.
+			if s.LastCheck.IsZero() {
+				continue
+			}
+			writeGauge(&b, "upkeep_monitor_check_timestamp_seconds", labels(s), float64(s.LastCheck.Unix()))
+		}
+
+		writeHelp(&b, "upkeep_monitor_paused", "gauge", "Whether the monitor is paused (1) or active (0).")
+		for _, s := range sites {
+			writeGauge(&b, "upkeep_monitor_paused", labels(s), boolValue(s.Paused))
+		}
+
+		writeHelp(&b, "upkeep_monitor_cert_expiry_timestamp_seconds", "gauge", "Unix timestamp when the SSL certificate expires.")
+		for _, s := range sites {
+			if !s.HasSSL || s.CertExpiry.IsZero() {
+				continue
+			}
+			writeGauge(&b, "upkeep_monitor_cert_expiry_timestamp_seconds", labels(s), float64(s.CertExpiry.Unix()))
+		}
+
+		// Call GetHistory once per site and keep both counters, then emit the
+		// two families one after the other: samples of different families
+		// must not be interleaved.
+		type checkCounts struct {
+			lbl       string
+			total, up float64
+		}
+		counts := make([]checkCounts, 0, len(sites))
+		for _, s := range sites {
+			h, ok := eng.GetHistory(s.ID)
+			if !ok {
+				continue
+			}
+			counts = append(counts, checkCounts{lbl: labels(s), total: float64(h.TotalChecks), up: float64(h.UpChecks)})
+		}
+
+		writeHelp(&b, "upkeep_monitor_checks_total", "counter", "Total number of checks performed.")
+		for _, c := range counts {
+			writeGauge(&b, "upkeep_monitor_checks_total", c.lbl, c.total)
+		}
+
+		writeHelp(&b, "upkeep_monitor_checks_up_total", "counter", "Total number of successful checks.")
+		for _, c := range counts {
+			writeGauge(&b, "upkeep_monitor_checks_up_total", c.lbl, c.up)
+		}
+
+		w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
+		w.Write([]byte(b.String()))
+	}
+}
+
+// labels renders the id, name and type label pairs attached to every sample
+// of a site. The name is escaped; id and type are written as-is.
+// NOTE(review): type is assumed never to contain '"', '\' or a newline — confirm.
+func labels(s models.Site) string {
+	return fmt.Sprintf(`id="%d",name="%s",type="%s"`, s.ID, escapeLabelValue(s.Name), s.Type)
+}
+
+// labelValueEscaper escapes backslash, double quote and line feed, the
+// characters that must be escaped inside a quoted label value.
+var labelValueEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", `\n`)
+
+// escapeLabelValue returns s escaped for use as a Prometheus label value.
+func escapeLabelValue(s string) string {
+	return labelValueEscaper.Replace(s)
+}
+
+// writeHelp writes the "# HELP" and "# TYPE" lines that open a metric family.
+func writeHelp(b *strings.Builder, name, typ, help string) {
+	fmt.Fprintf(b, "# HELP %s %s\n# TYPE %s %s\n", name, help, name, typ)
+}
+
+// writeGauge writes one sample line of the form name{labels} value. Despite
+// its name it is also used for counter samples; the line syntax is the same.
+func writeGauge(b *strings.Builder, name, labels string, val float64) {
+	fmt.Fprintf(b, "%s{%s} %g\n", name, labels, val)
+}
+
+// boolValue maps true to 1 and false to 0.
+func boolValue(v bool) float64 {
+	if v {
+		return 1
+	}
+	return 0
+}
diff --git a/internal/metrics/prometheus_test.go b/internal/metrics/prometheus_test.go
new file mode 100644
index 0000000..7cbf680
--- /dev/null
+++ b/internal/metrics/prometheus_test.go
@@ -0,0 +1,120 @@
+package metrics
+
+import (
+	"context"
+	"go-upkeep/internal/models"
+	"go-upkeep/internal/monitor"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+)
+
+// mockStore is a no-op store that only serves a fixed list of sites.
+type mockStore struct {
+	sites []models.Site
+}
+
+func (m *mockStore) Init() error                                              { return nil }
+func (m *mockStore) GetSites() ([]models.Site, error)                         { return m.sites, nil }
+func (m *mockStore) AddSite(models.Site) error                                { return nil }
+func (m *mockStore) UpdateSite(models.Site) error                             { return nil }
+func (m *mockStore) UpdateSitePaused(int, bool) error                         { return nil }
+func (m *mockStore) DeleteSite(int) error                                     { return nil }
+func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error)              { return nil, nil }
+func (m *mockStore) GetAlert(int) (models.AlertConfig, error)                 { return models.AlertConfig{}, nil }
+func (m *mockStore) AddAlert(string, string, map[string]string) error         { return nil }
+func (m *mockStore) UpdateAlert(int, string, string, map[string]string) error { return nil }
+func (m *mockStore) DeleteAlert(int) error                                    { return nil }
+func (m *mockStore) GetAllUsers() ([]models.User, error)                      { return nil, nil }
+func (m *mockStore) AddUser(string, string, string) error                     { return nil }
+func (m *mockStore) UpdateUser(int, string, string, string) error             { return nil }
+func (m *mockStore) DeleteUser(int) error                                     { return nil }
+func (m *mockStore) SaveCheck(int, int64, bool) error                         { return nil }
+func (m *mockStore) LoadAllHistory(int) (map[int][]models.CheckRecord, error) {
+	return nil, nil
+}
+func (m *mockStore) ExportData() (models.Backup, error) { return models.Backup{}, nil }
+func (m *mockStore) ImportData(models.Backup) error     { return nil }
+
+// TestMetricsHandler scrapes the handler for a two-site engine and checks the
+// status code, content type and a selection of expected output lines.
+func TestMetricsHandler(t *testing.T) {
+	ms := &mockStore{
+		sites: []models.Site{
+			{ID: 1, Name: "Example", URL: "https://example.com", Type: "http", Interval: 30},
+			{ID: 2, Name: "DNS Check", Type: "dns", Interval: 60},
+		},
+	}
+	eng := monitor.NewEngine(ms)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	eng.Start(ctx)
+	// NOTE(review): the sleep presumably lets the engine load its sites; a readiness signal would be less flaky.
+	time.Sleep(100 * time.Millisecond)
+
+	rec := httptest.NewRecorder()
+	Handler(eng)(rec, httptest.NewRequest("GET", "/metrics", nil))
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d", rec.Code)
+	}
+
+	body := rec.Body.String()
+
+	ct := rec.Header().Get("Content-Type")
+	if !strings.Contains(ct, "text/plain") {
+		t.Errorf("expected text/plain content type, got %q", ct)
+	}
+
+	expected := []string{
+		"# HELP upkeep_monitor_up",
+		"# TYPE upkeep_monitor_up gauge",
+		`upkeep_monitor_up{id="1",name="Example",type="http"}`,
+		`upkeep_monitor_up{id="2",name="DNS Check",type="dns"}`,
+		"# HELP upkeep_monitor_latency_seconds",
+		"# HELP upkeep_monitor_paused",
+		"# HELP upkeep_monitor_checks_total",
+	}
+	for _, s := range expected {
+		if !strings.Contains(body, s) {
+			t.Errorf("missing expected line: %s", s)
+		}
+	}
+
+	// Samples of one metric family must form a single contiguous group.
+	seen := map[string]bool{}
+	prev := ""
+	for _, line := range strings.Split(body, "\n") {
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		name := line
+		if i := strings.IndexByte(line, '{'); i >= 0 {
+			name = line[:i]
+		}
+		if name != prev && seen[name] {
+			t.Errorf("samples of %s are split across several groups", name)
+		}
+		seen[name] = true
+		prev = name
+	}
+}
+
+// TestEscapeLabelValue checks the escaping of quotes, newlines and backslashes.
+func TestEscapeLabelValue(t *testing.T) {
+	cases := []struct{ in, want string }{
+		{`simple`, `simple`},
+		{`has "quotes"`, `has \"quotes\"`},
+		{"has\nnewline", `has\nnewline`},
+		{`back\slash`, `back\\slash`},
+		{`\"`, `\\\"`},
+	}
+	for _, tc := range cases {
+		got := escapeLabelValue(tc.in)
+		if got != tc.want {
+			t.Errorf("escapeLabelValue(%q) = %q, want %q", tc.in, got, tc.want)
+		}
+	}
+}
diff --git a/internal/server/server.go b/internal/server/server.go
index ac26bd2..fdf7f9b 100644
--- a/internal/server/server.go
+++ b/internal/server/server.go
@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"go-upkeep/internal/importer"
+	"go-upkeep/internal/metrics"
 	"go-upkeep/internal/models"
 	"go-upkeep/internal/monitor"
 	"go-upkeep/internal/store"
@@ -242,7 +243,10 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) {
 		w.Write([]byte(fmt.Sprintf("Imported %d monitors, %d alerts from Kuma v%s", len(backup.Sites), len(backup.Alerts), kb.Version)))
 	})
 
-	// 6. Status Page
+	// 6. Prometheus Metrics
+	mux.HandleFunc("/metrics", metrics.Handler(eng))
+
+	// 7. Status Page
 	if cfg.EnableStatus {
 		mux.HandleFunc("/status", func(w http.ResponseWriter, r *http.Request) { renderStatusPage(w, cfg.Title, eng) })
 		mux.HandleFunc("/status/json", func(w http.ResponseWriter, r *http.Request) {