feat(metrics): add Prometheus /metrics endpoint
Adds a zero-dependency implementation of the Prometheus text exposition format. It exposes monitor up/down state, latency, HTTP status code, last-check timestamp, pause state, SSL certificate expiry, and check counters — all read from in-memory state.
This commit is contained in:
@@ -0,0 +1,99 @@
|
|||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"go-upkeep/internal/models"
|
||||||
|
"go-upkeep/internal/monitor"
|
||||||
|
"net/http"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Handler(eng *monitor.Engine) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
sites := eng.GetAllSites()
|
||||||
|
sort.Slice(sites, func(i, j int) bool { return sites[i].ID < sites[j].ID })
|
||||||
|
|
||||||
|
var b strings.Builder
|
||||||
|
|
||||||
|
writeHelp(&b, "upkeep_monitor_up", "gauge", "Whether the monitor is up (1) or down (0).")
|
||||||
|
for _, s := range sites {
|
||||||
|
val := 0
|
||||||
|
if s.Status == "UP" {
|
||||||
|
val = 1
|
||||||
|
}
|
||||||
|
writeGauge(&b, "upkeep_monitor_up", labels(s), float64(val))
|
||||||
|
}
|
||||||
|
|
||||||
|
writeHelp(&b, "upkeep_monitor_latency_seconds", "gauge", "Last check latency in seconds.")
|
||||||
|
for _, s := range sites {
|
||||||
|
writeGauge(&b, "upkeep_monitor_latency_seconds", labels(s), s.Latency.Seconds())
|
||||||
|
}
|
||||||
|
|
||||||
|
writeHelp(&b, "upkeep_monitor_status_code", "gauge", "HTTP response status code of the last check.")
|
||||||
|
for _, s := range sites {
|
||||||
|
if s.Type != "http" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
writeGauge(&b, "upkeep_monitor_status_code", labels(s), float64(s.StatusCode))
|
||||||
|
}
|
||||||
|
|
||||||
|
writeHelp(&b, "upkeep_monitor_check_timestamp_seconds", "gauge", "Unix timestamp of the last check.")
|
||||||
|
for _, s := range sites {
|
||||||
|
if s.LastCheck.IsZero() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
writeGauge(&b, "upkeep_monitor_check_timestamp_seconds", labels(s), float64(s.LastCheck.Unix()))
|
||||||
|
}
|
||||||
|
|
||||||
|
writeHelp(&b, "upkeep_monitor_paused", "gauge", "Whether the monitor is paused (1) or active (0).")
|
||||||
|
for _, s := range sites {
|
||||||
|
val := 0
|
||||||
|
if s.Paused {
|
||||||
|
val = 1
|
||||||
|
}
|
||||||
|
writeGauge(&b, "upkeep_monitor_paused", labels(s), float64(val))
|
||||||
|
}
|
||||||
|
|
||||||
|
writeHelp(&b, "upkeep_monitor_cert_expiry_timestamp_seconds", "gauge", "Unix timestamp when the SSL certificate expires.")
|
||||||
|
for _, s := range sites {
|
||||||
|
if !s.HasSSL || s.CertExpiry.IsZero() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
writeGauge(&b, "upkeep_monitor_cert_expiry_timestamp_seconds", labels(s), float64(s.CertExpiry.Unix()))
|
||||||
|
}
|
||||||
|
|
||||||
|
writeHelp(&b, "upkeep_monitor_checks_total", "counter", "Total number of checks performed.")
|
||||||
|
writeHelp(&b, "upkeep_monitor_checks_up_total", "counter", "Total number of successful checks.")
|
||||||
|
for _, s := range sites {
|
||||||
|
h, ok := eng.GetHistory(s.ID)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
writeGauge(&b, "upkeep_monitor_checks_total", labels(s), float64(h.TotalChecks))
|
||||||
|
writeGauge(&b, "upkeep_monitor_checks_up_total", labels(s), float64(h.UpChecks))
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
|
||||||
|
w.Write([]byte(b.String()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func labels(s models.Site) string {
|
||||||
|
return fmt.Sprintf(`id="%d",name="%s",type="%s"`, s.ID, escapeLabelValue(s.Name), s.Type)
|
||||||
|
}
|
||||||
|
|
||||||
|
// escapeLabelValue escapes s for use inside a double-quoted Prometheus label
// value: backslash becomes \\, double quote becomes \", and a line feed
// becomes the two characters \n. All other bytes pass through unchanged.
func escapeLabelValue(s string) string {
	// Common case: nothing to escape, return the input as is.
	if !strings.ContainsAny(s, "\\\"\n") {
		return s
	}

	var out strings.Builder
	out.Grow(len(s) + 2)
	// Scanning bytes is safe: the three characters of interest are ASCII and
	// never occur inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(s); i++ {
		switch c := s[i]; c {
		case '\\':
			out.WriteString(`\\`)
		case '"':
			out.WriteString(`\"`)
		case '\n':
			out.WriteString(`\n`)
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}
|
||||||
|
|
||||||
|
// writeHelp appends the two header lines of a metric family to b:
// "# HELP <name> <help>" followed by "# TYPE <name> <typ>".
func writeHelp(b *strings.Builder, name, typ, help string) {
	// Fprintln joins its operands with single spaces and ends the line,
	// which is exactly the header layout.
	fmt.Fprintln(b, "# HELP", name, help)
	fmt.Fprintln(b, "# TYPE", name, typ)
}
|
||||||
|
|
||||||
|
// writeGauge appends one sample line of the form "name{labels} value" to b.
// labels is inserted verbatim, so the caller must pass an already-escaped
// label list. The value is formatted with %g.
func writeGauge(b *strings.Builder, name, labels string, val float64) {
	b.WriteString(name)
	b.WriteByte('{')
	b.WriteString(labels)
	b.WriteString("} ")
	fmt.Fprintf(b, "%g\n", val)
}
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"go-upkeep/internal/models"
|
||||||
|
"go-upkeep/internal/monitor"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
type mockStore struct {
|
||||||
|
sites []models.Site
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mockStore) Init() error { return nil }
|
||||||
|
func (m *mockStore) GetSites() ([]models.Site, error) { return m.sites, nil }
|
||||||
|
func (m *mockStore) AddSite(models.Site) error { return nil }
|
||||||
|
func (m *mockStore) UpdateSite(models.Site) error { return nil }
|
||||||
|
func (m *mockStore) UpdateSitePaused(int, bool) error { return nil }
|
||||||
|
func (m *mockStore) DeleteSite(int) error { return nil }
|
||||||
|
func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error) { return nil, nil }
|
||||||
|
func (m *mockStore) GetAlert(int) (models.AlertConfig, error) { return models.AlertConfig{}, nil }
|
||||||
|
func (m *mockStore) AddAlert(string, string, map[string]string) error { return nil }
|
||||||
|
func (m *mockStore) UpdateAlert(int, string, string, map[string]string) error { return nil }
|
||||||
|
func (m *mockStore) DeleteAlert(int) error { return nil }
|
||||||
|
func (m *mockStore) GetAllUsers() ([]models.User, error) { return nil, nil }
|
||||||
|
func (m *mockStore) AddUser(string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) UpdateUser(int, string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) DeleteUser(int) error { return nil }
|
||||||
|
func (m *mockStore) SaveCheck(int, int64, bool) error { return nil }
|
||||||
|
func (m *mockStore) LoadAllHistory(int) (map[int][]models.CheckRecord, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
func (m *mockStore) ExportData() (models.Backup, error) { return models.Backup{}, nil }
|
||||||
|
func (m *mockStore) ImportData(models.Backup) error { return nil }
|
||||||
|
|
||||||
|
func TestMetricsHandler(t *testing.T) {
|
||||||
|
ms := &mockStore{
|
||||||
|
sites: []models.Site{
|
||||||
|
{ID: 1, Name: "Example", URL: "https://example.com", Type: "http", Interval: 30},
|
||||||
|
{ID: 2, Name: "DNS Check", Type: "dns", Interval: 60},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
eng := monitor.NewEngine(ms)
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
eng.Start(ctx)
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
Handler(eng)(rec, httptest.NewRequest("GET", "/metrics", nil))
|
||||||
|
cancel()
|
||||||
|
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("expected 200, got %d", rec.Code)
|
||||||
|
}
|
||||||
|
|
||||||
|
body := rec.Body.String()
|
||||||
|
|
||||||
|
ct := rec.Header().Get("Content-Type")
|
||||||
|
if !strings.Contains(ct, "text/plain") {
|
||||||
|
t.Errorf("expected text/plain content type, got %q", ct)
|
||||||
|
}
|
||||||
|
|
||||||
|
expected := []string{
|
||||||
|
"# HELP upkeep_monitor_up",
|
||||||
|
"# TYPE upkeep_monitor_up gauge",
|
||||||
|
`upkeep_monitor_up{id="1",name="Example",type="http"}`,
|
||||||
|
`upkeep_monitor_up{id="2",name="DNS Check",type="dns"}`,
|
||||||
|
"# HELP upkeep_monitor_latency_seconds",
|
||||||
|
"# HELP upkeep_monitor_paused",
|
||||||
|
"# HELP upkeep_monitor_checks_total",
|
||||||
|
}
|
||||||
|
for _, s := range expected {
|
||||||
|
if !strings.Contains(body, s) {
|
||||||
|
t.Errorf("missing expected line: %s", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEscapeLabelValue(t *testing.T) {
|
||||||
|
cases := []struct{ in, want string }{
|
||||||
|
{`simple`, `simple`},
|
||||||
|
{`has "quotes"`, `has \"quotes\"`},
|
||||||
|
{"has\nnewline", `has\nnewline`},
|
||||||
|
{`back\slash`, `back\\slash`},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
got := escapeLabelValue(tc.in)
|
||||||
|
if got != tc.want {
|
||||||
|
t.Errorf("escapeLabelValue(%q) = %q, want %q", tc.in, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"go-upkeep/internal/importer"
|
"go-upkeep/internal/importer"
|
||||||
|
"go-upkeep/internal/metrics"
|
||||||
"go-upkeep/internal/models"
|
"go-upkeep/internal/models"
|
||||||
"go-upkeep/internal/monitor"
|
"go-upkeep/internal/monitor"
|
||||||
"go-upkeep/internal/store"
|
"go-upkeep/internal/store"
|
||||||
@@ -242,7 +243,10 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) {
|
|||||||
w.Write([]byte(fmt.Sprintf("Imported %d monitors, %d alerts from Kuma v%s", len(backup.Sites), len(backup.Alerts), kb.Version)))
|
w.Write([]byte(fmt.Sprintf("Imported %d monitors, %d alerts from Kuma v%s", len(backup.Sites), len(backup.Alerts), kb.Version)))
|
||||||
})
|
})
|
||||||
|
|
||||||
// 6. Status Page
|
// 6. Prometheus Metrics
|
||||||
|
mux.HandleFunc("/metrics", metrics.Handler(eng))
|
||||||
|
|
||||||
|
// 7. Status Page
|
||||||
if cfg.EnableStatus {
|
if cfg.EnableStatus {
|
||||||
mux.HandleFunc("/status", func(w http.ResponseWriter, r *http.Request) { renderStatusPage(w, cfg.Title, eng) })
|
mux.HandleFunc("/status", func(w http.ResponseWriter, r *http.Request) { renderStatusPage(w, cfg.Title, eng) })
|
||||||
mux.HandleFunc("/status/json", func(w http.ResponseWriter, r *http.Request) {
|
mux.HandleFunc("/status/json", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|||||||
Reference in New Issue
Block a user