Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | ec898ff943 | | |
| | 38c7739995 | | |
| | 5679dffffa | | |
| | 9a4985e355 | | |
| | 65406ce69c | | |
| | 2474b341ad | | |
| | b0762800ac | | |
| | 08bcdd6481 | | |
| | ebf8bfb097 | | |
| | b62a721277 | | |
| | 8f17deba67 | | |
| | 026e969b74 | | |
| | cfbf01274d | | |
| | 26e297cbae | | |
| | 0aa2f9cd8a | | |
| | f17f06a1c6 | | |
| | b14d5e19db | | |
| | a2b38ddc60 | | |
| | 5dc31108f8 | | |
| | 63773b13d0 | | |
| | bc3a44beac | | |
+4
-9
@@ -1,15 +1,10 @@
|
||||
.git
|
||||
.ssh/
|
||||
.gitea/
|
||||
tmp/
|
||||
vendor/
|
||||
|
||||
# Security: keep sensitive/local files out of Docker build context
|
||||
.ssh/
|
||||
.claude/
|
||||
.github/
|
||||
.gitea/
|
||||
CLAUDE.md
|
||||
*.db
|
||||
*.db-journal
|
||||
*.local.json
|
||||
*.local.md
|
||||
*.local
|
||||
*.db
|
||||
*.db-journal
|
||||
|
||||
@@ -8,7 +8,13 @@ on:
|
||||
jobs:
|
||||
release:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: sh
|
||||
steps:
|
||||
- name: Install build tools
|
||||
run: apk add --no-cache git gcc musl-dev
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
@@ -34,9 +40,10 @@ jobs:
|
||||
env:
|
||||
GORELEASER_FORCE_TOKEN: gitea
|
||||
GITEA_TOKEN: ${{ secrets.RELEASE_TOKEN }}
|
||||
GITEA_API_URL: http://gitea:3000/api/v1
|
||||
|
||||
docker:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker-builder
|
||||
needs: [release]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
+1
-1
@@ -27,7 +27,7 @@ go.work
|
||||
# End of https://www.toptal.com/developers/gitignore/api/go
|
||||
|
||||
/uptop
|
||||
uptop.db
|
||||
uptop.db*
|
||||
|
||||
.ssh
|
||||
|
||||
|
||||
+2
-2
@@ -1,12 +1,12 @@
|
||||
version: 2
|
||||
|
||||
gitea_urls:
|
||||
api: https://gitea.lerkolabs.com/api/v1
|
||||
api: "{{ if index .Env \"GITEA_API_URL\" }}{{ .Env.GITEA_API_URL }}{{ else }}https://gitea.lerkolabs.com/api/v1{{ end }}"
|
||||
download: https://gitea.lerkolabs.com
|
||||
|
||||
release:
|
||||
gitea:
|
||||
owner: lerko
|
||||
owner: lerkolabs
|
||||
name: uptop
|
||||
|
||||
builds:
|
||||
|
||||
+78
-30
@@ -1,46 +1,94 @@
|
||||
# Changelog
|
||||
|
||||
## [2026.05.2] — 2026-05-23
|
||||
## [2026.05.5] — 2026-05-29
|
||||
|
||||
### Added
|
||||
- Comprehensive test suite (94 tests across monitor, server, cluster)
|
||||
- golangci-lint config with CI enforcement
|
||||
- Gitea Actions CI pipeline (test + lint)
|
||||
- Graceful shutdown for HTTP and SSH servers
|
||||
- Context-aware alert delivery with timeout
|
||||
- Request size limits on all POST endpoints
|
||||
- Constant-time secret comparison
|
||||
- Check interval jitter to prevent thundering herd
|
||||
- `--version` flag with build metadata injection
|
||||
- Error reason display when monitors go DOWN (#33)
|
||||
- Push monitor lifecycle — PENDING, LATE, DOWN states (#34)
|
||||
- Logs tab overhaul — severity tags, filtering, recovery durations (#35)
|
||||
- Alert channel health indicator and test alerts (#36)
|
||||
- TUI screenshots in `assets/` (#32)
|
||||
- CI status badge in README
|
||||
|
||||
### Fixed
|
||||
- Silent JSON unmarshal failures in alert settings
|
||||
- Panic on crypto/rand failure replaced with error return
|
||||
- Alert delivery errors now logged instead of swallowed
|
||||
- log.Fatalf in goroutines replaced with log.Printf
|
||||
- Deprecated LineUp/LineDown API calls
|
||||
### Changed
|
||||
- Visual polish — detail sections, column headers, alert detail (#37)
|
||||
- README rewritten with hero image, badges, collapsible install sections (#32)
|
||||
- Changelog rewritten to match actual CalVer tag history
|
||||
- Migrated to `lerkolabs` org namespace (#38)
|
||||
- Docker-compose files moved to `deploy/`
|
||||
|
||||
## [2026.05.4] — 2026-05-27
|
||||
|
||||
### Added
|
||||
- SSH user seeding from `UPTOP_ADMIN_KEY` env var and `UPTOP_KEYS` file (#31)
|
||||
- GoReleaser for binary releases
|
||||
- govulncheck in CI pipeline
|
||||
- Multi-arch Docker builds (amd64 + arm64)
|
||||
|
||||
### Changed
|
||||
- CI overhaul — Go 1.26, build caching, streamlined pipeline (#30)
|
||||
- Bumped golang.org/x/crypto v0.47.0 → v0.52.0
|
||||
- Bumped Alpine 3.21 → 3.23
|
||||
|
||||
### Security
|
||||
- Cluster secret compared with crypto/subtle (timing-safe)
|
||||
- http.MaxBytesReader on all JSON endpoints
|
||||
- ReadHeaderTimeout added to HTTP server
|
||||
- Phase 1: SSRF protection, input validation, safe dial (#26)
|
||||
- Phase 2: TLS hardening, auth bypass fixes, rate limiting (#27)
|
||||
- Phase 3: Graceful degradation, connection limits, timeout enforcement (#28)
|
||||
- Phase 4: Code quality, error handling, linter fixes (#29)
|
||||
|
||||
## [2026.05.1] — 2026-05-14
|
||||
## [2026.05.3] — 2026-05-25
|
||||
|
||||
### Added
|
||||
- Theme system with 5 dark palettes — Default, Dracula, Nord, Tokyo Night, Gruvbox (#24)
|
||||
- `--version` flag with build metadata injection
|
||||
- Gitea Actions CI pipeline — test + lint (#20)
|
||||
- golangci-lint configuration
|
||||
- Comprehensive test suite — 94 tests across monitor, server, cluster (#19)
|
||||
- CONTRIBUTING.md and SECURITY.md
|
||||
|
||||
### Changed
|
||||
- Renamed project from go-upkeep to uptop (#25)
|
||||
- Updated LICENSE with dual copyright for independent fork
|
||||
|
||||
### Fixed
|
||||
- Form validators scoped to relevant monitor types (#23)
|
||||
- Graceful shutdown for HTTP, SSH servers and database (#19)
|
||||
- Constant-time secret comparison, request size limits (#19)
|
||||
- Check interval jitter to prevent thundering herd (#19)
|
||||
- TUI visual polish — zebra striping, group icons, sparkline stats (#18)
|
||||
|
||||
## [2026.05.2] — 2026-05-22
|
||||
|
||||
### Added
|
||||
- Incident management and maintenance windows (#17)
|
||||
- Production docker-compose.yml
|
||||
|
||||
### Fixed
|
||||
- Viewport sizing and dynamic chrome calculation (#16)
|
||||
- Form height constrained to terminal with resize forwarding
|
||||
- Monitors in maintenance excluded from down count and pulse
|
||||
- Group status correctly skips children in maintenance
|
||||
|
||||
## [2026.05.1] — 2026-05-16
|
||||
|
||||
### Added
|
||||
- Distributed probing with leader + probe nodes
|
||||
- Config-as-code (YAML apply/export with dry-run, prune)
|
||||
- TUI visual polish (zebra striping, sparklines, breadcrumbs)
|
||||
- Incident management and maintenance windows
|
||||
- 9 alert providers (Discord, Slack, Email, Ntfy, Telegram, PagerDuty, Pushover, Gotify, Webhook)
|
||||
- Config-as-code — YAML apply/export with dry-run and prune
|
||||
- TUI polish — status bar, tab badges, detail panel, modals
|
||||
- DOWN-first sort, health pulse, site filter
|
||||
- Type icons in sites table
|
||||
- Sparkline history graphs
|
||||
- Persistent state — uptime, status, latency, and logs survive restarts
|
||||
- Push token stripping from /status/json response
|
||||
|
||||
## [2026.04.1] — Initial independent fork
|
||||
## [2026.04.1] — 2026-04-01
|
||||
|
||||
### Added
|
||||
- SSH-accessible TUI (Bubble Tea + Wish)
|
||||
- 6 check types (HTTP, Push, Ping, Port, DNS, Group)
|
||||
- SSH-accessible TUI built on Bubble Tea + Wish
|
||||
- 6 check types — HTTP, Push, Ping, Port, DNS, Group
|
||||
- 9 alert providers — Discord, Slack, Email, Ntfy, Telegram, PagerDuty, Pushover, Gotify, Webhook
|
||||
- SQLite and PostgreSQL support
|
||||
- HA clustering with automatic failover
|
||||
- Prometheus metrics endpoint
|
||||
- Public status page
|
||||
- Uptime Kuma import
|
||||
- Prometheus /metrics endpoint
|
||||
- Public status page (HTML + JSON)
|
||||
- Uptime Kuma backup import
|
||||
|
||||
@@ -1,19 +1,50 @@
|
||||
# uptop
|
||||
<div align="center">
|
||||
<h1>uptop</h1>
|
||||
<p>Self-hosted uptime monitoring with a TUI over SSH.</p>
|
||||
<p>No browser. No client install. Just <code>ssh -p 23234 your-server</code>.</p>
|
||||
|
||||
Self-hosted uptime monitor with a TUI you can access over SSH. No browser, no install on the client — just `ssh -p 23234 your-server`.
|
||||
<p>
|
||||
<a href="https://gitea.lerkolabs.com/lerkolabs/uptop/actions/workflows/ci.yml"><img src="https://gitea.lerkolabs.com/lerkolabs/uptop/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
||||
<img src="https://img.shields.io/badge/license-MIT-blue" alt="MIT License">
|
||||
<img src="https://img.shields.io/badge/go-1.26-00ADD8?logo=go&logoColor=white" alt="Go 1.26">
|
||||
<img src="https://img.shields.io/docker/pulls/lerkolabs/uptop" alt="Docker Pulls">
|
||||
</p>
|
||||
|
||||
Built on the foundation of [RDGames/go-upkeep](https://github.com/RDGames/go-upkeep).
|
||||
<img src="assets/monitors.png" alt="uptop monitors view" width="800">
|
||||
</div>
|
||||
|
||||
## What it does
|
||||
## What is this?
|
||||
|
||||
- **6 check types**: HTTP, Push (heartbeat), Ping, Port, DNS, Groups
|
||||
- **9 alert providers**: Discord, Slack, Email, Ntfy, Webhook, Telegram, PagerDuty, Pushover, Gotify
|
||||
- **Config as code**: define monitors in YAML, apply declaratively, version control your setup
|
||||
- **HA clustering**: leader/follower with automatic failover
|
||||
- **Prometheus metrics**: `/metrics` endpoint for Grafana dashboards
|
||||
- **Public status page**: HTML + JSON, toggle with an env var
|
||||
- **SQLite or Postgres**: SQLite for single-node, Postgres for production
|
||||
- **Uptime Kuma import**: migrate from Kuma with one command
|
||||
An uptime monitor you manage entirely from the terminal. It runs as a server, exposes an SSH endpoint, and drops you into a full TUI — monitors, alerts, logs, nodes, all there.
|
||||
|
||||
Built on [RDGames/go-upkeep](https://github.com/RDGames/go-upkeep). Rewritten for clustering, config-as-code, and a proper dashboard.
|
||||
|
||||
## Features
|
||||
|
||||
- **6 check types** — HTTP, Push (heartbeat), Ping, Port, DNS, Groups
|
||||
- **9 alert providers** — Discord, Slack, Email, Ntfy, Webhook, Telegram, PagerDuty, Pushover, Gotify
|
||||
- **Config as code** — define monitors in YAML, apply declaratively, version control your setup
|
||||
- **HA clustering** — leader/follower with automatic failover
|
||||
- **Prometheus metrics** — `/metrics` endpoint, wire it straight to Grafana
|
||||
- **Public status page** — HTML + JSON, toggle with an env var
|
||||
- **SQLite or Postgres** — SQLite for single-node, Postgres for production
|
||||
- **Uptime Kuma import** — migrate from Kuma with one command
|
||||
|
||||
## Screenshots
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td><img src="assets/detail.png" alt="detail panel" width="400"></td>
|
||||
<td><img src="assets/alerts.png" alt="alerts view" width="400"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><img src="assets/logs.png" alt="logs view" width="400"></td>
|
||||
<td><img src="assets/nodes.png" alt="cluster nodes" width="400"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2" align="center"><img src="assets/theme.png" alt="theme selection" width="600"></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
## Quick start
|
||||
|
||||
@@ -22,7 +53,7 @@ go run cmd/uptop/main.go
|
||||
ssh -p 23234 localhost
|
||||
```
|
||||
|
||||
Seed some demo data to see it in action:
|
||||
Want some data to look at first? Seed the demo data:
|
||||
|
||||
```bash
|
||||
go run cmd/uptop/main.go -demo
|
||||
@@ -30,22 +61,45 @@ go run cmd/uptop/main.go -demo
|
||||
|
||||
## Install
|
||||
|
||||
### From source
|
||||
<details>
|
||||
<summary><strong>Docker (recommended)</strong></summary>
|
||||
|
||||
```bash
|
||||
go install gitea.lerkolabs.com/lerko/uptop/cmd/uptop@latest
|
||||
```yaml
|
||||
services:
|
||||
uptop:
|
||||
image: lerkolabs/uptop:latest
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "23234:23234"
|
||||
- "8080:8080"
|
||||
environment:
|
||||
- UPTOP_DB_TYPE=sqlite
|
||||
- UPTOP_DB_DSN=/data/uptop.db
|
||||
- UPTOP_STATUS_ENABLED=true
|
||||
# - UPTOP_ADMIN_KEY=ssh-ed25519 AAAA... you@host
|
||||
volumes:
|
||||
- ./data:/data
|
||||
```
|
||||
|
||||
### Docker
|
||||
First run: set `UPTOP_ADMIN_KEY` to your SSH public key, or attach to the container and add it in the Users tab.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>Binary</strong></summary>
|
||||
|
||||
Download from [Releases](https://gitea.lerkolabs.com/lerkolabs/uptop/releases).
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>From source</strong></summary>
|
||||
|
||||
```bash
|
||||
docker pull lerko/uptop:latest
|
||||
docker run -p 23234:23234 -p 8080:8080 -v ./data:/data lerko/uptop
|
||||
go install gitea.lerkolabs.com/lerkolabs/uptop/cmd/uptop@latest
|
||||
```
|
||||
|
||||
### Binary
|
||||
|
||||
Download from [Releases](https://gitea.lerkolabs.com/lerko/uptop/releases).
|
||||
</details>
|
||||
|
||||
## Config as code
|
||||
|
||||
@@ -63,35 +117,11 @@ uptop apply -f monitors.yaml --dry-run # see what would change
|
||||
uptop apply -f monitors.yaml --prune # delete anything not in the YAML
|
||||
```
|
||||
|
||||
See [docs/config-as-code.md](docs/config-as-code.md) for the full reference.
|
||||
|
||||
## Docker
|
||||
|
||||
```yaml
|
||||
services:
|
||||
monitor:
|
||||
build: .
|
||||
restart: unless-stopped
|
||||
stdin_open: true
|
||||
tty: true
|
||||
ports:
|
||||
- "23234:23234"
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- ./data:/data
|
||||
- ./ssh_keys:/app/.ssh
|
||||
environment:
|
||||
- UPTOP_DB_TYPE=sqlite
|
||||
- UPTOP_DB_DSN=/data/uptop.db
|
||||
- UPTOP_STATUS_ENABLED=true
|
||||
- UPTOP_CLUSTER_SECRET=change-me
|
||||
```
|
||||
|
||||
First run: attach to the container (`docker attach uptop`), go to the Users tab, add your SSH public key. Then detach with `Ctrl+P, Ctrl+Q` and connect normally over SSH.
|
||||
Full reference in [docs/config-as-code.md](docs/config-as-code.md).
|
||||
|
||||
## Environment variables
|
||||
|
||||
| Variable | Default | What it does |
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `UPTOP_PORT` | `23234` | SSH server port |
|
||||
| `UPTOP_HTTP_PORT` | `8080` | HTTP server port (status page, push, metrics) |
|
||||
@@ -103,6 +133,7 @@ First run: attach to the container (`docker attach uptop`), go to the Users tab,
|
||||
| `UPTOP_PEER_URL` | | Leader URL for follower nodes |
|
||||
| `UPTOP_CLUSTER_SECRET` | | Shared key for cluster + API auth |
|
||||
| `UPTOP_INSECURE_SKIP_VERIFY` | `false` | Skip TLS verification for checks |
|
||||
| `UPTOP_ADMIN_KEY` | | SSH public key seeded as first admin on startup |
|
||||
|
||||
## Migrating from Uptime Kuma
|
||||
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 84 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 206 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 232 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 57 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 253 KiB |
+9
-8
@@ -17,14 +17,14 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/cluster"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/config"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/importer"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/server"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/store"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/tui"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/cluster"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/config"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/importer"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/server"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/tui"
|
||||
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
"github.com/charmbracelet/ssh"
|
||||
@@ -385,6 +385,7 @@ func runServe(args []string) {
|
||||
|
||||
eng.InitHistory()
|
||||
eng.InitLogs()
|
||||
eng.InitAlertHealth()
|
||||
eng.Start(ctx)
|
||||
|
||||
tlsCert := os.Getenv("UPTOP_TLS_CERT")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
module gitea.lerkolabs.com/lerko/uptop
|
||||
module gitea.lerkolabs.com/lerkolabs/uptop
|
||||
|
||||
go 1.26.3
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
)
|
||||
|
||||
var alertClient = &http.Client{Timeout: 10 * time.Second}
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
)
|
||||
|
||||
func TestHTTPProviderDiscord(t *testing.T) {
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
|
||||
@@ -10,8 +10,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
)
|
||||
|
||||
// --- Mock Store (minimal, for monitor.NewEngine) ---
|
||||
@@ -53,21 +53,27 @@ func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return models.Pr
|
||||
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
|
||||
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
|
||||
func (m *mockStore) DeleteNode(string) error { return nil }
|
||||
func (m *mockStore) SaveLog(string) error { return nil }
|
||||
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
|
||||
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
|
||||
func (m *mockStore) SaveLog(string) error { return nil }
|
||||
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
|
||||
func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
|
||||
// --- Cluster Start Tests ---
|
||||
|
||||
|
||||
@@ -12,8 +12,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
)
|
||||
|
||||
type ProbeConfig struct {
|
||||
@@ -127,9 +127,10 @@ func probeFetchAssignments(ctx context.Context, client *http.Client, cfg ProbeCo
|
||||
}
|
||||
|
||||
type probeResultItem struct {
|
||||
SiteID int `json:"site_id"`
|
||||
LatencyNs int64 `json:"latency_ns"`
|
||||
IsUp bool `json:"is_up"`
|
||||
SiteID int `json:"site_id"`
|
||||
LatencyNs int64 `json:"latency_ns"`
|
||||
IsUp bool `json:"is_up"`
|
||||
ErrorReason string `json:"error_reason,omitempty"`
|
||||
}
|
||||
|
||||
func probeExecuteChecks(ctx context.Context, sites []models.Site, strict, insecure *http.Client, allowPrivate bool) []probeResultItem {
|
||||
@@ -154,9 +155,10 @@ loop:
|
||||
cr := monitor.RunCheck(s, strict, insecure, false, allowPrivate)
|
||||
mu.Lock()
|
||||
results = append(results, probeResultItem{
|
||||
SiteID: s.ID,
|
||||
LatencyNs: cr.LatencyNs,
|
||||
IsUp: cr.Status == "UP",
|
||||
SiteID: s.ID,
|
||||
LatencyNs: cr.LatencyNs,
|
||||
IsUp: cr.Status == "UP",
|
||||
ErrorReason: cr.ErrorReason,
|
||||
})
|
||||
mu.Unlock()
|
||||
}(site)
|
||||
|
||||
@@ -2,8 +2,8 @@ package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/store"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
|
||||
"reflect"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/store"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -5,8 +5,8 @@ import (
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/store"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ package importer
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -2,8 +2,8 @@ package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
@@ -2,13 +2,14 @@ package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
)
|
||||
|
||||
type mockStore struct {
|
||||
@@ -50,21 +51,27 @@ func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return m
|
||||
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
|
||||
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
|
||||
func (m *mockStore) DeleteNode(string) error { return nil }
|
||||
func (m *mockStore) SaveLog(string) error { return nil }
|
||||
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
|
||||
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
|
||||
func (m *mockStore) SaveLog(string) error { return nil }
|
||||
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
|
||||
func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
|
||||
func TestMetricsHandler(t *testing.T) {
|
||||
ms := &mockStore{
|
||||
|
||||
@@ -27,14 +27,26 @@ type Site struct {
|
||||
Paused bool
|
||||
Regions string
|
||||
|
||||
FailureCount int
|
||||
Status string
|
||||
StatusCode int
|
||||
Latency time.Duration
|
||||
CertExpiry time.Time
|
||||
HasSSL bool
|
||||
LastCheck time.Time
|
||||
SentSSLWarning bool
|
||||
FailureCount int
|
||||
Status string
|
||||
StatusCode int
|
||||
Latency time.Duration
|
||||
CertExpiry time.Time
|
||||
HasSSL bool
|
||||
LastCheck time.Time
|
||||
SentSSLWarning bool
|
||||
LastError string
|
||||
StatusChangedAt time.Time
|
||||
LastSuccessAt time.Time
|
||||
}
|
||||
|
||||
type StateChange struct {
|
||||
ID int
|
||||
SiteID int
|
||||
FromStatus string
|
||||
ToStatus string
|
||||
ErrorReason string
|
||||
ChangedAt time.Time
|
||||
}
|
||||
|
||||
type AlertConfig struct {
|
||||
@@ -67,6 +79,17 @@ type ProbeNode struct {
|
||||
Version string
|
||||
}
|
||||
|
||||
// AlertHealthRecord is the persisted send health of an alert channel. It lets the
|
||||
// "last sent" / health indicators survive restarts instead of resetting to "never".
|
||||
type AlertHealthRecord struct {
|
||||
AlertID int
|
||||
LastSendAt time.Time
|
||||
LastSendOK bool
|
||||
LastError string
|
||||
SendCount int
|
||||
FailCount int
|
||||
}
|
||||
|
||||
type MaintenanceWindow struct {
|
||||
ID int
|
||||
MonitorID int
|
||||
|
||||
@@ -11,10 +11,11 @@ const (
|
||||
)
|
||||
|
||||
type NodeResult struct {
|
||||
NodeID string
|
||||
IsUp bool
|
||||
LatencyNs int64
|
||||
CheckedAt time.Time
|
||||
NodeID string
|
||||
IsUp bool
|
||||
LatencyNs int64
|
||||
CheckedAt time.Time
|
||||
ErrorReason string
|
||||
}
|
||||
|
||||
func AggregateStatus(results []NodeResult, strategy AggregationStrategy) (isUp bool, avgLatencyNs int64) {
|
||||
|
||||
+35
-16
@@ -2,25 +2,27 @@ package monitor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
probing "github.com/prometheus-community/pro-bing"
|
||||
)
|
||||
|
||||
type CheckResult struct {
|
||||
SiteID int
|
||||
Status string // "UP", "DOWN", "SSL EXP"
|
||||
StatusCode int
|
||||
LatencyNs int64
|
||||
HasSSL bool
|
||||
CertExpiry time.Time
|
||||
SiteID int
|
||||
Status string // "UP", "DOWN", "SSL EXP"
|
||||
StatusCode int
|
||||
LatencyNs int64
|
||||
HasSSL bool
|
||||
CertExpiry time.Time
|
||||
ErrorReason string
|
||||
}
|
||||
|
||||
func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bool, allowPrivate ...bool) CheckResult {
|
||||
@@ -35,7 +37,7 @@ func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bo
|
||||
if ips, err := net.LookupIP(host); err == nil {
|
||||
for _, ip := range ips {
|
||||
if isPrivateIP(ip) {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "target resolves to private IP"}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -52,7 +54,7 @@ func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bo
|
||||
case "dns":
|
||||
return runDNSCheck(site)
|
||||
default:
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "unsupported monitor type: " + site.Type}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +70,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, method, site.URL, nil)
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "invalid request: " + err.Error()}
|
||||
}
|
||||
|
||||
client := strict
|
||||
@@ -88,6 +90,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
||||
|
||||
if err != nil {
|
||||
result.Status = "DOWN"
|
||||
result.ErrorReason = truncateError(err.Error(), 256)
|
||||
return result
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
@@ -95,6 +98,11 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
||||
result.StatusCode = resp.StatusCode
|
||||
if !isCodeAccepted(resp.StatusCode, site.AcceptedCodes) {
|
||||
result.Status = "DOWN"
|
||||
expected := site.AcceptedCodes
|
||||
if expected == "" {
|
||||
expected = "200-299"
|
||||
}
|
||||
result.ErrorReason = fmt.Sprintf("HTTP %d (expected %s)", resp.StatusCode, expected)
|
||||
}
|
||||
|
||||
if site.CheckSSL && resp.TLS != nil && len(resp.TLS.PeerCertificates) > 0 {
|
||||
@@ -103,6 +111,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
||||
result.CertExpiry = cert.NotAfter
|
||||
if time.Now().After(cert.NotAfter) {
|
||||
result.Status = "SSL EXP"
|
||||
result.ErrorReason = "SSL certificate expired"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,7 +126,7 @@ func runPingCheck(site models.Site) CheckResult {
|
||||
|
||||
pinger, err := probing.NewPinger(host)
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "ping setup: " + err.Error()}
|
||||
}
|
||||
pinger.Count = 1
|
||||
pinger.Timeout = siteTimeout(site)
|
||||
@@ -127,8 +136,11 @@ func runPingCheck(site models.Site) CheckResult {
|
||||
err = pinger.Run()
|
||||
latency := time.Since(start)
|
||||
|
||||
if err != nil || pinger.Statistics().PacketsRecv == 0 {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds()}
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "ping failed: " + err.Error()}
|
||||
}
|
||||
if pinger.Statistics().PacketsRecv == 0 {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "no ICMP response"}
|
||||
}
|
||||
|
||||
stats := pinger.Statistics()
|
||||
@@ -148,7 +160,7 @@ func runPortCheck(site models.Site) CheckResult {
|
||||
latency := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds()}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: truncateError(err.Error(), 256)}
|
||||
}
|
||||
_ = conn.Close()
|
||||
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
|
||||
@@ -199,10 +211,10 @@ func runDNSCheck(site models.Site) CheckResult {
|
||||
latency := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds()}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS query failed: " + err.Error()}
|
||||
}
|
||||
if r.Rcode != dns.RcodeSuccess {
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds()}
|
||||
return CheckResult{SiteID: site.ID, Status: "DOWN", StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS RCODE: " + dns.RcodeToString[r.Rcode]}
|
||||
}
|
||||
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
|
||||
}
|
||||
@@ -235,3 +247,10 @@ func isCodeAccepted(code int, accepted string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// truncateError shortens s to at most max bytes so that long error messages
// stay within the size budget used for stored/displayed error reasons.
//
// Strings that already fit are returned unchanged. Longer strings are cut and
// suffixed with "..." (the suffix counts toward max). Two edge cases are
// handled explicitly:
//   - max too small to hold any text plus the ellipsis: the result is as much
//     of the ellipsis as fits (empty for max <= 0) instead of panicking on a
//     negative slice bound.
//   - the cut point landing inside a multi-byte UTF-8 sequence: the cut is
//     moved back to the start of that rune so the result stays valid UTF-8.
func truncateError(s string, max int) string {
	const ellipsis = "..."

	if len(s) <= max {
		return s
	}
	if max <= 0 {
		return ""
	}
	if max <= len(ellipsis) {
		return ellipsis[:max]
	}

	// len(s) > max here, so cut is always a valid index into s.
	cut := max - len(ellipsis)
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; back up until
	// the cut sits on a rune boundary.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + ellipsis
}
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
)
|
||||
|
||||
func TestRunCheck_HTTP_Success(t *testing.T) {
|
||||
|
||||
+217
-29
@@ -11,18 +11,26 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/alert"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/store"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/alert"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
|
||||
)
|
||||
|
||||
const (
|
||||
maxLogEntries = 100
|
||||
pollInterval = 5 * time.Second
|
||||
pushGracePeriod = 5 * time.Second
|
||||
minCheckInterval = 5
|
||||
minPushGrace = 60 * time.Second
|
||||
)
|
||||
|
||||
// AlertHealth is the per-alert-channel delivery summary kept by the Engine.
// One value is stored per alert ID and updated after every send attempt.
type AlertHealth struct {
	// LastSendAt is when the most recent send attempt finished.
	LastSendAt time.Time
	// LastSendOK reports whether that most recent attempt succeeded.
	LastSendOK bool
	// LastError holds the error text of the most recent attempt; it is
	// cleared again after a successful send.
	LastError string
	// SendCount counts every attempt, successful or not.
	SendCount int
	// FailCount counts only the attempts that returned an error.
	FailCount int
}
|
||||
|
||||
type Engine struct {
|
||||
mu sync.RWMutex
|
||||
liveState map[int]models.Site
|
||||
@@ -42,6 +50,9 @@ type Engine struct {
|
||||
probeResults map[int]map[string]NodeResult
|
||||
aggStrategy AggregationStrategy
|
||||
|
||||
alertHealthMu sync.RWMutex
|
||||
alertHealth map[int]AlertHealth
|
||||
|
||||
db store.Store
|
||||
insecureSkipVerify bool
|
||||
allowPrivateTargets bool
|
||||
@@ -64,6 +75,7 @@ func newEngine(s store.Store, allowPrivateTargets bool) *Engine {
|
||||
histories: make(map[int]*SiteHistory),
|
||||
tokenIndex: make(map[string]int),
|
||||
probeResults: make(map[int]map[string]NodeResult),
|
||||
alertHealth: make(map[int]AlertHealth),
|
||||
aggStrategy: AggAnyDown,
|
||||
isActive: true,
|
||||
allowPrivateTargets: allowPrivateTargets,
|
||||
@@ -96,6 +108,19 @@ func sanitizeLog(s string) string {
|
||||
return s
|
||||
}
|
||||
|
||||
// fmtDurationShort renders d as a compact, human-readable span using at most
// two units, truncating (not rounding) anything smaller:
//
//	under a minute  -> "45s"
//	under an hour   -> "12m"
//	under a day     -> "3h 25m"
//	a day or longer -> "2d 2h"
//
// Negative durations are clamped to zero. Without the clamp every negative
// value falls into the "under a minute" branch and is printed as a large
// negative second count (e.g. "-7200s").
func fmtDurationShort(d time.Duration) string {
	if d < 0 {
		d = 0
	}

	switch {
	case d < time.Minute:
		return fmt.Sprintf("%ds", int(d/time.Second))
	case d < time.Hour:
		return fmt.Sprintf("%dm", int(d/time.Minute))
	case d < 24*time.Hour:
		return fmt.Sprintf("%dh %dm", int(d/time.Hour), int(d/time.Minute)%60)
	default:
		hours := int(d / time.Hour)
		return fmt.Sprintf("%dd %dh", hours/24, hours%24)
	}
}
|
||||
|
||||
func (e *Engine) AddLog(msg string) {
|
||||
e.logMu.Lock()
|
||||
defer e.logMu.Unlock()
|
||||
@@ -121,6 +146,26 @@ func (e *Engine) InitLogs() {
|
||||
e.logStore = logs
|
||||
}
|
||||
|
||||
// InitAlertHealth restores persisted alert send health so the dashboard shows real
|
||||
// "last sent" / health state on startup instead of resetting every channel to "never".
|
||||
func (e *Engine) InitAlertHealth() {
|
||||
records, err := e.db.LoadAlertHealth()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
e.alertHealthMu.Lock()
|
||||
defer e.alertHealthMu.Unlock()
|
||||
for id, r := range records {
|
||||
e.alertHealth[id] = AlertHealth{
|
||||
LastSendAt: r.LastSendAt,
|
||||
LastSendOK: r.LastSendOK,
|
||||
LastError: r.LastError,
|
||||
SendCount: r.SendCount,
|
||||
FailCount: r.FailCount,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) GetLogs() []string {
|
||||
e.logMu.RLock()
|
||||
defer e.logMu.RUnlock()
|
||||
@@ -186,17 +231,38 @@ func (e *Engine) RecordHeartbeat(token string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
prevStatus := site.Status
|
||||
site.LastCheck = time.Now()
|
||||
wasDown := site.Status == "DOWN"
|
||||
site.Status = "UP"
|
||||
site.FailureCount = 0
|
||||
site.Latency = 0
|
||||
site.LastError = ""
|
||||
site.LastSuccessAt = time.Now()
|
||||
|
||||
if prevStatus != "UP" {
|
||||
site.StatusChangedAt = time.Now()
|
||||
}
|
||||
|
||||
e.liveState[targetID] = site
|
||||
|
||||
if wasDown {
|
||||
e.AddLog(fmt.Sprintf("Push Monitor '%s' recovered", site.Name))
|
||||
e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.", site.Name))
|
||||
switch prevStatus {
|
||||
case "PENDING":
|
||||
e.AddLog(fmt.Sprintf("Push Monitor '%s' received first heartbeat", site.Name))
|
||||
case "LATE":
|
||||
e.AddLog(fmt.Sprintf("Push Monitor '%s' heartbeat arrived (was late)", site.Name))
|
||||
case "DOWN":
|
||||
downDur := ""
|
||||
if !site.StatusChangedAt.IsZero() {
|
||||
downDur = fmt.Sprintf(" (was down %s)", fmtDurationShort(time.Since(site.StatusChangedAt)))
|
||||
}
|
||||
e.AddLog(fmt.Sprintf("Push Monitor '%s' recovered%s", site.Name, downDur))
|
||||
go e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.%s", site.Name, downDur))
|
||||
}
|
||||
|
||||
if prevStatus != "UP" && prevStatus != "PENDING" {
|
||||
go func() { _ = e.db.SaveStateChange(targetID, prevStatus, "UP", "") }()
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -241,9 +307,6 @@ func (e *Engine) Start(ctx context.Context) {
|
||||
if !exists {
|
||||
e.mu.Lock()
|
||||
s.Status = "PENDING"
|
||||
if s.Type == "push" {
|
||||
s.LastCheck = time.Now()
|
||||
}
|
||||
if h, ok := e.GetHistory(s.ID); ok && len(h.Statuses) > 0 {
|
||||
if h.Statuses[len(h.Statuses)-1] {
|
||||
s.Status = "UP"
|
||||
@@ -283,6 +346,9 @@ func (e *Engine) UpdateSiteConfig(site models.Site) {
|
||||
site.LastCheck = existing.LastCheck
|
||||
site.SentSSLWarning = existing.SentSSLWarning
|
||||
site.FailureCount = existing.FailureCount
|
||||
site.LastError = existing.LastError
|
||||
site.StatusChangedAt = existing.StatusChangedAt
|
||||
site.LastSuccessAt = existing.LastSuccessAt
|
||||
e.liveState[site.ID] = site
|
||||
e.addToTokenIndex(site)
|
||||
}
|
||||
@@ -393,33 +459,62 @@ func (e *Engine) checkByID(id int) {
|
||||
updatedSite.CertExpiry = result.CertExpiry
|
||||
updatedSite.Latency = time.Duration(result.LatencyNs)
|
||||
updatedSite.LastCheck = time.Now()
|
||||
e.handleStatusChange(updatedSite, result.Status, result.StatusCode, time.Duration(result.LatencyNs))
|
||||
e.handleStatusChange(updatedSite, result.Status, result.StatusCode, time.Duration(result.LatencyNs), result.ErrorReason)
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) checkPush(site models.Site) {
|
||||
deadline := site.LastCheck.Add(time.Duration(site.Interval) * time.Second).Add(pushGracePeriod)
|
||||
if time.Now().After(deadline) {
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
} else if site.Status != "UP" {
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
if site.Status == "PENDING" {
|
||||
return
|
||||
}
|
||||
|
||||
interval := time.Duration(site.Interval) * time.Second
|
||||
grace := interval / 2
|
||||
if grace < minPushGrace {
|
||||
grace = minPushGrace
|
||||
}
|
||||
|
||||
overdue := site.LastCheck.Add(interval)
|
||||
graceEnd := overdue.Add(grace)
|
||||
now := time.Now()
|
||||
|
||||
if now.After(graceEnd) {
|
||||
if site.Status != "DOWN" {
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "heartbeat missed")
|
||||
}
|
||||
} else if now.After(overdue) {
|
||||
if site.Status != "LATE" {
|
||||
e.handleStatusChange(site, "LATE", 0, 0, "heartbeat overdue")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int, latency time.Duration) {
|
||||
func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int, latency time.Duration, errorReason string) {
|
||||
if !e.IsActive() {
|
||||
return
|
||||
}
|
||||
|
||||
newState := site
|
||||
newState.StatusCode = code
|
||||
newState.LastError = errorReason
|
||||
|
||||
if rawStatus == "UP" {
|
||||
newState.LastSuccessAt = time.Now()
|
||||
newState.LastError = ""
|
||||
} else {
|
||||
newState.LastSuccessAt = site.LastSuccessAt
|
||||
}
|
||||
|
||||
if site.Status == "UP" && rawStatus != "UP" {
|
||||
newState.FailureCount++
|
||||
if newState.FailureCount > site.MaxRetries {
|
||||
newState.Status = rawStatus
|
||||
newState.FailureCount = site.MaxRetries + 1
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN", site.Name))
|
||||
if errorReason != "" {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN: %s", site.Name, errorReason))
|
||||
} else {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN", site.Name))
|
||||
}
|
||||
} else {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' failed check %d/%d", site.Name, newState.FailureCount, site.MaxRetries))
|
||||
}
|
||||
@@ -431,6 +526,14 @@ func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int
|
||||
newState.FailureCount = site.MaxRetries + 1
|
||||
}
|
||||
|
||||
if newState.Status != site.Status && site.Status != "PENDING" {
|
||||
newState.StatusChangedAt = time.Now()
|
||||
} else if site.StatusChangedAt.IsZero() && newState.Status != "PENDING" {
|
||||
newState.StatusChangedAt = time.Now()
|
||||
} else {
|
||||
newState.StatusChangedAt = site.StatusChangedAt
|
||||
}
|
||||
|
||||
inMaint := e.isInMaintenance(site.ID)
|
||||
|
||||
if site.Type == "http" && site.CheckSSL && site.HasSSL {
|
||||
@@ -455,12 +558,24 @@ func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int
|
||||
|
||||
e.recordCheck(site.ID, latency, rawStatus == "UP")
|
||||
|
||||
if newState.Status != site.Status && site.Status != "PENDING" {
|
||||
go func() { _ = e.db.SaveStateChange(site.ID, site.Status, newState.Status, errorReason) }()
|
||||
}
|
||||
|
||||
isBroken := func(s string) bool { return s == "DOWN" || s == "SSL EXP" }
|
||||
|
||||
if site.Status == "UP" && newState.Status == "LATE" {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' heartbeat overdue", site.Name))
|
||||
}
|
||||
|
||||
if !isBroken(site.Status) && isBroken(newState.Status) && newState.Status != "PENDING" {
|
||||
if inMaint {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' is DOWN (alerts suppressed — maintenance)", site.Name))
|
||||
} else {
|
||||
msg := fmt.Sprintf("Monitor '%s' is DOWN (%s)", site.Name, rawStatus)
|
||||
if errorReason != "" {
|
||||
msg = fmt.Sprintf("Monitor '%s' is DOWN: %s", site.Name, errorReason)
|
||||
}
|
||||
if site.Type == "push" {
|
||||
msg = fmt.Sprintf("Push Monitor '%s' missed heartbeat.", site.Name)
|
||||
}
|
||||
@@ -468,11 +583,17 @@ func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int
|
||||
}
|
||||
}
|
||||
if isBroken(site.Status) && newState.Status == "UP" {
|
||||
if !inMaint {
|
||||
e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Monitor '%s' is UP", site.Name))
|
||||
} else {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' recovered (maintenance active, alert suppressed)", site.Name))
|
||||
downDur := ""
|
||||
if !site.StatusChangedAt.IsZero() {
|
||||
downDur = fmt.Sprintf(" (was down %s)", fmtDurationShort(time.Since(site.StatusChangedAt)))
|
||||
}
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' recovered%s", site.Name, downDur))
|
||||
if !inMaint {
|
||||
e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Monitor '%s' is UP%s", site.Name, downDur))
|
||||
}
|
||||
}
|
||||
if site.Status == "LATE" && newState.Status == "UP" && !isBroken(site.Status) {
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' heartbeat arrived (was late)", site.Name))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -489,11 +610,69 @@ func (e *Engine) triggerAlert(alertID int, title, message string) {
|
||||
defer cancel()
|
||||
if err := provider.Send(ctx, title, message); err != nil {
|
||||
e.AddLog(fmt.Sprintf("Alert send failed (%s): %v", cfg.Name, err))
|
||||
e.recordAlertResult(alertID, false, err.Error())
|
||||
} else {
|
||||
e.recordAlertResult(alertID, true, "")
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) recordAlertResult(alertID int, ok bool, errMsg string) {
|
||||
e.alertHealthMu.Lock()
|
||||
defer e.alertHealthMu.Unlock()
|
||||
h := e.alertHealth[alertID]
|
||||
h.LastSendAt = time.Now()
|
||||
h.LastSendOK = ok
|
||||
h.SendCount++
|
||||
if ok {
|
||||
h.LastError = ""
|
||||
} else {
|
||||
h.LastError = errMsg
|
||||
h.FailCount++
|
||||
}
|
||||
e.alertHealth[alertID] = h
|
||||
|
||||
// Persist best-effort so health survives restarts; DB IO off the alert path.
|
||||
go func(rec models.AlertHealthRecord) {
|
||||
_ = e.db.SaveAlertHealth(rec)
|
||||
}(models.AlertHealthRecord{
|
||||
AlertID: alertID,
|
||||
LastSendAt: h.LastSendAt,
|
||||
LastSendOK: h.LastSendOK,
|
||||
LastError: h.LastError,
|
||||
SendCount: h.SendCount,
|
||||
FailCount: h.FailCount,
|
||||
})
|
||||
}
|
||||
|
||||
func (e *Engine) GetAlertHealth(alertID int) AlertHealth {
|
||||
e.alertHealthMu.RLock()
|
||||
defer e.alertHealthMu.RUnlock()
|
||||
return e.alertHealth[alertID]
|
||||
}
|
||||
|
||||
func (e *Engine) TestAlert(alertID int) error {
|
||||
cfg, err := e.db.GetAlert(alertID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load alert: %w", err)
|
||||
}
|
||||
provider := alert.GetProvider(cfg)
|
||||
if provider == nil {
|
||||
return fmt.Errorf("no provider for type %q", cfg.Type)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
err = provider.Send(ctx, "🧪 Test Alert", fmt.Sprintf("Test notification from uptop for channel '%s'.", cfg.Name))
|
||||
if err != nil {
|
||||
e.recordAlertResult(alertID, false, err.Error())
|
||||
return err
|
||||
}
|
||||
e.recordAlertResult(alertID, true, "")
|
||||
e.AddLog(fmt.Sprintf("Test alert sent to '%s'", cfg.Name))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Engine) isInMaintenance(monitorID int) bool {
|
||||
inMaint, err := e.db.IsMonitorInMaintenance(monitorID)
|
||||
if err != nil {
|
||||
@@ -554,16 +733,17 @@ func (e *Engine) SetAggStrategy(strategy AggregationStrategy) {
|
||||
e.aggStrategy = strategy
|
||||
}
|
||||
|
||||
func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, isUp bool) {
|
||||
func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, isUp bool, errorReason string) {
|
||||
e.probeResultsMu.Lock()
|
||||
if e.probeResults[siteID] == nil {
|
||||
e.probeResults[siteID] = make(map[string]NodeResult)
|
||||
}
|
||||
e.probeResults[siteID][nodeID] = NodeResult{
|
||||
NodeID: nodeID,
|
||||
IsUp: isUp,
|
||||
LatencyNs: latencyNs,
|
||||
CheckedAt: time.Now(),
|
||||
NodeID: nodeID,
|
||||
IsUp: isUp,
|
||||
LatencyNs: latencyNs,
|
||||
CheckedAt: time.Now(),
|
||||
ErrorReason: errorReason,
|
||||
}
|
||||
results := make([]NodeResult, 0, len(e.probeResults[siteID]))
|
||||
for _, r := range e.probeResults[siteID] {
|
||||
@@ -588,7 +768,7 @@ func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, i
|
||||
updatedSite := site
|
||||
updatedSite.Latency = time.Duration(avgLatency)
|
||||
updatedSite.LastCheck = time.Now()
|
||||
e.handleStatusChange(updatedSite, rawStatus, 0, time.Duration(avgLatency))
|
||||
e.handleStatusChange(updatedSite, rawStatus, 0, time.Duration(avgLatency), errorReason)
|
||||
}
|
||||
|
||||
func (e *Engine) GetProbeResults(siteID int) map[string]NodeResult {
|
||||
@@ -601,3 +781,11 @@ func (e *Engine) GetProbeResults(siteID int) map[string]NodeResult {
|
||||
}
|
||||
return cp
|
||||
}
|
||||
|
||||
func (e *Engine) GetStateChanges(siteID int, limit int) []models.StateChange {
|
||||
changes, err := e.db.GetStateChanges(siteID, limit)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return changes
|
||||
}
|
||||
|
||||
@@ -2,10 +2,11 @@ package monitor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
)
|
||||
|
||||
// --- Mock Store ---
|
||||
@@ -62,18 +63,24 @@ func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return m
|
||||
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
|
||||
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
|
||||
func (m *mockStore) DeleteNode(string) error { return nil }
|
||||
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
|
||||
func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
|
||||
func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error) {
|
||||
m.mu.Lock()
|
||||
@@ -174,7 +181,7 @@ func TestHandleStatusChange_PendingToUp(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "PENDING", MaxRetries: 3, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 10*time.Millisecond)
|
||||
e.handleStatusChange(site, "UP", 200, 10*time.Millisecond, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -195,7 +202,7 @@ func TestHandleStatusChange_UpIncrementFailure(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 3, FailureCount: 0}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 500, 0)
|
||||
e.handleStatusChange(site, "DOWN", 500, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -213,7 +220,7 @@ func TestHandleStatusChange_UpToDown_ExceedsRetries(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 2, FailureCount: 2, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 500, 0)
|
||||
e.handleStatusChange(site, "DOWN", 500, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "DOWN" {
|
||||
@@ -236,7 +243,7 @@ func TestHandleStatusChange_UpToDown_ZeroRetries(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, FailureCount: 0, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "DOWN" {
|
||||
@@ -255,7 +262,7 @@ func TestHandleStatusChange_DownToUp_Recovery(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "DOWN", FailureCount: 4, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 5*time.Millisecond)
|
||||
e.handleStatusChange(site, "UP", 200, 5*time.Millisecond, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -276,7 +283,7 @@ func TestHandleStatusChange_DownStaysDown(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "DOWN", MaxRetries: 2, FailureCount: 3}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "DOWN" {
|
||||
@@ -295,7 +302,7 @@ func TestHandleStatusChange_SSLExpired(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "SSL EXP", 0, 0)
|
||||
e.handleStatusChange(site, "SSL EXP", 0, 0, "SSL certificate expired")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "SSL EXP" {
|
||||
@@ -315,7 +322,7 @@ func TestHandleStatusChange_AlertSuppressedMaintenance(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "DOWN" {
|
||||
@@ -346,7 +353,7 @@ func TestHandleStatusChange_RecoverySuppressedMaintenance(t *testing.T) {
|
||||
site := models.Site{ID: 1, Name: "test", Status: "DOWN", AlertID: 1}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -370,7 +377,7 @@ func TestHandleStatusChange_SSLWarning(t *testing.T) {
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if !s.SentSSLWarning {
|
||||
@@ -393,7 +400,7 @@ func TestHandleStatusChange_SSLWarningNotRepeated(t *testing.T) {
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
waitAsync()
|
||||
if len(ms.getAlertCallsSnapshot()) != 0 {
|
||||
@@ -412,7 +419,7 @@ func TestHandleStatusChange_SSLWarningReset(t *testing.T) {
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.SentSSLWarning {
|
||||
@@ -433,7 +440,7 @@ func TestHandleStatusChange_SSLWarningSuppressedMaint(t *testing.T) {
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if !s.SentSSLWarning {
|
||||
@@ -452,7 +459,7 @@ func TestHandleStatusChange_InactiveEngine(t *testing.T) {
|
||||
injectSite(e, site)
|
||||
e.SetActive(false)
|
||||
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
@@ -534,7 +541,7 @@ func TestCheckPush_DeadlineMissed(t *testing.T) {
|
||||
site := models.Site{
|
||||
ID: 1, Name: "push", Type: "push", Status: "UP",
|
||||
Interval: 10, MaxRetries: 0,
|
||||
LastCheck: time.Now().Add(-20 * time.Second),
|
||||
LastCheck: time.Now().Add(-120 * time.Second),
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
@@ -546,6 +553,24 @@ func TestCheckPush_DeadlineMissed(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckPush_OverdueBecomesLate(t *testing.T) {
|
||||
ms := newMockStore()
|
||||
e := newTestEngine(ms)
|
||||
site := models.Site{
|
||||
ID: 1, Name: "push", Type: "push", Status: "UP",
|
||||
Interval: 300,
|
||||
LastCheck: time.Now().Add(-310 * time.Second),
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.checkPush(site)
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "LATE" {
|
||||
t.Errorf("expected LATE when overdue but within grace, got %s", s.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckPush_WithinDeadline(t *testing.T) {
|
||||
ms := newMockStore()
|
||||
e := newTestEngine(ms)
|
||||
@@ -563,20 +588,20 @@ func TestCheckPush_WithinDeadline(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckPush_PendingToUp(t *testing.T) {
|
||||
func TestCheckPush_PendingStaysPending(t *testing.T) {
|
||||
ms := newMockStore()
|
||||
e := newTestEngine(ms)
|
||||
site := models.Site{
|
||||
ID: 1, Name: "push", Type: "push", Status: "PENDING",
|
||||
Interval: 60, LastCheck: time.Now(),
|
||||
Interval: 60,
|
||||
}
|
||||
injectSite(e, site)
|
||||
|
||||
e.checkPush(site)
|
||||
|
||||
s, _ := getSite(e, 1)
|
||||
if s.Status != "UP" {
|
||||
t.Errorf("expected UP, got %s", s.Status)
|
||||
if s.Status != "PENDING" {
|
||||
t.Errorf("expected PENDING to stay until first heartbeat, got %s", s.Status)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -991,7 +1016,7 @@ func TestConcurrent_HandleStatusChangeAndGetState(t *testing.T) {
|
||||
wg.Add(2)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
e.handleStatusChange(site, "DOWN", 500, 0)
|
||||
e.handleStatusChange(site, "DOWN", 500, 0, "test error")
|
||||
}()
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
@@ -11,11 +11,11 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/importer"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/metrics"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/store"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/importer"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/metrics"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
|
||||
)
|
||||
|
||||
const maxRequestBody = 1 << 20
|
||||
@@ -67,6 +67,7 @@ var statusTpl = template.Must(template.New("status").Parse(`
|
||||
.UP { background: #9ece6a; color: #1a1b26; }
|
||||
.DOWN { background: #f7768e; color: #1a1b26; }
|
||||
.PENDING { background: #e0af68; color: #1a1b26; }
|
||||
.LATE { background: #e0af68; color: #1a1b26; }
|
||||
.SSL-EXP { background: #e0af68; color: #1a1b26; }
|
||||
.PAUSED { background: #565f89; color: #c0caf5; }
|
||||
.MAINT { background: #bb9af7; color: #1a1b26; }
|
||||
@@ -403,9 +404,10 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server {
|
||||
var req struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Results []struct {
|
||||
SiteID int `json:"site_id"`
|
||||
LatencyNs int64 `json:"latency_ns"`
|
||||
IsUp bool `json:"is_up"`
|
||||
SiteID int `json:"site_id"`
|
||||
LatencyNs int64 `json:"latency_ns"`
|
||||
IsUp bool `json:"is_up"`
|
||||
ErrorReason string `json:"error_reason"`
|
||||
} `json:"results"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
@@ -420,7 +422,7 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server {
|
||||
if err := s.SaveCheckFromNode(result.SiteID, req.NodeID, result.LatencyNs, result.IsUp); err != nil {
|
||||
log.Printf("Failed to save probe result: %v", err)
|
||||
}
|
||||
eng.IngestProbeResult(req.NodeID, result.SiteID, result.LatencyNs, result.IsUp)
|
||||
eng.IngestProbeResult(req.NodeID, result.SiteID, result.LatencyNs, result.IsUp, result.ErrorReason)
|
||||
}
|
||||
if err := s.UpdateNodeLastSeen(req.NodeID); err != nil {
|
||||
log.Printf("Failed to update node last seen: %v", err)
|
||||
|
||||
@@ -4,13 +4,14 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"net"
|
||||
"net/http"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
)
|
||||
|
||||
// --- Mock Store ---
|
||||
@@ -64,18 +65,24 @@ func (m *mockStore) AddAlertReturningID(string, string, map[string]string) (int,
|
||||
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
|
||||
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
|
||||
func (m *mockStore) DeleteNode(string) error { return nil }
|
||||
func (m *mockStore) SaveLog(string) error { return nil }
|
||||
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
|
||||
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
|
||||
func (m *mockStore) SaveLog(string) error { return nil }
|
||||
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||
func (m *mockStore) Close() error { return nil }
|
||||
|
||||
func (m *mockStore) ExportData() (models.Backup, error) {
|
||||
return models.Backup{
|
||||
|
||||
@@ -14,6 +14,7 @@ type Dialect interface {
|
||||
ImportWipe(tx *sql.Tx)
|
||||
ImportResetSequences(tx *sql.Tx)
|
||||
UpsertNodeSQL() string
|
||||
UpsertAlertHealthSQL() string
|
||||
}
|
||||
|
||||
func rewritePlaceholders(query string, dollarStyle bool) string {
|
||||
|
||||
@@ -72,6 +72,23 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
)`,
|
||||
`CREATE TABLE IF NOT EXISTS state_changes (
|
||||
id SERIAL PRIMARY KEY,
|
||||
site_id INTEGER NOT NULL,
|
||||
from_status TEXT NOT NULL,
|
||||
to_status TEXT NOT NULL,
|
||||
error_reason TEXT DEFAULT '',
|
||||
changed_at TIMESTAMP DEFAULT NOW()
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||
`CREATE TABLE IF NOT EXISTS alert_health (
|
||||
alert_id INTEGER PRIMARY KEY,
|
||||
last_send_at TIMESTAMP,
|
||||
last_send_ok BOOLEAN DEFAULT FALSE,
|
||||
last_error TEXT DEFAULT '',
|
||||
send_count INTEGER DEFAULT 0,
|
||||
fail_count INTEGER DEFAULT 0
|
||||
)`,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,6 +114,10 @@ func (d *PostgresDialect) UpsertNodeSQL() string {
|
||||
return "INSERT INTO nodes (id, name, region, last_seen, version) VALUES ($1, $2, $3, NOW(), $4) ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name, region = EXCLUDED.region, last_seen = NOW(), version = EXCLUDED.version"
|
||||
}
|
||||
|
||||
func (d *PostgresDialect) UpsertAlertHealthSQL() string {
|
||||
return "INSERT INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES ($1, $2, $3, $4, $5, $6) ON CONFLICT (alert_id) DO UPDATE SET last_send_at = EXCLUDED.last_send_at, last_send_ok = EXCLUDED.last_send_ok, last_error = EXCLUDED.last_error, send_count = EXCLUDED.send_count, fail_count = EXCLUDED.fail_count"
|
||||
}
|
||||
|
||||
// ResetSequenceOnEmpty is a no-op for the Postgres dialect: the body is empty
// and both db and table are ignored.
func (d *PostgresDialect) ResetSequenceOnEmpty(db *sql.DB, table string) {}
|
||||
|
||||
func (d *PostgresDialect) ImportWipe(tx *sql.Tx) {
|
||||
|
||||
@@ -79,6 +79,23 @@ func (d *SQLiteDialect) CreateTablesSQL() []string {
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
)`,
|
||||
`CREATE TABLE IF NOT EXISTS state_changes (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
site_id INTEGER NOT NULL,
|
||||
from_status TEXT NOT NULL,
|
||||
to_status TEXT NOT NULL,
|
||||
error_reason TEXT DEFAULT '',
|
||||
changed_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||
`CREATE TABLE IF NOT EXISTS alert_health (
|
||||
alert_id INTEGER PRIMARY KEY,
|
||||
last_send_at DATETIME,
|
||||
last_send_ok BOOLEAN DEFAULT 0,
|
||||
last_error TEXT DEFAULT '',
|
||||
send_count INTEGER DEFAULT 0,
|
||||
fail_count INTEGER DEFAULT 0
|
||||
)`,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -104,6 +121,10 @@ func (d *SQLiteDialect) UpsertNodeSQL() string {
|
||||
return "INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?)"
|
||||
}
|
||||
|
||||
func (d *SQLiteDialect) UpsertAlertHealthSQL() string {
|
||||
return "INSERT OR REPLACE INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES (?, ?, ?, ?, ?, ?)"
|
||||
}
|
||||
|
||||
func (d *SQLiteDialect) ResetSequenceOnEmpty(db *sql.DB, table string) {
|
||||
var count int
|
||||
_ = db.QueryRow("SELECT COUNT(*) FROM " + table).Scan(&count) //nolint:errcheck
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -347,6 +347,29 @@ func (s *SQLStore) DeleteUser(id int) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error {
|
||||
_, err := s.db.Exec(s.q("INSERT INTO state_changes (site_id, from_status, to_status, error_reason) VALUES (?, ?, ?, ?)"),
|
||||
siteID, fromStatus, toStatus, errorReason)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) GetStateChanges(siteID int, limit int) ([]models.StateChange, error) {
|
||||
rows, err := s.db.Query(s.q("SELECT id, site_id, from_status, to_status, error_reason, changed_at FROM state_changes WHERE site_id = ? ORDER BY changed_at DESC LIMIT ?"), siteID, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var changes []models.StateChange
|
||||
for rows.Next() {
|
||||
var sc models.StateChange
|
||||
if err := rows.Scan(&sc.ID, &sc.SiteID, &sc.FromStatus, &sc.ToStatus, &sc.ErrorReason, &sc.ChangedAt); err != nil {
|
||||
return changes, err
|
||||
}
|
||||
changes = append(changes, sc)
|
||||
}
|
||||
return changes, rows.Err()
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveCheck(siteID int, latencyNs int64, isUp bool) error {
|
||||
return s.SaveCheckFromNode(siteID, "", latencyNs, isUp)
|
||||
}
|
||||
@@ -407,6 +430,37 @@ func (s *SQLStore) DeleteNode(id string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
rows, err := s.db.Query("SELECT alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count FROM alert_health")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
out := make(map[int]models.AlertHealthRecord)
|
||||
for rows.Next() {
|
||||
var r models.AlertHealthRecord
|
||||
var lastSend sql.NullTime
|
||||
if err := rows.Scan(&r.AlertID, &lastSend, &r.LastSendOK, &r.LastError, &r.SendCount, &r.FailCount); err != nil {
|
||||
return out, err
|
||||
}
|
||||
if lastSend.Valid {
|
||||
r.LastSendAt = lastSend.Time
|
||||
}
|
||||
out[r.AlertID] = r
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveAlertHealth(h models.AlertHealthRecord) error {
|
||||
var lastSend interface{}
|
||||
if !h.LastSendAt.IsZero() {
|
||||
lastSend = h.LastSendAt
|
||||
}
|
||||
_, err := s.db.Exec(s.dialect.UpsertAlertHealthSQL(),
|
||||
h.AlertID, lastSend, h.LastSendOK, h.LastError, h.SendCount, h.FailCount)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveLog(message string) error {
|
||||
_, err := s.db.Exec(s.q("INSERT INTO logs (message) VALUES (?)"), message)
|
||||
if err != nil {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
)
|
||||
|
||||
type Store interface {
|
||||
@@ -38,6 +38,10 @@ type Store interface {
|
||||
SaveCheckFromNode(siteID int, nodeID string, latencyNs int64, isUp bool) error
|
||||
LoadAllHistory(limit int) (map[int][]models.CheckRecord, error)
|
||||
|
||||
// State Changes
|
||||
SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error
|
||||
GetStateChanges(siteID int, limit int) ([]models.StateChange, error)
|
||||
|
||||
// Nodes
|
||||
RegisterNode(node models.ProbeNode) error
|
||||
GetNode(id string) (models.ProbeNode, error)
|
||||
@@ -45,6 +49,10 @@ type Store interface {
|
||||
UpdateNodeLastSeen(id string) error
|
||||
DeleteNode(id string) error
|
||||
|
||||
// Alert Health
|
||||
LoadAlertHealth() (map[int]models.AlertHealthRecord, error)
|
||||
SaveAlertHealth(h models.AlertHealthRecord) error
|
||||
|
||||
// Logs
|
||||
SaveLog(message string) error
|
||||
LoadLogs(limit int) ([]string, error)
|
||||
|
||||
@@ -2,7 +2,10 @@ package tui
|
||||
|
||||
import (
	"fmt"
	"sort"
	"strings"
	"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
"github.com/charmbracelet/huh"
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
@@ -113,34 +116,122 @@ func fmtAlertConfig(alert struct {
|
||||
}
|
||||
}
|
||||
|
||||
func fmtAlertHealth(h monitor.AlertHealth) string {
|
||||
if h.LastSendAt.IsZero() {
|
||||
return subtleStyle.Render("●")
|
||||
}
|
||||
if h.LastSendOK {
|
||||
return specialStyle.Render("●")
|
||||
}
|
||||
return dangerStyle.Render("●")
|
||||
}
|
||||
|
||||
func fmtAlertLastSent(h monitor.AlertHealth) string {
|
||||
if h.LastSendAt.IsZero() {
|
||||
return subtleStyle.Render("never")
|
||||
}
|
||||
d := time.Since(h.LastSendAt)
|
||||
if d < time.Minute {
|
||||
return fmt.Sprintf("%ds ago", int(d.Seconds()))
|
||||
}
|
||||
if d < time.Hour {
|
||||
return fmt.Sprintf("%dm ago", int(d.Minutes()))
|
||||
}
|
||||
if d < 24*time.Hour {
|
||||
return fmt.Sprintf("%dh ago", int(d.Hours()))
|
||||
}
|
||||
return fmt.Sprintf("%dd ago", int(d.Hours())/24)
|
||||
}
|
||||
|
||||
func (m Model) viewAlertsTab() string {
|
||||
if len(m.alerts) == 0 {
|
||||
return "\n No alert channels configured. Press [n] to add one."
|
||||
}
|
||||
|
||||
var headers []string
|
||||
var widths []int
|
||||
if m.isWide() {
|
||||
headers = []string{"#", "", "NAME", "TYPE", "CONFIG", "LAST SENT"}
|
||||
widths = []int{4, 3, 18, 12, 40, 12}
|
||||
} else {
|
||||
headers = []string{"#", "", "NAME", "TYPE", "CONFIG", "SENT"}
|
||||
widths = []int{4, 3, 14, 10, 24, 8}
|
||||
}
|
||||
nameW := widths[2]
|
||||
cfgW := widths[4]
|
||||
|
||||
return m.renderTable(
|
||||
[]string{"#", "NAME", "TYPE", "CONFIG"},
|
||||
headers,
|
||||
len(m.alerts),
|
||||
func(start, end int) [][]string {
|
||||
var rows [][]string
|
||||
for i := start; i < end; i++ {
|
||||
a := m.alerts[i]
|
||||
h := m.engine.GetAlertHealth(a.ID)
|
||||
rows = append(rows, []string{
|
||||
fmt.Sprintf("%d", i+1),
|
||||
m.zones.Mark(fmt.Sprintf("alert-%d", i), limitStr(a.Name, 15)),
|
||||
fmtAlertHealth(h),
|
||||
m.zones.Mark(fmt.Sprintf("alert-%d", i), limitStr(a.Name, nameW-2)),
|
||||
fmtAlertType(a.Type),
|
||||
fmtAlertConfig(struct {
|
||||
limitStr(fmtAlertConfig(struct {
|
||||
Type string
|
||||
Settings map[string]string
|
||||
}{a.Type, a.Settings}),
|
||||
}{a.Type, a.Settings}), cfgW-2),
|
||||
fmtAlertLastSent(h),
|
||||
})
|
||||
}
|
||||
return rows
|
||||
},
|
||||
nil, nil,
|
||||
widths, nil,
|
||||
)
|
||||
}
|
||||
|
||||
func (m Model) viewAlertDetailPanel() string {
|
||||
if m.cursor >= len(m.alerts) {
|
||||
return ""
|
||||
}
|
||||
a := m.alerts[m.cursor]
|
||||
h := m.engine.GetAlertHealth(a.ID)
|
||||
|
||||
var b strings.Builder
|
||||
|
||||
b.WriteString(subtleStyle.Render(" Alerts > ") + titleStyle.Render(a.Name) + "\n\n")
|
||||
|
||||
row := func(label, value string) {
|
||||
fmt.Fprintf(&b, " %-16s %s\n", subtleStyle.Render(label), value)
|
||||
}
|
||||
|
||||
row("Type", fmtAlertType(a.Type))
|
||||
|
||||
if h.LastSendAt.IsZero() {
|
||||
row("Health", subtleStyle.Render("never sent"))
|
||||
} else if h.LastSendOK {
|
||||
row("Health", specialStyle.Render("OK"))
|
||||
} else {
|
||||
row("Health", dangerStyle.Render("FAILED"))
|
||||
}
|
||||
|
||||
if !h.LastSendAt.IsZero() {
|
||||
row("Last Sent", h.LastSendAt.Format("2006-01-02 15:04:05")+" ("+fmtAlertLastSent(h)+")")
|
||||
}
|
||||
if h.SendCount > 0 {
|
||||
row("Sends", fmt.Sprintf("%d sent, %d failed", h.SendCount, h.FailCount))
|
||||
}
|
||||
if h.LastError != "" {
|
||||
row("Last Error", dangerStyle.Render(limitStr(h.LastError, 60)))
|
||||
}
|
||||
|
||||
b.WriteString("\n" + subtleStyle.Render(" CONFIGURATION") + "\n")
|
||||
for k, v := range a.Settings {
|
||||
row(k, v)
|
||||
}
|
||||
|
||||
b.WriteString("\n\n")
|
||||
b.WriteString(subtleStyle.Render(" [i/Esc] Back [e] Edit [t] Test [q] Quit"))
|
||||
|
||||
return lipgloss.NewStyle().Padding(1, 2).Render(b.String())
|
||||
}
|
||||
|
||||
func (m *Model) initAlertHuhForm() tea.Cmd {
|
||||
m.alertFormData = &alertFormData{
|
||||
AlertType: "discord",
|
||||
|
||||
+89
-21
@@ -5,27 +5,83 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func colorizeLog(line string) string {
|
||||
// logSeverity is the category assigned to a log line; it selects the tag
// shown in front of the line and whether the line passes the "important"
// filter.
type logSeverity int

// Severity values, numbered from zero in declaration order.
const (
	// severityInfo is the zero value; it is tagged " info ".
	severityInfo logSeverity = iota
	// severityWarn is tagged " WARN ".
	severityWarn
	// severityDown is tagged " DOWN ".
	severityDown
	// severityUp is tagged " UP ".
	severityUp
	// severitySystem is tagged " SYS ".
	severitySystem
)
|
||||
|
||||
func classifyLog(line string) logSeverity {
|
||||
lower := strings.ToLower(line)
|
||||
switch {
|
||||
case strings.Contains(lower, "confirmed down"),
|
||||
strings.Contains(lower, "is down"),
|
||||
strings.Contains(lower, "missed heartbeat"),
|
||||
strings.Contains(lower, "failed check"),
|
||||
strings.Contains(lower, "ssl warning"):
|
||||
return dangerStyle.Render(line)
|
||||
strings.Contains(lower, "alert send failed"):
|
||||
return severityDown
|
||||
case strings.Contains(lower, "recovered"),
|
||||
strings.Contains(lower, "is up"),
|
||||
strings.Contains(lower, "recovery"):
|
||||
return specialStyle.Render(line)
|
||||
strings.Contains(lower, "recovery"),
|
||||
strings.Contains(lower, "first heartbeat"):
|
||||
return severityUp
|
||||
case strings.Contains(lower, "failed check"),
|
||||
strings.Contains(lower, "ssl warning"),
|
||||
strings.Contains(lower, "overdue"),
|
||||
strings.Contains(lower, "was late"):
|
||||
return severityWarn
|
||||
case strings.Contains(lower, "engine"),
|
||||
strings.Contains(lower, "cluster"):
|
||||
return titleStyle.Render(line)
|
||||
strings.Contains(lower, "cluster"),
|
||||
strings.Contains(lower, "loaded"),
|
||||
strings.Contains(lower, "paused"),
|
||||
strings.Contains(lower, "resumed"):
|
||||
return severitySystem
|
||||
default:
|
||||
return line
|
||||
return severityInfo
|
||||
}
|
||||
}
|
||||
|
||||
func isImportantLog(sev logSeverity) bool {
|
||||
return sev == severityDown || sev == severityUp || sev == severitySystem
|
||||
}
|
||||
|
||||
func renderLogTag(sev logSeverity) string {
|
||||
switch sev {
|
||||
case severityDown:
|
||||
return dangerStyle.Render(" DOWN ")
|
||||
case severityUp:
|
||||
return specialStyle.Render(" UP ")
|
||||
case severityWarn:
|
||||
return warnStyle.Render(" WARN ")
|
||||
case severitySystem:
|
||||
return titleStyle.Render(" SYS ")
|
||||
default:
|
||||
return subtleStyle.Render(" info ")
|
||||
}
|
||||
}
|
||||
|
||||
func renderLogLine(line string) string {
|
||||
sev := classifyLog(line)
|
||||
tag := renderLogTag(sev)
|
||||
|
||||
ts := ""
|
||||
msg := line
|
||||
if len(line) > 10 && line[0] == '[' {
|
||||
if idx := strings.Index(line, "]"); idx > 0 && idx < 12 {
|
||||
ts = subtleStyle.Render(line[1:idx])
|
||||
msg = strings.TrimSpace(line[idx+1:])
|
||||
}
|
||||
}
|
||||
|
||||
if ts != "" {
|
||||
return fmt.Sprintf(" %s %s %s", ts, tag, msg)
|
||||
}
|
||||
return fmt.Sprintf(" %s %s", tag, msg)
|
||||
}
|
||||
|
||||
func (m Model) viewLogsTab() string {
|
||||
content := m.logViewport.View()
|
||||
if strings.TrimSpace(content) == "" || content == "Waiting for logs..." {
|
||||
@@ -33,22 +89,34 @@ func (m Model) viewLogsTab() string {
|
||||
}
|
||||
|
||||
lines := strings.Split(content, "\n")
|
||||
var colored []string
|
||||
var rendered []string
|
||||
total := 0
|
||||
shown := 0
|
||||
|
||||
for _, line := range lines {
|
||||
if line == "" {
|
||||
colored = append(colored, line)
|
||||
if strings.TrimSpace(line) == "" {
|
||||
continue
|
||||
}
|
||||
colored = append(colored, colorizeLog(line))
|
||||
}
|
||||
|
||||
count := 0
|
||||
for _, l := range lines {
|
||||
if strings.TrimSpace(l) != "" {
|
||||
count++
|
||||
total++
|
||||
sev := classifyLog(line)
|
||||
if m.logFilterImportant && !isImportantLog(sev) {
|
||||
continue
|
||||
}
|
||||
shown++
|
||||
rendered = append(rendered, renderLogLine(line))
|
||||
}
|
||||
|
||||
header := subtleStyle.Render(fmt.Sprintf(" %d entries [↑/↓] Scroll [PgUp/PgDn] Page", count))
|
||||
return "\n" + header + "\n\n" + strings.Join(colored, "\n")
|
||||
filterLabel := "All"
|
||||
if m.logFilterImportant {
|
||||
filterLabel = "Important"
|
||||
}
|
||||
|
||||
header := subtleStyle.Render(fmt.Sprintf(
|
||||
" %d entries [↑/↓] Scroll [PgUp/PgDn] Page [f] Filter: %s", shown, filterLabel))
|
||||
|
||||
if m.logFilterImportant && shown < total {
|
||||
header += subtleStyle.Render(fmt.Sprintf(" (%d hidden)", total-shown))
|
||||
}
|
||||
|
||||
return "\n" + header + "\n\n" + strings.Join(rendered, "\n")
|
||||
}
|
||||
|
||||
+28
-11
@@ -2,10 +2,11 @@ package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
"github.com/charmbracelet/huh"
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
@@ -40,19 +41,19 @@ func fmtMaintType(t string) string {
|
||||
return maintStyle.Render("maintenance")
|
||||
}
|
||||
|
||||
func fmtMaintMonitor(monitorID int, sites []models.Site) string {
|
||||
func fmtMaintMonitorW(monitorID int, sites []models.Site, maxW int) string {
|
||||
if monitorID == 0 {
|
||||
return "All"
|
||||
}
|
||||
for _, s := range sites {
|
||||
if s.ID == monitorID {
|
||||
return limitStr(s.Name, 18)
|
||||
return limitStr(s.Name, maxW)
|
||||
}
|
||||
}
|
||||
return fmt.Sprintf("#%d", monitorID)
|
||||
}
|
||||
|
||||
func fmtMaintTime(t time.Time) string {
|
||||
func fmtMaintTime(t time.Time, colW int) string {
|
||||
if t.IsZero() {
|
||||
return subtleStyle.Render("—")
|
||||
}
|
||||
@@ -60,7 +61,10 @@ func fmtMaintTime(t time.Time) string {
|
||||
if t.Year() == now.Year() && t.YearDay() == now.YearDay() {
|
||||
return t.Format("15:04")
|
||||
}
|
||||
return t.Format("15:04 Jan 02")
|
||||
if colW >= 14 {
|
||||
return t.Format("15:04 Jan 02")
|
||||
}
|
||||
return t.Format("Jan 02")
|
||||
}
|
||||
|
||||
func (m Model) isMonitorInMaintenance(monitorID int) bool {
|
||||
@@ -92,8 +96,21 @@ func (m Model) viewMaintTab() string {
|
||||
return "\n No maintenance windows or incidents. Press [n] to create one."
|
||||
}
|
||||
|
||||
var headers []string
|
||||
var widths []int
|
||||
if m.isWide() {
|
||||
headers = []string{"#", "TITLE", "TYPE", "MONITORS", "STATUS", "STARTED", "ENDS"}
|
||||
widths = []int{4, 24, 14, 22, 12, 16, 16}
|
||||
} else {
|
||||
headers = []string{"#", "TITLE", "TYPE", "MON", "ST", "START", "ENDS"}
|
||||
widths = []int{4, 14, 13, 14, 11, 14, 14}
|
||||
}
|
||||
titleW := widths[1]
|
||||
monW := widths[3]
|
||||
timeW := widths[5]
|
||||
|
||||
return m.renderTable(
|
||||
[]string{"#", "TITLE", "TYPE", "MONITORS", "STATUS", "STARTED", "ENDS"},
|
||||
headers,
|
||||
len(m.maintenanceWindows),
|
||||
func(start, end int) [][]string {
|
||||
var rows [][]string
|
||||
@@ -102,17 +119,17 @@ func (m Model) viewMaintTab() string {
|
||||
mw := m.maintenanceWindows[i]
|
||||
rows = append(rows, []string{
|
||||
strconv.Itoa(i + 1),
|
||||
m.zones.Mark(fmt.Sprintf("maint-%d", i), limitStr(mw.Title, 24)),
|
||||
m.zones.Mark(fmt.Sprintf("maint-%d", i), limitStr(mw.Title, titleW-2)),
|
||||
fmtMaintType(mw.Type),
|
||||
fmtMaintMonitor(mw.MonitorID, allSites),
|
||||
fmtMaintMonitorW(mw.MonitorID, allSites, monW-2),
|
||||
fmtMaintStatus(mw),
|
||||
fmtMaintTime(mw.StartTime),
|
||||
fmtMaintTime(mw.EndTime),
|
||||
fmtMaintTime(mw.StartTime, timeW),
|
||||
fmtMaintTime(mw.EndTime, timeW),
|
||||
})
|
||||
}
|
||||
return rows
|
||||
},
|
||||
[]int{6, 0, 14, 20, 12, 16, 16},
|
||||
widths,
|
||||
nil,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -10,16 +10,25 @@ func (m Model) viewNodesTab() string {
|
||||
return "\n No probe nodes connected."
|
||||
}
|
||||
|
||||
colWidths := []int{0, 12, 20, 10, 8}
|
||||
var headers []string
|
||||
var widths []int
|
||||
if m.isWide() {
|
||||
headers = []string{"NAME", "REGION", "LAST SEEN", "VERSION", "STATUS"}
|
||||
widths = []int{24, 14, 16, 12, 10}
|
||||
} else {
|
||||
headers = []string{"NAME", "REGION", "SEEN", "VER", "STATUS"}
|
||||
widths = []int{16, 10, 10, 8, 8}
|
||||
}
|
||||
nameW := widths[0]
|
||||
|
||||
return m.renderTable(
|
||||
[]string{"NAME", "REGION", "LAST SEEN", "VERSION", "STATUS"},
|
||||
headers,
|
||||
len(m.nodes),
|
||||
func(start, end int) [][]string {
|
||||
var rows [][]string
|
||||
for i := start; i < end; i++ {
|
||||
node := m.nodes[i]
|
||||
name := limitStr(node.Name, 20)
|
||||
name := limitStr(node.Name, nameW-2)
|
||||
if name == "" {
|
||||
name = node.ID
|
||||
}
|
||||
@@ -37,7 +46,7 @@ func (m Model) viewNodesTab() string {
|
||||
}
|
||||
return rows
|
||||
},
|
||||
colWidths,
|
||||
widths,
|
||||
nil,
|
||||
)
|
||||
}
|
||||
|
||||
+214
-56
@@ -7,7 +7,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
"github.com/charmbracelet/huh"
|
||||
@@ -60,14 +60,18 @@ type siteFormData struct {
|
||||
Regions string
|
||||
}
|
||||
|
||||
func latencySparkline(latencies []time.Duration, width int) string {
|
||||
func latencySparkline(latencies []time.Duration, statuses []bool, width int) string {
|
||||
if len(latencies) == 0 {
|
||||
return subtleStyle.Render(strings.Repeat("·", width))
|
||||
}
|
||||
|
||||
samples := latencies
|
||||
sampledStatuses := statuses
|
||||
if len(samples) > width {
|
||||
samples = samples[len(samples)-width:]
|
||||
if len(sampledStatuses) > width {
|
||||
sampledStatuses = sampledStatuses[len(sampledStatuses)-width:]
|
||||
}
|
||||
}
|
||||
|
||||
minL, maxL := samples[0], samples[0]
|
||||
@@ -85,7 +89,7 @@ func latencySparkline(latencies []time.Duration, width int) string {
|
||||
sb.WriteString(subtleStyle.Render(strings.Repeat("·", remaining)))
|
||||
}
|
||||
spread := maxL - minL
|
||||
for _, l := range samples {
|
||||
for i, l := range samples {
|
||||
idx := 0
|
||||
if spread > 0 {
|
||||
idx = int(float64(l-minL) / float64(spread) * 7)
|
||||
@@ -94,13 +98,18 @@ func latencySparkline(latencies []time.Duration, width int) string {
|
||||
}
|
||||
}
|
||||
ch := string(sparkChars[idx])
|
||||
ms := l.Milliseconds()
|
||||
if ms < 200 {
|
||||
sb.WriteString(specialStyle.Render(ch))
|
||||
} else if ms < 500 {
|
||||
sb.WriteString(warnStyle.Render(ch))
|
||||
} else {
|
||||
isDown := i < len(sampledStatuses) && !sampledStatuses[i]
|
||||
if isDown {
|
||||
sb.WriteString(dangerStyle.Render(ch))
|
||||
} else {
|
||||
ms := l.Milliseconds()
|
||||
if ms < 200 {
|
||||
sb.WriteString(specialStyle.Render(ch))
|
||||
} else if ms < 500 {
|
||||
sb.WriteString(warnStyle.Render(ch))
|
||||
} else {
|
||||
sb.WriteString(dangerStyle.Render(ch))
|
||||
}
|
||||
}
|
||||
}
|
||||
return sb.String()
|
||||
@@ -302,6 +311,8 @@ func fmtStatus(status string, paused bool, inMaint bool) string {
|
||||
switch status {
|
||||
case "DOWN", "SSL EXP":
|
||||
return dangerStyle.Render(status)
|
||||
case "LATE":
|
||||
return warnStyle.Render(status)
|
||||
case "PENDING":
|
||||
return subtleStyle.Render(status)
|
||||
default:
|
||||
@@ -309,28 +320,94 @@ func fmtStatus(status string, paused bool, inMaint bool) string {
|
||||
}
|
||||
}
|
||||
|
||||
func (m Model) dynamicWidths() (nameW, sparkW int) {
|
||||
fixed := 6 + 10 + 10 + 8 + 8 + 7 + 9 // #, TYPE, STATUS, LATENCY, UPTIME, SSL, RETRY
|
||||
overhead := 30 // cell padding + borders
|
||||
avail := m.termWidth - chromePadH - 2 - fixed - overhead
|
||||
if avail < 30 {
|
||||
avail = 30
|
||||
// fmtDuration formats d as a compact span using at most two units, e.g.
// "45s", "12m", "3h 5m", "2d 4h". The smaller unit is omitted when it is zero
// ("3h", "2d"). Values are truncated, never rounded.
//
// Negative durations are clamped to zero. Callers pass time.Since(t), which
// goes negative when t is slightly in the future (clock skew); without the
// clamp such a value would be printed as raw negative seconds, for example
// "-7200s".
func fmtDuration(d time.Duration) string {
	if d < 0 {
		d = 0
	}

	// Integer Duration division truncates toward zero, exactly what the
	// displayed units need, and avoids a round trip through float64.
	switch {
	case d < time.Minute:
		return fmt.Sprintf("%ds", int(d/time.Second))
	case d < time.Hour:
		return fmt.Sprintf("%dm", int(d/time.Minute))
	case d < 24*time.Hour:
		hours := int(d / time.Hour)
		if mins := int(d/time.Minute) % 60; mins > 0 {
			return fmt.Sprintf("%dh %dm", hours, mins)
		}
		return fmt.Sprintf("%dh", hours)
	}

	totalHours := int(d / time.Hour)
	days, hours := totalHours/24, totalHours%24
	if hours > 0 {
		return fmt.Sprintf("%dd %dh", days, hours)
	}
	return fmt.Sprintf("%dd", days)
}
|
||||
|
||||
// tableLayout bundles the column sizing and header labels chosen for one
// render of a table.
type tableLayout struct {
	nameW     int      // width chosen for the name column
	sparkW    int      // width chosen for the sparkline (history) column
	headers   []string // header labels, one per column
	colWidths []int    // per-column widths, parallel to headers
}
|
||||
|
||||
func (m Model) computeLayout() tableLayout {
|
||||
wide := m.isWide()
|
||||
|
||||
var fixed int
|
||||
var headers []string
|
||||
var widths []int
|
||||
|
||||
if wide {
|
||||
// # NAME TYPE STATUS LATENCY UPTIME HISTORY SSL RETRIES
|
||||
headers = []string{"#", "NAME", "TYPE", "STATUS", "LATENCY", "UPTIME", "HISTORY", "SSL", "RETRIES"}
|
||||
widths = []int{4, 0, 10, 10, 10, 8, 0, 7, 9}
|
||||
fixed = 4 + 10 + 10 + 10 + 8 + 7 + 9
|
||||
} else {
|
||||
// # NAME TYPE STATUS LAT UP% HISTORY SSL RT
|
||||
headers = []string{"#", "NAME", "TYPE", "STATUS", "LAT", "UP%", "HISTORY", "SSL", "RT"}
|
||||
widths = []int{4, 0, 8, 8, 7, 8, 0, 5, 5}
|
||||
fixed = 4 + 8 + 8 + 7 + 8 + 5 + 5
|
||||
}
|
||||
|
||||
numCols := len(headers)
|
||||
borderOverhead := 2 + (numCols - 1)
|
||||
avail := m.termWidth - chromePadH - 2 - borderOverhead - fixed
|
||||
if avail < 20 {
|
||||
avail = 20
|
||||
}
|
||||
|
||||
maxName := 0
|
||||
for _, s := range m.sites {
|
||||
if n := len([]rune(s.Name)); n > maxName {
|
||||
maxName = n
|
||||
}
|
||||
}
|
||||
maxName += 4
|
||||
|
||||
nameW := avail / 2
|
||||
if nameW > maxName {
|
||||
nameW = maxName
|
||||
}
|
||||
nameW = avail / 2
|
||||
sparkW = avail - nameW - 2 // -2 for spark column padding
|
||||
if nameW < 13 {
|
||||
nameW = 13
|
||||
}
|
||||
if nameW > 40 {
|
||||
nameW = 40
|
||||
}
|
||||
|
||||
sparkW := avail - nameW
|
||||
if sparkW < 10 {
|
||||
sparkW = 10
|
||||
}
|
||||
if sparkW > 60 {
|
||||
sparkW = 60
|
||||
|
||||
widths[1] = nameW
|
||||
widths[6] = sparkW
|
||||
|
||||
return tableLayout{
|
||||
nameW: nameW,
|
||||
sparkW: sparkW,
|
||||
headers: headers,
|
||||
colWidths: widths,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (m Model) viewSitesTab() string {
|
||||
@@ -348,12 +425,16 @@ func (m Model) viewSitesTab() string {
|
||||
return "\n" + welcome
|
||||
}
|
||||
|
||||
nameW, sparkWidth := m.dynamicWidths()
|
||||
colWidths := []int{6, 0, 10, 10, 8, 8, sparkWidth + 2, 7, 9}
|
||||
layout := m.computeLayout()
|
||||
nameW := layout.nameW
|
||||
sparkWidth := layout.sparkW - 2
|
||||
if sparkWidth < 8 {
|
||||
sparkWidth = 8
|
||||
}
|
||||
|
||||
var groupRows map[int]bool
|
||||
return m.renderTable(
|
||||
[]string{"#", "NAME", "TYPE", "STATUS", "LATENCY", "UPTIME", "HISTORY", "SSL", "RETRY"},
|
||||
layout.headers,
|
||||
len(m.sites),
|
||||
func(start, end int) [][]string {
|
||||
groupRows = make(map[int]bool)
|
||||
@@ -366,7 +447,7 @@ func (m Model) viewSitesTab() string {
|
||||
icon := typeIcon("group", m.collapsed[site.ID])
|
||||
rows = append(rows, []string{
|
||||
strconv.Itoa(i + 1),
|
||||
m.zones.Mark(fmt.Sprintf("site-%d", i), icon+" "+limitStr(site.Name, nameW-2)),
|
||||
m.zones.Mark(fmt.Sprintf("site-%d", i), icon+" "+limitStr(site.Name, nameW-4)),
|
||||
"group",
|
||||
fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID)),
|
||||
subtleStyle.Render("—"),
|
||||
@@ -384,9 +465,17 @@ func (m Model) viewSitesTab() string {
|
||||
if i+1 >= len(m.sites) || m.sites[i+1].ParentID != site.ParentID {
|
||||
prefix = "└"
|
||||
}
|
||||
name = prefix + " " + limitStr(name, nameW-2)
|
||||
name = prefix + " " + limitStr(name, nameW-4)
|
||||
} else {
|
||||
name = limitStr(name, nameW)
|
||||
name = limitStr(name, nameW-2)
|
||||
}
|
||||
|
||||
if (site.Status == "DOWN" || site.Status == "SSL EXP" || site.Status == "LATE") && site.LastError != "" {
|
||||
nameLen := len([]rune(name))
|
||||
errSpace := nameW - nameLen - 3
|
||||
if errSpace > 10 {
|
||||
name = name + " " + subtleStyle.Render(limitStr(site.LastError, errSpace))
|
||||
}
|
||||
}
|
||||
|
||||
hist, _ := m.engine.GetHistory(site.ID)
|
||||
@@ -394,7 +483,7 @@ func (m Model) viewSitesTab() string {
|
||||
if site.Type == "push" {
|
||||
spark = heartbeatSparkline(hist.Statuses, sparkWidth)
|
||||
} else {
|
||||
spark = latencySparkline(hist.Latencies, sparkWidth)
|
||||
spark = latencySparkline(hist.Latencies, hist.Statuses, sparkWidth)
|
||||
}
|
||||
|
||||
rows = append(rows, []string{
|
||||
@@ -411,7 +500,7 @@ func (m Model) viewSitesTab() string {
|
||||
}
|
||||
return rows
|
||||
},
|
||||
colWidths,
|
||||
layout.colWidths,
|
||||
func(row, col int) *lipgloss.Style {
|
||||
if groupRows[row] {
|
||||
s := siteGroupStyle
|
||||
@@ -731,7 +820,30 @@ func (m Model) viewDetailPanel() string {
|
||||
fmt.Fprintf(&b, " %-16s %s\n", subtleStyle.Render(label), value)
|
||||
}
|
||||
|
||||
section := func(label string) {
|
||||
b.WriteString("\n" + subtleStyle.Render(" "+label) + "\n")
|
||||
}
|
||||
|
||||
row("Status", fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID)))
|
||||
|
||||
if (site.Status == "DOWN" || site.Status == "SSL EXP" || site.Status == "LATE") && site.LastError != "" {
|
||||
row("Error", dangerStyle.Render(limitStr(site.LastError, 60)))
|
||||
}
|
||||
|
||||
if site.Type == "http" && site.StatusCode > 0 {
|
||||
row("HTTP Code", strconv.Itoa(site.StatusCode))
|
||||
}
|
||||
|
||||
if !site.StatusChangedAt.IsZero() {
|
||||
dur := time.Since(site.StatusChangedAt)
|
||||
row("State Since", site.StatusChangedAt.Format("2006-01-02 15:04:05")+" ("+fmtDuration(dur)+")")
|
||||
}
|
||||
|
||||
if !site.LastSuccessAt.IsZero() {
|
||||
ago := time.Since(site.LastSuccessAt)
|
||||
row("Last Success", site.LastSuccessAt.Format("15:04:05")+" ("+fmtDuration(ago)+" ago)")
|
||||
}
|
||||
|
||||
if m.isMonitorInMaintenance(site.ID) {
|
||||
for _, mw := range m.maintenanceWindows {
|
||||
if mw.Type == "maintenance" && (mw.MonitorID == 0 || mw.MonitorID == site.ID || mw.MonitorID == site.ParentID) {
|
||||
@@ -740,6 +852,8 @@ func (m Model) viewDetailPanel() string {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
section("ENDPOINT")
|
||||
row("Type", site.Type)
|
||||
if site.URL != "" {
|
||||
row("URL", site.URL)
|
||||
@@ -750,31 +864,45 @@ func (m Model) viewDetailPanel() string {
|
||||
if site.Port > 0 {
|
||||
row("Port", strconv.Itoa(site.Port))
|
||||
}
|
||||
|
||||
section("TIMING")
|
||||
row("Interval", fmt.Sprintf("%ds", site.Interval))
|
||||
row("Timeout", fmt.Sprintf("%ds", site.Timeout))
|
||||
if site.Timeout > 0 {
|
||||
row("Timeout", fmt.Sprintf("%ds", site.Timeout))
|
||||
}
|
||||
row("Latency", fmtLatency(site.Latency))
|
||||
row("Uptime", fmtUptime(hist.Statuses))
|
||||
if !site.LastCheck.IsZero() {
|
||||
row("Last Check", site.LastCheck.Format("15:04:05"))
|
||||
}
|
||||
|
||||
if site.Type == "http" {
|
||||
row("Method", site.Method)
|
||||
row("Codes", site.AcceptedCodes)
|
||||
section("HTTP")
|
||||
if site.Method != "" && site.Method != "GET" {
|
||||
row("Method", site.Method)
|
||||
}
|
||||
codes := site.AcceptedCodes
|
||||
if codes == "" {
|
||||
codes = "200-299"
|
||||
}
|
||||
row("Codes", codes)
|
||||
row("SSL", fmtSSL(site))
|
||||
if site.IgnoreTLS {
|
||||
row("TLS Verify", dangerStyle.Render("disabled"))
|
||||
}
|
||||
}
|
||||
|
||||
if site.MaxRetries > 0 {
|
||||
row("Retries", fmtRetries(site))
|
||||
}
|
||||
if site.Regions != "" {
|
||||
row("Regions", site.Regions)
|
||||
}
|
||||
if site.Description != "" {
|
||||
row("Description", site.Description)
|
||||
}
|
||||
if !site.LastCheck.IsZero() {
|
||||
row("Last Check", site.LastCheck.Format("15:04:05"))
|
||||
if site.MaxRetries > 0 || site.Regions != "" || site.Description != "" {
|
||||
section("CONFIG")
|
||||
if site.MaxRetries > 0 {
|
||||
row("Retries", fmtRetries(site))
|
||||
}
|
||||
if site.Regions != "" {
|
||||
row("Regions", site.Regions)
|
||||
}
|
||||
if site.Description != "" {
|
||||
row("Description", site.Description)
|
||||
}
|
||||
}
|
||||
|
||||
probeResults := m.engine.GetProbeResults(site.ID)
|
||||
@@ -787,7 +915,30 @@ func (m Model) viewDetailPanel() string {
|
||||
}
|
||||
latency := time.Duration(result.LatencyNs).Milliseconds()
|
||||
ago := time.Since(result.CheckedAt).Truncate(time.Second)
|
||||
fmt.Fprintf(&b, " %-14s %s %dms %s ago\n", nodeID, status, latency, ago)
|
||||
line := fmt.Sprintf(" %-14s %s %dms %s ago", nodeID, status, latency, ago)
|
||||
if !result.IsUp && result.ErrorReason != "" {
|
||||
line += " " + dangerStyle.Render(limitStr(result.ErrorReason, 30))
|
||||
}
|
||||
b.WriteString(line + "\n")
|
||||
}
|
||||
}
|
||||
|
||||
stateChanges := m.engine.GetStateChanges(site.ID, 5)
|
||||
if len(stateChanges) > 0 {
|
||||
b.WriteString("\n" + subtleStyle.Render(" STATE CHANGES") + "\n")
|
||||
for _, sc := range stateChanges {
|
||||
ago := fmtDuration(time.Since(sc.ChangedAt))
|
||||
arrow := subtleStyle.Render(sc.FromStatus) + " → "
|
||||
if sc.ToStatus == "UP" {
|
||||
arrow += specialStyle.Render(sc.ToStatus)
|
||||
} else {
|
||||
arrow += dangerStyle.Render(sc.ToStatus)
|
||||
}
|
||||
line := fmt.Sprintf(" %s %s", arrow, subtleStyle.Render(ago+" ago"))
|
||||
if sc.ErrorReason != "" && sc.ToStatus != "UP" {
|
||||
line += " " + dangerStyle.Render(limitStr(sc.ErrorReason, 40))
|
||||
}
|
||||
b.WriteString(line + "\n")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -807,20 +958,27 @@ func (m Model) viewDetailPanel() string {
|
||||
up, len(hist.Statuses))
|
||||
}
|
||||
} else {
|
||||
b.WriteString(" " + latencySparkline(hist.Latencies, sparkWidth))
|
||||
if len(hist.Latencies) > 0 {
|
||||
minL, maxL := hist.Latencies[0], hist.Latencies[0]
|
||||
var total time.Duration
|
||||
for _, l := range hist.Latencies {
|
||||
total += l
|
||||
if l < minL {
|
||||
minL = l
|
||||
}
|
||||
if l > maxL {
|
||||
maxL = l
|
||||
}
|
||||
b.WriteString(" " + latencySparkline(hist.Latencies, hist.Statuses, sparkWidth))
|
||||
// Stats over successful checks only — a failed check is stored as 0ns latency
|
||||
// and would otherwise drag Min to 0ms and skew the average.
|
||||
var minL, maxL, total time.Duration
|
||||
count := 0
|
||||
for i, l := range hist.Latencies {
|
||||
if i < len(hist.Statuses) && !hist.Statuses[i] {
|
||||
continue
|
||||
}
|
||||
avg := total / time.Duration(len(hist.Latencies))
|
||||
if count == 0 {
|
||||
minL, maxL = l, l
|
||||
} else if l < minL {
|
||||
minL = l
|
||||
} else if l > maxL {
|
||||
maxL = l
|
||||
}
|
||||
total += l
|
||||
count++
|
||||
}
|
||||
if count > 0 {
|
||||
avg := total / time.Duration(count)
|
||||
fmt.Fprintf(&b, "\n %s %dms %s %dms %s %dms",
|
||||
subtleStyle.Render("Min"), minL.Milliseconds(),
|
||||
subtleStyle.Render("Avg"), avg.Milliseconds(),
|
||||
|
||||
@@ -32,8 +32,19 @@ func (m Model) viewUsersTab() string {
|
||||
return "\n No users configured. Press [n] to add one."
|
||||
}
|
||||
|
||||
var headers []string
|
||||
var widths []int
|
||||
if m.isWide() {
|
||||
headers = []string{"#", "USERNAME", "ROLE", "PUBLIC KEY"}
|
||||
widths = []int{4, 18, 10, 50}
|
||||
} else {
|
||||
headers = []string{"#", "USER", "ROLE", "KEY"}
|
||||
widths = []int{4, 14, 8, 30}
|
||||
}
|
||||
userW := widths[1]
|
||||
|
||||
return m.renderTable(
|
||||
[]string{"#", "USERNAME", "ROLE", "PUBLIC KEY"},
|
||||
headers,
|
||||
len(m.users),
|
||||
func(start, end int) [][]string {
|
||||
var rows [][]string
|
||||
@@ -41,14 +52,14 @@ func (m Model) viewUsersTab() string {
|
||||
u := m.users[i]
|
||||
rows = append(rows, []string{
|
||||
fmt.Sprintf("%d", i+1),
|
||||
m.zones.Mark(fmt.Sprintf("user-%d", i), limitStr(u.Username, 15)),
|
||||
m.zones.Mark(fmt.Sprintf("user-%d", i), limitStr(u.Username, userW-2)),
|
||||
fmtRole(u.Role),
|
||||
fmtKey(u.PublicKey),
|
||||
})
|
||||
}
|
||||
return rows
|
||||
},
|
||||
nil, nil,
|
||||
widths, nil,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -15,6 +15,12 @@ var (
|
||||
|
||||
// StyleOverride is a callback that takes a table row and column index and
// returns a *lipgloss.Style. renderTable accepts one as its styleOverride
// argument; passing nil there means no override is consulted.
type StyleOverride func(row, col int) *lipgloss.Style
|
||||
|
||||
// wideBreakpoint is the smallest m.termWidth for which isWide reports true.
// NOTE(review): presumably measured in terminal columns — confirm where termWidth is set.
const wideBreakpoint = 120
|
||||
|
||||
// isWide reports whether the model's terminal width is at least wideBreakpoint.
// Views such as viewUsersTab use it to choose between wide and narrow column layouts.
func (m Model) isWide() bool {
|
||||
return m.termWidth >= wideBreakpoint
|
||||
}
|
||||
|
||||
func (m Model) renderTable(headers []string, items int, buildRows func(start, end int) [][]string, colWidths []int, styleOverride StyleOverride) string {
|
||||
if items == 0 {
|
||||
return ""
|
||||
@@ -28,7 +34,16 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
|
||||
selectedVisual := m.cursor - m.tableOffset
|
||||
rows := buildRows(m.tableOffset, end)
|
||||
|
||||
tableWidth := m.termWidth - chromePadH - 2
|
||||
colTotal := 0
|
||||
for _, w := range colWidths {
|
||||
colTotal += w
|
||||
}
|
||||
borderOverhead := 2 + len(colWidths) - 1
|
||||
tableWidth := colTotal + borderOverhead
|
||||
maxWidth := m.termWidth - chromePadH - 2
|
||||
if tableWidth > maxWidth {
|
||||
tableWidth = maxWidth
|
||||
}
|
||||
if tableWidth < 40 {
|
||||
tableWidth = 40
|
||||
}
|
||||
@@ -41,7 +56,11 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
|
||||
Rows(rows...).
|
||||
StyleFunc(func(row, col int) lipgloss.Style {
|
||||
if row == table.HeaderRow {
|
||||
return tableHeaderStyle
|
||||
h := tableHeaderStyle
|
||||
if col < len(colWidths) && colWidths[col] > 0 {
|
||||
h = h.Width(colWidths[col]).MaxWidth(colWidths[col])
|
||||
}
|
||||
return h
|
||||
}
|
||||
isSelected := row == selectedVisual
|
||||
if styleOverride != nil {
|
||||
@@ -51,7 +70,7 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
|
||||
style = tableSelectedStyle.Foreground(s.GetForeground())
|
||||
}
|
||||
if col < len(colWidths) && colWidths[col] > 0 {
|
||||
style = style.Width(colWidths[col])
|
||||
style = style.Width(colWidths[col]).MaxWidth(colWidths[col])
|
||||
}
|
||||
return style
|
||||
}
|
||||
@@ -64,7 +83,7 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
|
||||
base = tableSelectedStyle
|
||||
}
|
||||
if col < len(colWidths) && colWidths[col] > 0 {
|
||||
base = base.Width(colWidths[col])
|
||||
base = base.Width(colWidths[col]).MaxWidth(colWidths[col])
|
||||
}
|
||||
return base
|
||||
})
|
||||
|
||||
+84
-17
@@ -4,13 +4,14 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerko/uptop/internal/store"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
|
||||
"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
|
||||
|
||||
"github.com/charmbracelet/bubbles/viewport"
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
@@ -68,6 +69,7 @@ const (
|
||||
stateLogs
|
||||
stateUsers
|
||||
stateDetail
|
||||
stateAlertDetail
|
||||
stateFormSite
|
||||
stateFormAlert
|
||||
stateFormUser
|
||||
@@ -92,9 +94,10 @@ type Model struct {
|
||||
userFormData *userFormData
|
||||
maintFormData *maintFormData
|
||||
|
||||
logViewport viewport.Model
|
||||
isAdmin bool
|
||||
zones *zone.Manager
|
||||
logViewport viewport.Model
|
||||
logFilterImportant bool
|
||||
isAdmin bool
|
||||
zones *zone.Manager
|
||||
|
||||
deleteID int
|
||||
deleteName string
|
||||
@@ -120,6 +123,10 @@ type Model struct {
|
||||
|
||||
filterMode bool
|
||||
filterText string
|
||||
|
||||
// demoMode renders a stable status dot instead of the animated pulse so
|
||||
// screenshots/recordings don't capture the spinner mid-frame. Set via UPTOP_DEMO=1.
|
||||
demoMode bool
|
||||
}
|
||||
|
||||
func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine) Model {
|
||||
@@ -153,6 +160,7 @@ func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine) Model {
|
||||
collapsed: collapsed,
|
||||
theme: theme,
|
||||
themeIndex: themeIdx,
|
||||
demoMode: os.Getenv("UPTOP_DEMO") == "1",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -383,6 +391,14 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
return m, tea.Quit
|
||||
}
|
||||
return m, nil
|
||||
case stateAlertDetail:
|
||||
switch msg.String() {
|
||||
case "i", "esc":
|
||||
m.state = stateDashboard
|
||||
case "q":
|
||||
return m, tea.Quit
|
||||
}
|
||||
return m, nil
|
||||
case stateDashboard, stateLogs, stateUsers:
|
||||
switch msg.String() {
|
||||
case "q":
|
||||
@@ -392,6 +408,11 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.filterMode = true
|
||||
return m, nil
|
||||
}
|
||||
case "f":
|
||||
if m.state == stateLogs {
|
||||
m.logFilterImportant = !m.logFilterImportant
|
||||
return m, nil
|
||||
}
|
||||
case "tab":
|
||||
m.switchTab(m.currentTab + 1)
|
||||
case "pgup", "pgdown":
|
||||
@@ -463,6 +484,16 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.state = stateFormUser
|
||||
return m, m.initUserHuhForm()
|
||||
}
|
||||
case "t":
|
||||
if m.currentTab == 1 && len(m.alerts) > 0 {
|
||||
a := m.alerts[m.cursor]
|
||||
go func() {
|
||||
if err := m.engine.TestAlert(a.ID); err != nil {
|
||||
m.engine.AddLog(fmt.Sprintf("Test alert failed (%s): %v", a.Name, err))
|
||||
}
|
||||
}()
|
||||
return m, nil
|
||||
}
|
||||
case " ":
|
||||
if m.currentTab == 0 && len(m.sites) > 0 && m.sites[m.cursor].Type == "group" {
|
||||
gid := m.sites[m.cursor].ID
|
||||
@@ -481,6 +512,8 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
case "i":
|
||||
if m.currentTab == 0 && len(m.sites) > 0 {
|
||||
m.state = stateDetail
|
||||
} else if m.currentTab == 1 && len(m.alerts) > 0 {
|
||||
m.state = stateAlertDetail
|
||||
}
|
||||
case "x":
|
||||
if m.currentTab == 4 && len(m.maintenanceWindows) > 0 {
|
||||
@@ -727,11 +760,6 @@ func (m *Model) submitForm() {
|
||||
}
|
||||
|
||||
func (m Model) pulseIndicator() string {
|
||||
frame := m.tickCount % len(pulseFrames)
|
||||
brightness := int(m.pulsePos*155) + 100
|
||||
if brightness > 255 {
|
||||
brightness = 255
|
||||
}
|
||||
hasDown := false
|
||||
for _, s := range m.sites {
|
||||
if !s.Paused && !m.isMonitorInMaintenance(s.ID) && (s.Status == "DOWN" || s.Status == "SSL EXP") {
|
||||
@@ -739,6 +767,19 @@ func (m Model) pulseIndicator() string {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Stills can't show animation: render a stable status dot in demo mode.
|
||||
if m.demoMode {
|
||||
c := m.theme.Success
|
||||
if hasDown {
|
||||
c = m.theme.Danger
|
||||
}
|
||||
return lipgloss.NewStyle().Foreground(c).Render("●")
|
||||
}
|
||||
frame := m.tickCount % len(pulseFrames)
|
||||
brightness := int(m.pulsePos*155) + 100
|
||||
if brightness > 255 {
|
||||
brightness = 255
|
||||
}
|
||||
var color string
|
||||
if hasDown {
|
||||
color = fmt.Sprintf("#%02x%02x%02x", brightness, brightness/4, brightness/4)
|
||||
@@ -802,6 +843,8 @@ func (m Model) View() string {
|
||||
return ""
|
||||
case stateDetail:
|
||||
return m.viewDetailPanel()
|
||||
case stateAlertDetail:
|
||||
return m.viewAlertDetailPanel()
|
||||
default:
|
||||
return m.zones.Scan(m.viewDashboard())
|
||||
}
|
||||
@@ -811,13 +854,20 @@ func (m Model) viewDashboard() string {
|
||||
allSites := m.engine.GetAllSites()
|
||||
totalMonitors := 0
|
||||
downCount := 0
|
||||
lateCount := 0
|
||||
for _, s := range allSites {
|
||||
if s.Type == "group" {
|
||||
continue
|
||||
}
|
||||
totalMonitors++
|
||||
if !s.Paused && !m.isMonitorInMaintenance(s.ID) && (s.Status == "DOWN" || s.Status == "SSL EXP") {
|
||||
if s.Paused || m.isMonitorInMaintenance(s.ID) {
|
||||
continue
|
||||
}
|
||||
switch s.Status {
|
||||
case "DOWN", "SSL EXP":
|
||||
downCount++
|
||||
case "LATE":
|
||||
lateCount++
|
||||
}
|
||||
}
|
||||
offlineNodes := 0
|
||||
@@ -830,6 +880,8 @@ func (m Model) viewDashboard() string {
|
||||
var sitesLabel string
|
||||
if downCount > 0 {
|
||||
sitesLabel = fmt.Sprintf("Sites (%d↓)", downCount)
|
||||
} else if lateCount > 0 {
|
||||
sitesLabel = fmt.Sprintf("Sites (%d⚠)", lateCount)
|
||||
} else if totalMonitors > 0 {
|
||||
sitesLabel = fmt.Sprintf("Sites (%d)", totalMonitors)
|
||||
} else {
|
||||
@@ -895,14 +947,19 @@ func (m Model) viewDashboard() string {
|
||||
}
|
||||
}
|
||||
|
||||
upCount := totalMonitors - downCount
|
||||
upCount := totalMonitors - downCount - lateCount
|
||||
var upStr string
|
||||
if downCount > 0 {
|
||||
upStr = dangerStyle.Render(fmt.Sprintf("%d/%d UP", upCount, totalMonitors))
|
||||
} else if lateCount > 0 {
|
||||
upStr = warnStyle.Render(fmt.Sprintf("%d/%d UP", upCount, totalMonitors))
|
||||
} else {
|
||||
upStr = specialStyle.Render(fmt.Sprintf("%d/%d UP", upCount, totalMonitors))
|
||||
}
|
||||
statusParts := []string{upStr}
|
||||
if lateCount > 0 {
|
||||
statusParts = append(statusParts, warnStyle.Render(fmt.Sprintf("%d LATE", lateCount)))
|
||||
}
|
||||
if len(m.nodes) > 0 {
|
||||
online := 0
|
||||
for _, n := range m.nodes {
|
||||
@@ -910,7 +967,11 @@ func (m Model) viewDashboard() string {
|
||||
online++
|
||||
}
|
||||
}
|
||||
statusParts = append(statusParts, fmt.Sprintf("%d probes", online))
|
||||
probeLabel := "probes"
|
||||
if online == 1 {
|
||||
probeLabel = "probe"
|
||||
}
|
||||
statusParts = append(statusParts, fmt.Sprintf("%d %s", online, probeLabel))
|
||||
}
|
||||
statusLine := strings.Join(statusParts, subtleStyle.Render(" · "))
|
||||
|
||||
@@ -923,6 +984,10 @@ func (m Model) viewDashboard() string {
|
||||
switch m.currentTab {
|
||||
case 0:
|
||||
keys = "[/]Filter [n]New [e]Edit [i]Info [d]Del [p]Pause [T]Theme [Tab]Switch [q]Quit"
|
||||
case 1:
|
||||
keys = "[n]New [e]Edit [i]Info [d]Del [t]Test [T]Theme [Tab]Switch [q]Quit"
|
||||
case 2:
|
||||
keys = "[f]Filter [T]Theme [Tab]Switch [q]Quit"
|
||||
case 4:
|
||||
keys = "[n]New [x]End [d]Del [T]Theme [Tab]Switch [q]Quit"
|
||||
case 5:
|
||||
@@ -949,10 +1014,12 @@ func siteOrder(s models.Site) int {
|
||||
switch s.Status {
|
||||
case "DOWN", "SSL EXP":
|
||||
return 0
|
||||
case "PENDING":
|
||||
return 2
|
||||
default:
|
||||
case "LATE":
|
||||
return 1
|
||||
case "PENDING":
|
||||
return 3
|
||||
default:
|
||||
return 2
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user