fix(release): remove tagged scan image in cleanup step

fix(release): scan gates docker push, rc tags spare :latest, mirror waits for stable assets
rc.2 proved the grype gate was decorative — buildx pushed before the scan ran, so a red run still shipped the image (and rc tags moved :latest). Build amd64 locally, scan that, then run the multi-arch push from the warm builder cache. :latest now only moves on non-rc tags. mirror-release: poll until the Gitea asset count is stable across two polls (GoReleaser uploads sequentially — assets>0 could mirror a partial set) and stretch the timeout to 20 min since the release run can queue behind the Docker job on the single runner.
2026-06-12 17:21:42 -04:00 · 2026-06-12 17:20:48 -04:00 · 2026-06-12 17:02:55 -04:00 · 2026-06-12 16:16:28 -04:00 · 2026-06-12 15:37:47 -04:00 · 2026-06-12 15:31:57 -04:00
81 changed files with 6162 additions and 2371 deletions
@@ -10,3 +10,5 @@ vendor/
 *.local
 .env
 .github/
+dist/
+uptop
@@ -0,0 +1,46 @@
+name: Bug Report
+about: Something isn't working as expected
+labels:
+  - bug
+body:
+  - type: checkboxes
+    id: search
+    attributes:
+      label: Before filing
+      options:
+        - label: I searched existing issues and didn't find a match
+          required: true
+  - type: textarea
+    id: description
+    attributes:
+      label: What happened?
+      description: Include what you expected to happen instead.
+      placeholder: |
+        When I run `uptop serve`, the TUI crashes after 10 seconds.
+        I expected it to keep running and display monitor status.
+    validations:
+      required: true
+  - type: textarea
+    id: reproduction
+    attributes:
+      label: Steps to reproduce
+      placeholder: |
+        1. Run `uptop serve`
+        2. Wait ~10 seconds
+        3. TUI crashes with panic
+    validations:
+      required: true
+  - type: textarea
+    id: environment
+    attributes:
+      label: Environment & logs
+      description: Output of `uptop version`, OS, terminal. Paste any errors below.
+      render: shell
+      placeholder: |
+        uptop version 2026.06.1
+        OS: Debian 13
+        Terminal: Ghostty
+
+        [paste any error output here]
+    validations:
+      required: false
@@ -0,0 +1,20 @@
+name: Feature Request
+about: Suggest a new feature or enhancement
+labels:
+  - feature
+body:
+  - type: textarea
+    id: problem
+    attributes:
+      label: Problem
+      description: What's frustrating or missing?
+      placeholder: I find myself always needing to ...
+    validations:
+      required: true
+  - type: textarea
+    id: solution
+    attributes:
+      label: Proposed solution
+      description: How would you like this to work?
+    validations:
+      required: false
@@ -65,7 +65,7 @@ jobs:
          go-version: "1.26"

      - name: Install govulncheck
-        run: go install golang.org/x/vuln/cmd/govulncheck@latest
+        run: go install golang.org/x/vuln/cmd/govulncheck@v1.1.4

      - name: Run govulncheck
        run: govulncheck ./...
@@ -3,7 +3,7 @@ name: Release Binaries
 on:
  push:
    tags:
-      - "[0-9]*"
+      - "v[0-9]*"

 jobs:
  release:
@@ -13,7 +13,7 @@ jobs:
        shell: sh
    steps:
      - name: Install build tools
-        run: apk add --no-cache git gcc musl-dev
+        run: apk add --no-cache git

      - uses: actions/checkout@v4
        with:
@@ -33,8 +33,8 @@ jobs:

      - name: Install git-cliff
        run: |
-          apk add --no-cache curl jq
-          VERSION=$(curl -sS https://api.github.com/repos/orhun/git-cliff/releases/latest | jq -r '.tag_name' | sed 's/^v//')
+          apk add --no-cache curl
+          VERSION=2.13.1
          curl -sSL "https://github.com/orhun/git-cliff/releases/download/v${VERSION}/git-cliff-${VERSION}-x86_64-unknown-linux-musl.tar.gz" | tar xz -C /tmp
          mv /tmp/git-cliff-*/git-cliff /usr/local/bin/
          git-cliff --version
@@ -52,3 +52,7 @@ jobs:
          GORELEASER_FORCE_TOKEN: gitea
          GITEA_TOKEN: ${{ secrets.RELEASE_TOKEN }}
          GITEA_API_URL: http://gitea:3000/api/v1
+
+      # GitHub release relaying is handled by .github/workflows/mirror-release.yml,
+      # which runs on GitHub Actions when the push mirror delivers the tag and
+      # copies this run's Gitea release assets — no PAT needed on this side.
@@ -3,11 +3,11 @@ name: Release Docker
 on:
  push:
    tags:
-      - "[0-9]*"
+      - "v[0-9]*"
  workflow_dispatch:
    inputs:
      tag:
-        description: "Image tag (e.g. 2026.06.1). Defaults to latest commit SHA."
+        description: "Image tag (e.g. 1.0.0, no v prefix). Defaults to latest commit SHA."
        required: false

 jobs:
@@ -27,10 +27,23 @@ jobs:
              TAG="${{ github.sha }}"
            fi
          else
+            # Docker convention: git tag v1.2.3 -> image tag 1.2.3
            TAG="${{ github.ref_name }}"
+            TAG="${TAG#v}"
          fi
          echo "tag=$TAG" >> "$GITHUB_OUTPUT"

+          TAGS="lerkolabs/uptop:${TAG}"
+          TAGS="${TAGS},lerkolabs/uptop:sha-${SHORT_SHA}"
+          # :latest only for real releases — rc rehearsal tags must not move it
+          if [ "${{ github.ref_type }}" = "tag" ]; then
+            case "$TAG" in
+              *-*) ;;
+              *) TAGS="${TAGS},lerkolabs/uptop:latest" ;;
+            esac
+          fi
+          echo "tags=$TAGS" >> "$GITHUB_OUTPUT"
+
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

@@ -43,6 +56,26 @@ jobs:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

+      # Scan must gate the push: build amd64 locally, scan it, and only then run
+      # the multi-arch push (amd64 layers come from the builder cache, so the
+      # second build only adds the arm64 work).
+      - name: Build for scan (amd64, local)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          load: true
+          platforms: linux/amd64
+          tags: uptop-scan:${{ steps.meta.outputs.tag }}
+          build-args: |
+            VERSION=${{ steps.meta.outputs.tag }}
+            COMMIT=${{ github.sha }}
+            BUILD_DATE=${{ github.event.head_commit.timestamp }}
+
+      - name: Scan image for CVEs
+        run: |
+          curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin v0.114.0
+          grype uptop-scan:${{ steps.meta.outputs.tag }} --fail-on critical --output table
+
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
@@ -51,20 +84,12 @@ jobs:
          platforms: linux/amd64,linux/arm64
          sbom: true
          provenance: mode=max
-          tags: |
-            lerkolabs/uptop:${{ steps.meta.outputs.tag }}
-            lerkolabs/uptop:latest
-            lerkolabs/uptop:sha-${{ steps.meta.outputs.short_sha }}
+          tags: ${{ steps.meta.outputs.tags }}
          build-args: |
            VERSION=${{ steps.meta.outputs.tag }}
            COMMIT=${{ github.sha }}
            BUILD_DATE=${{ github.event.head_commit.timestamp }}

-      - name: Scan image for CVEs
-        run: |
-          curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin
-          grype lerkolabs/uptop:${{ steps.meta.outputs.tag }} --fail-on critical --output table || echo "::warning::CVE scan found critical issues — review output above"
-
      - name: Update Docker Hub description
        uses: peter-evans/dockerhub-description@v4
        with:
@@ -75,5 +100,7 @@ jobs:
      - name: Cleanup Docker artifacts
        if: always()
        run: |
+          # the scan image is tagged, so image prune won't catch it
+          docker image rm "uptop-scan:${{ steps.meta.outputs.tag }}" 2>/dev/null || true
          docker image prune -f
          docker builder prune -f --keep-storage=2GB
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Report a Bug
+    url: https://gitea.lerkolabs.com/lerkolabs/uptop/issues/new?template=bug_report.yaml
+    about: Report bugs on our Gitea instance
+  - name: Request a Feature
+    url: https://gitea.lerkolabs.com/lerkolabs/uptop/issues/new?template=feature_request.yaml
+    about: Suggest features on our Gitea instance
@@ -3,7 +3,7 @@ name: Mirror Release to GitHub
 on:
  push:
    tags:
-      - "[0-9]*"
+      - "v[0-9]*"

 permissions:
  contents: write
@@ -19,26 +19,35 @@ jobs:
        run: |
          API="https://gitea.lerkolabs.com/api/v1/repos/lerkolabs/uptop/releases/tags/${TAG}"

-          for i in $(seq 1 20); do
+          # 40 x 30s = 20 min: the Gitea release can queue behind the ~18-min
+          # Docker job on the single runner. Asset count must hold steady for
+          # two consecutive polls — GoReleaser uploads one file at a time, and
+          # mirroring mid-upload would publish a partial asset set.
+          PREV_COUNT=0
+          ASSET_COUNT=0
+          for i in $(seq 1 40); do
            if RESPONSE=$(curl -sf "$API" 2>/dev/null); then
              ASSET_COUNT=$(echo "$RESPONSE" | jq '.assets | length')
-              if [ "$ASSET_COUNT" -gt 0 ]; then
-                echo "Found release with $ASSET_COUNT assets"
+              if [ "$ASSET_COUNT" -gt 0 ] && [ "$ASSET_COUNT" -eq "$PREV_COUNT" ]; then
+                echo "Found release with $ASSET_COUNT assets (stable)"
                break
              fi
-              echo "Release exists but no assets yet... attempt $i/20"
+              echo "Release has $ASSET_COUNT assets (was $PREV_COUNT)... attempt $i/40"
+              PREV_COUNT="$ASSET_COUNT"
            else
-              echo "Waiting for Gitea release... attempt $i/20"
+              echo "Waiting for Gitea release... attempt $i/40"
            fi
            sleep 30
          done

          if [ -z "$RESPONSE" ] || [ "$ASSET_COUNT" -eq 0 ]; then
-            echo "::error::Gitea release for ${TAG} not found or has no assets after 10 minutes"
+            echo "::error::Gitea release for ${TAG} not found or has no assets after 20 minutes"
            exit 1
          fi

-          echo "$RESPONSE" | jq -r '.body // empty' > /tmp/release-notes.md
+          # select() so an empty-string body produces an empty file — `// empty`
+          # treats "" as truthy and wrote a blank line, defeating this fallback.
+          echo "$RESPONSE" | jq -r '.body | select(. != null and . != "")' > /tmp/release-notes.md

          if [ ! -s /tmp/release-notes.md ]; then
            echo "Release ${TAG} from [Gitea](https://gitea.lerkolabs.com/lerkolabs/uptop/releases/tag/${TAG})" > /tmp/release-notes.md
@@ -62,8 +71,11 @@ jobs:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TAG: ${{ github.ref_name }}
        run: |
+          PRERELEASE=""
+          case "$TAG" in *-*) PRERELEASE="--prerelease" ;; esac
          gh release create "$TAG" \
            --repo "$GITHUB_REPOSITORY" \
            --title "$TAG" \
            --notes-file /tmp/release-notes.md \
+            $PRERELEASE \
            /tmp/assets/*
@@ -8,16 +8,20 @@ release:
  gitea:
    owner: lerkolabs
    name: uptop
+  prerelease: auto

 builds:
-  - main: ./cmd/uptop/main.go
+  - main: ./cmd/uptop
    binary: uptop
    env:
-      - CGO_ENABLED=1
+      - CGO_ENABLED=0
    goos:
      - linux
+      - darwin
+      - windows
    goarch:
      - amd64
+      - arm64
    ldflags:
      - -s -w
      - -X main.version={{ .Version }}
@@ -29,6 +33,9 @@ builds:
 archives:
  - formats: [tar.gz]
    name_template: "{{ .ProjectName }}_{{ .Os }}_{{ .Arch }}"
+    format_overrides:
+      - goos: windows
+        formats: [zip]

 checksum:
  name_template: checksums.txt
@@ -52,22 +59,7 @@ nfpms:
        dst: /usr/share/doc/uptop/LICENSE
        type: doc

-homebrew_casks:
-  - name: uptop
-    homepage: https://gitea.lerkolabs.com/lerkolabs/uptop
-    description: Self-hosted uptime monitoring with a TUI over SSH
-    directory: Casks
-    skip_upload: true
-    commit_msg_template: "update uptop to {{ .Tag }}"
-    url:
-      template: "https://gitea.lerkolabs.com/lerkolabs/uptop/releases/download/{{ .Tag }}/{{ .ArtifactName }}"
-    repository:
-      owner: lerkolabs
-      name: homebrew-tap
-      git:
-        url: "ssh://git@gitea.lerkolabs.com:2222/lerkolabs/homebrew-tap.git"
-        private_key: "{{ if index .Env \"TAP_SSH_KEY\" }}{{ .Env.TAP_SSH_KEY }}{{ end }}"
-        ssh_command: "ssh -o StrictHostKeyChecking=accept-new"
-
-changelog:
-  disable: true
+# Changelog generation must stay enabled: the --release-notes flag is consumed
+# by the changelog pipe, so disabling it silently drops the git-cliff notes
+# (empty release body on v0.1.0-rc.1). With --release-notes set, GoReleaser
+# skips its own generation and uses the file.
@@ -0,0 +1,11 @@
+ignore:
+  # SCP path traversal in charmbracelet/wish — same flaw, two ids: grype has
+  # matched it as CVE-2026-41589 and as GHSA-xjvp-7243-rg9h depending on db
+  # version, and ignore matching is exact-id, so both stay listed.
+  # We only import wish/bubbletea for the SSH TUI server — the vulnerable
+  # scp.Middleware / scp.NewFileSystemHandler symbols are never compiled in
+  # (govulncheck reachability agrees). No fix for wish v1; v2
+  # (charm.land/wish/v2 >= 2.0.1) requires the bubbletea-v2 stack migration,
+  # tracked in issue #126. Remove both entries when that lands.
+  - vulnerability: CVE-2026-41589
+  - vulnerability: GHSA-xjvp-7243-rg9h
@@ -1,18 +1,17 @@
 # --- Stage 1: Builder ---
-FROM golang:1.26-alpine3.23@sha256:91eda9776261207ea25fd06b5b7fed8d397dd2c0a283e77f2ab6e91bfa71079d AS builder
-RUN apk add --no-cache gcc musl-dev
+FROM golang:1.26.4-alpine3.23@sha256:f23e8b227fb4493eabe03bede4d5a32d04092da71962f1fb79b5f7d1e6c2a17f AS builder
 WORKDIR /app
 COPY go.mod go.sum ./
 RUN --mount=type=cache,target=/go/pkg/mod \
    go mod download
 COPY . .
-ENV CGO_ENABLED=1
+ENV CGO_ENABLED=0
 ARG VERSION=dev
 ARG COMMIT=none
 ARG BUILD_DATE=unknown
 RUN --mount=type=cache,target=/go/pkg/mod \
    --mount=type=cache,target=/root/.cache/go-build \
-    go build -trimpath -ldflags="-s -w -X main.version=${VERSION} -X main.commit=${COMMIT} -X main.date=${BUILD_DATE}" -o uptop ./cmd/uptop/main.go
+    go build -trimpath -ldflags="-s -w -X main.version=${VERSION} -X main.commit=${COMMIT} -X main.date=${BUILD_DATE}" -o uptop ./cmd/uptop

 # --- Stage 2: Runner ---
 FROM alpine:3.23@sha256:5b10f432ef3da1b8d4c7eb6c487f2f5a8f096bc91145e68878dd4a5019afde11
@@ -32,6 +31,8 @@ ENV UPTOP_SSH_HOST_KEY=/data/.ssh/id_ed25519
 ENV UPTOP_PORT=23234

 EXPOSE 23234
+HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
+  CMD wget -qO- http://localhost:8080/api/health || exit 1
 USER uptop
 ENTRYPOINT ["docker-entrypoint.sh"]
 CMD ["./uptop"]
@@ -22,7 +22,7 @@ Built on [RDGames/go-upkeep](https://github.com/RDGames/go-upkeep). Rewritten fo
 ## Features

 - **6 check types** — HTTP, Push (heartbeat), Ping, Port, DNS, Groups
- **9 alert providers** — Discord, Slack, Email, Ntfy, Webhook, Telegram, PagerDuty, Pushover, Gotify
+- **10 alert providers** — Discord, Slack, Email, Ntfy, Webhook, Telegram, PagerDuty, Pushover, Gotify, Opsgenie
 - **Config as code** — define monitors in YAML, apply declaratively, version control your setup
 - **HA clustering** — leader/follower with automatic failover
 - **Prometheus metrics** — `/metrics` endpoint, wire it straight to Grafana
@@ -30,6 +30,8 @@ Built on [RDGames/go-upkeep](https://github.com/RDGames/go-upkeep). Rewritten fo
 - **SQLite or Postgres** — SQLite for single-node, Postgres for production
 - **Uptime Kuma import** — migrate from Kuma with one command

+> Group monitors roll up child status for display but don't fire their own alerts yet — attach alerts to the children.
+
 ## Screenshots

 <table>
@@ -79,10 +81,14 @@ services:
      # - UPTOP_ADMIN_KEY=ssh-ed25519 AAAA... you@host
    volumes:
      - ./data:/data
+    sysctls:
+      - net.ipv4.ping_group_range=0 2147483647
 ```

 First run: set `UPTOP_ADMIN_KEY` to your SSH public key, or attach to the container and add it in the Users tab.

+The `sysctls` line enables unprivileged ICMP inside the container — without it, ping monitors get no response and silently report DOWN.
+
 </details>

 <details>
@@ -138,9 +144,22 @@ Full reference in [docs/config-as-code.md](docs/config-as-code.md).
 | `UPTOP_INSECURE_SKIP_VERIFY` | `false` | Skip TLS verification for checks |
 | `UPTOP_ALLOW_PRIVATE_TARGETS` | `false` | Allow monitoring RFC1918/loopback addresses |
 | `UPTOP_ADMIN_KEY` | | SSH public key seeded as first admin on startup |
+| `UPTOP_TRUSTED_PROXIES` | | Comma-separated CIDRs/IPs whose `X-Forwarded-For` is trusted ([details](#running-behind-a-reverse-proxy)) |

 See [`.env.example`](.env.example) for all options including TLS, probes, and advanced settings.

+### Running behind a reverse proxy
+
+By default uptop ignores the `X-Forwarded-For` header and rate-limits by the direct connection address — so a client can't spoof the header to bypass limits. If uptop sits behind a reverse proxy (nginx, Caddy, Cloudflare, an ALB), set `UPTOP_TRUSTED_PROXIES` to the proxy's address(es) so the real client IP is used instead:
+
+    # single nginx/Caddy on the same host
+    UPTOP_TRUSTED_PROXIES=127.0.0.1
+
+    # one or more proxy subnets (for Cloudflare, list its published IP ranges instead)
+    UPTOP_TRUSTED_PROXIES=10.0.0.0/8,172.16.0.0/12
+
+Only requests whose immediate peer is in this list have their `X-Forwarded-For` honored (right-most non-trusted hop wins). Bare IPs are treated as single hosts; invalid entries are warned about and skipped. Leave it unset if uptop is exposed directly.
+
 ### Encryption

 Set `UPTOP_ENCRYPTION_KEY` to encrypt alert credentials (SMTP passwords, webhook URLs, API tokens) at rest with AES-256-GCM. Generate a key:
@@ -149,6 +168,19 @@ Set `UPTOP_ENCRYPTION_KEY` to encrypt alert credentials (SMTP passwords, webhook

 Without this, credentials are stored as plaintext in the database. uptop warns on startup if unset. To encrypt credentials on an existing install, run `uptop migrate-secrets` with the key set.

+### Data retention
+
+uptop prunes its own history in the background — no external cleanup jobs needed:
+
+| Data | Kept |
+|---|---|
+| Check history | newest 1,000 checks per monitor |
+| State changes (UP/DOWN transitions) | newest 5,000 per monitor |
+| Logs | newest 200 entries |
+| Maintenance windows | 7 days after they end (configurable) |
+
+Sparklines, uptime percentages, and SLA reports are computed from these windows, so very long-horizon stats aren't retained. Export to Prometheus via `/metrics` if you need unlimited history.
+
 ## Clustering

 uptop supports three modes: **leader** (default single node), **follower** (HA failover — takes over if the leader goes down), and **probe** (stateless distributed checks from multiple regions).
@@ -161,7 +193,7 @@ Export your Kuma backup JSON, then:

 ```bash
 curl -X POST http://localhost:8080/api/import/kuma \
-  -H "X-Upkeep-Secret: your-secret" \
+  -H "X-Uptop-Secret: your-secret" \
  -H "Content-Type: application/json" \
  -d @kuma-backup.json
 ```
@@ -23,7 +23,11 @@ filter_unconventional = true
 split_commits = false
 protect_breaking_commits = false
 filter_commits = false
-tag_pattern = "[0-9]*"
+tag_pattern = "v[0-9].*"
+# rc tags are pipeline rehearsals, not releases — without this, the final
+# tag's notes would only cover commits since the last rc (near-empty for
+# v0.1.0). Ignored tags fold their commits into the next real release.
+ignore_tags = "v.*-rc.*"
 topo_order = false
 sort_commits = "oldest"

@@ -0,0 +1,133 @@
+package main
+
+import (
+	"net"
+	"os"
+	"strconv"
+	"time"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/server"
+)
+
+type appConfig struct { // runtime settings; parseConfig fills these from built-in defaults and UPTOP_* environment variables
+	Port       int    // UPTOP_PORT; defaults to 23234
+	SSHHostKey string // UPTOP_SSH_HOST_KEY (via envOrDefault); starts as ".ssh/id_ed25519"
+
+	DBType string // UPTOP_DB_TYPE; defaults to "sqlite"
+	DBDSN  string // UPTOP_DB_DSN; defaults to "uptop.db"
+
+	HTTPPort int    // UPTOP_HTTP_PORT; defaults to 8080
+	TLSCert  string // UPTOP_TLS_CERT
+	TLSKey   string // UPTOP_TLS_KEY
+
+	StatusEnabled bool   // true only when UPTOP_STATUS_ENABLED is exactly "true"
+	StatusTitle   string // UPTOP_STATUS_TITLE; defaults to "System Status"
+
+	ClusterMode   string // UPTOP_CLUSTER_MODE; defaults to "leader"
+	ClusterSecret string // UPTOP_CLUSTER_SECRET
+	PeerURL       string // UPTOP_PEER_URL
+	NodeID        string // UPTOP_NODE_ID
+	NodeName      string // UPTOP_NODE_NAME
+	NodeRegion    string // UPTOP_NODE_REGION
+
+	AggStrategy         string        // UPTOP_AGG_STRATEGY
+	AllowPrivateTargets bool          // true only when UPTOP_ALLOW_PRIVATE_TARGETS is exactly "true"
+	InsecureSkipVerify  bool          // true only when UPTOP_INSECURE_SKIP_VERIFY is exactly "true"
+	MaintRetention      time.Duration // UPTOP_MAINT_RETENTION; defaults to 7 days
+	EncryptionKey       string        // UPTOP_ENCRYPTION_KEY
+
+	MetricsPublic  bool         // true only when UPTOP_METRICS_PUBLIC is exactly "true"
+	CORSOrigin     string       // UPTOP_CORS_ORIGIN
+	TrustedProxies []*net.IPNet // parsed from UPTOP_TRUSTED_PROXIES by parseTrustedProxies
+
+	AdminKey string // UPTOP_ADMIN_KEY
+	KeysFile string // UPTOP_KEYS
+}
+
+func parseConfig() appConfig { // builds the runtime config: built-in defaults, each overridden by its UPTOP_* environment variable when set
+	cfg := appConfig{
+		Port:           23234,
+		SSHHostKey:     ".ssh/id_ed25519",
+		DBType:         "sqlite",
+		DBDSN:          "uptop.db",
+		HTTPPort:       8080,
+		StatusTitle:    "System Status",
+		ClusterMode:    "leader",
+		MaintRetention: 7 * 24 * time.Hour,
+	}
+
+	if v := os.Getenv("UPTOP_PORT"); v != "" {
+		if n, err := strconv.Atoi(v); err == nil { // a non-integer value is silently ignored; the default stays
+			cfg.Port = n
+		}
+	}
+	if v := os.Getenv("UPTOP_DB_TYPE"); v != "" {
+		cfg.DBType = v
+	}
+	if v := os.Getenv("UPTOP_DB_DSN"); v != "" {
+		cfg.DBDSN = v
+	}
+	if v := os.Getenv("UPTOP_HTTP_PORT"); v != "" {
+		if n, err := strconv.Atoi(v); err == nil { // same rule as UPTOP_PORT: bad input keeps 8080
+			cfg.HTTPPort = n
+		}
+	}
+	if os.Getenv("UPTOP_STATUS_ENABLED") == "true" { // only the exact string "true" enables the flag
+		cfg.StatusEnabled = true
+	}
+	if v := os.Getenv("UPTOP_STATUS_TITLE"); v != "" {
+		cfg.StatusTitle = v
+	}
+	if v := os.Getenv("UPTOP_CLUSTER_MODE"); v != "" {
+		cfg.ClusterMode = v
+	}
+	if v := os.Getenv("UPTOP_PEER_URL"); v != "" {
+		cfg.PeerURL = v
+	}
+	if v := os.Getenv("UPTOP_CLUSTER_SECRET"); v != "" {
+		cfg.ClusterSecret = v
+	}
+
+	cfg.NodeID = os.Getenv("UPTOP_NODE_ID") // the settings below have no default: unset means empty string
+	cfg.NodeName = os.Getenv("UPTOP_NODE_NAME")
+	cfg.NodeRegion = os.Getenv("UPTOP_NODE_REGION")
+	cfg.AggStrategy = os.Getenv("UPTOP_AGG_STRATEGY")
+
+	cfg.AllowPrivateTargets = os.Getenv("UPTOP_ALLOW_PRIVATE_TARGETS") == "true" // booleans: anything other than "true" is false
+	cfg.InsecureSkipVerify = os.Getenv("UPTOP_INSECURE_SKIP_VERIFY") == "true"
+	cfg.MetricsPublic = os.Getenv("UPTOP_METRICS_PUBLIC") == "true"
+
+	cfg.EncryptionKey = os.Getenv("UPTOP_ENCRYPTION_KEY")
+	cfg.TLSCert = os.Getenv("UPTOP_TLS_CERT")
+	cfg.TLSKey = os.Getenv("UPTOP_TLS_KEY")
+	cfg.CORSOrigin = os.Getenv("UPTOP_CORS_ORIGIN")
+	cfg.TrustedProxies = parseTrustedProxies(os.Getenv("UPTOP_TRUSTED_PROXIES")) // nil when the variable is blank
+
+	cfg.SSHHostKey = envOrDefault("UPTOP_SSH_HOST_KEY", cfg.SSHHostKey) // envOrDefault is defined elsewhere; presumably env value or the given default
+	cfg.AdminKey = os.Getenv("UPTOP_ADMIN_KEY")
+	cfg.KeysFile = os.Getenv("UPTOP_KEYS")
+
+	if v := os.Getenv("UPTOP_MAINT_RETENTION"); v != "" {
+		if d, err := time.ParseDuration(v); err == nil && d > 0 { // unparsable or non-positive durations keep the 7-day default
+			cfg.MaintRetention = d
+		}
+	}
+
+	return cfg
+}
+
+func (c appConfig) serverConfig(quietHTTPLog bool) server.ServerConfig { // copies the server-facing fields of c into a server.ServerConfig; quietHTTPLog is passed through unchanged
+	return server.ServerConfig{
+		Port:           c.HTTPPort, // HTTPPort, not the Port field
+		EnableStatus:   c.StatusEnabled,
+		Title:          c.StatusTitle,
+		ClusterKey:     c.ClusterSecret, // ClusterSecret is exposed to the server as ClusterKey
+		TLSCert:        c.TLSCert,
+		TLSKey:         c.TLSKey,
+		ClusterMode:    c.ClusterMode,
+		MetricsPublic:  c.MetricsPublic,
+		CORSOrigin:     c.CORSOrigin,
+		TrustedProxies: c.TrustedProxies,
+		QuietHTTPLog:   quietHTTPLog,
+	}
+}
@@ -0,0 +1,115 @@
+package main
+
+import (
+	"context"
+	"crypto/ed25519"
+	"crypto/rand"
+	"errors"
+	"testing"
+	"time"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store/storetest"
+
+	"github.com/charmbracelet/ssh"
+	gossh "golang.org/x/crypto/ssh"
+)
+
+// kcMockStore embeds BaseMock for default no-ops; only GetAllUsers is
+// overridden because the tests mutate users/err between calls.
+type kcMockStore struct {
+	storetest.BaseMock
+	users []models.User // returned as-is by GetAllUsers
+	err   error         // returned alongside users by GetAllUsers
+}
+
+func (m *kcMockStore) GetAllUsers(_ context.Context) ([]models.User, error) { return m.users, m.err } // context is ignored; reads the current field values on every call
+
+func testKey(t *testing.T) (string, ssh.PublicKey) { // generates a fresh ed25519 key; returns its authorized_keys encoding and the public key object
+	t.Helper()
+	pub, _, err := ed25519.GenerateKey(rand.Reader) // the private half is discarded
+	if err != nil {
+		t.Fatal(err)
+	}
+	sk, err := gossh.NewPublicKey(pub)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return string(gossh.MarshalAuthorizedKey(sk)), sk
+}
+
+func TestKeyCache_AllowsKnownDeniesUnknown(t *testing.T) { // a key listed in the store's users is allowed; an unrelated key is not
+	authorized, known := testKey(t)
+	_, unknown := testKey(t) // second key pair that is never added to the store
+	kc := newKeyCache(&kcMockStore{users: []models.User{{PublicKey: authorized}}})
+
+	if !kc.IsAllowed(known) {
+		t.Error("known key denied")
+	}
+	if kc.IsAllowed(unknown) {
+		t.Error("unknown key allowed")
+	}
+}
+
+func TestKeyCache_RetainsKeysOnRefreshError(t *testing.T) { // a store error on a later lookup must not drop a key that was allowed before
+	authorized, known := testKey(t)
+	ms := &kcMockStore{users: []models.User{{PublicKey: authorized}}}
+	kc := newKeyCache(ms)
+
+	if !kc.IsAllowed(known) {
+		t.Fatal("known key denied on first refresh")
+	}
+
+	// DB goes down and the cache goes stale: a transient error must not lock
+	// every admin out — the previous key set stays in effect.
+	ms.err = errors.New("db down")
+	kc.mu.Lock()
+	kc.updated = time.Now().Add(-time.Hour) // backdate the cache timestamp by an hour; presumably past its refresh interval
+	kc.mu.Unlock()
+
+	if !kc.IsAllowed(known) {
+		t.Error("transient refresh error locked out a previously valid key")
+	}
+}
+
+func TestKeyCache_FailsClosedAfterInvalidate(t *testing.T) { // after Invalidate, a store error must deny the key instead of serving the old cache
+	authorized, known := testKey(t)
+	ms := &kcMockStore{users: []models.User{{PublicKey: authorized}}}
+	kc := newKeyCache(ms)
+
+	if !kc.IsAllowed(known) {
+		t.Fatal("known key denied on first refresh")
+	}
+
+	// Revocation happened (Invalidate) and the DB is unreachable for the
+	// re-read: the revoked key must NOT keep working off the stale cache.
+	ms.err = errors.New("db down")
+	kc.Invalidate()
+
+	if kc.IsAllowed(known) {
+		t.Error("revoked key still allowed while DB is down — fails open")
+	}
+}
+
+func TestUserInvalidatingStore_DeleteDropsKeyCache(t *testing.T) { // DeleteUser through userInvalidatingStore must leave no cached key behind
+	authorized, known := testKey(t)
+	ms := &kcMockStore{users: []models.User{{PublicKey: authorized}}}
+	kc := newKeyCache(ms)
+	s := &userInvalidatingStore{Store: ms, kc: kc} // wrapper under test: the mock store paired with the key cache
+
+	if !kc.IsAllowed(known) {
+		t.Fatal("known key denied on first refresh")
+	}
+
+	// Revoke the user; DB unreachable immediately after. The cached key must
+	// be gone the moment the delete returns.
+	if err := s.DeleteUser(context.Background(), 1); err != nil {
+		t.Fatal(err)
+	}
+	ms.users = nil // mock does not remove the user itself, so clear the list by hand
+	ms.err = errors.New("db down")
+
+	if kc.IsAllowed(known) {
+		t.Error("deleted user's key still allowed from stale cache")
+	}
+}
@@ -6,12 +6,12 @@ import (
 	"errors"
 	"flag"
 	"fmt"
-	"log"
+	"log/slog"
+	"net"
 	"net/url"
 	"os"
 	"os/signal"
 	"path/filepath"
-	"strconv"
 	"strings"
 	"sync"
 	"syscall"
@@ -40,7 +40,9 @@ var (
 )

 func main() {
-	log.SetOutput(os.Stderr)
+	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
+		Level: slog.LevelInfo,
+	})))

 	if len(os.Args) >= 2 {
 		switch os.Args[1] {
@@ -85,6 +87,39 @@ func redactDSN(dsn string) string {
 	return u.String()
 }

+// parseTrustedProxies turns UPTOP_TRUSTED_PROXIES (comma-separated CIDRs or
+// bare IPs) into networks the rate limiter trusts to set X-Forwarded-For.
+//
+// Bare IPs become single-host ranges built directly from the parsed address.
+// An IPv4-mapped IPv6 entry (::ffff:a.b.c.d) therefore yields a.b.c.d/32;
+// appending "/32" to the text and re-parsing it would instead produce ::/32,
+// which trusts a wide IPv6 range and never matches the intended host.
+//
+// Invalid entries are warned about and skipped, so a typo degrades to
+// "ignore XFF" (safe) rather than aborting boot. The result is nil when raw is
+// blank or contains no valid entry.
+func parseTrustedProxies(raw string) []*net.IPNet {
+	if strings.TrimSpace(raw) == "" {
+		return nil
+	}
+	var cidrs []*net.IPNet
+	for _, part := range strings.Split(raw, ",") {
+		part = strings.TrimSpace(part)
+		if part == "" {
+			continue
+		}
+		if !strings.Contains(part, "/") {
+			if ip := net.ParseIP(part); ip != nil {
+				// To4 also unwraps IPv4-mapped IPv6, so the mask width always
+				// matches the address form stored in the IPNet.
+				if v4 := ip.To4(); v4 != nil {
+					ip = v4
+				}
+				bits := 8 * len(ip)
+				cidrs = append(cidrs, &net.IPNet{IP: ip, Mask: net.CIDRMask(bits, bits)})
+				continue
+			}
+			// Not an IP: fall through so ParseCIDR reports the entry as invalid.
+		}
+		_, ipnet, err := net.ParseCIDR(part)
+		if err != nil {
+			slog.Warn("ignoring invalid UPTOP_TRUSTED_PROXIES entry", "entry", part, "err", err) //nolint:gosec // structured slog, not format string
+			continue
+		}
+		cidrs = append(cidrs, ipnet)
+	}
+	return cidrs
+}
+
 func openStore(dbType, dsn string) store.Store {
 	var ss *store.SQLStore
 	var err error
@@ -94,21 +129,21 @@ func openStore(dbType, dsn string) store.Store {
 		ss, err = store.NewSQLiteStore(dsn)
 	}
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "database error: %v\n", err)
+		slog.Error("database connection failed", "err", err)
 		os.Exit(1)
 	}
 	if encKey := os.Getenv("UPTOP_ENCRYPTION_KEY"); encKey != "" {
 		enc, err := store.NewEncryptor(encKey)
 		if err != nil {
-			fmt.Fprintf(os.Stderr, "encryption key error: %v\n", err)
+			slog.Error("encryption key invalid", "err", err)
 			os.Exit(1)
 		}
 		ss.SetEncryptor(enc)
 	} else {
-		fmt.Println("WARNING: No UPTOP_ENCRYPTION_KEY set. Alert credentials stored unencrypted.")
+		slog.Warn("no UPTOP_ENCRYPTION_KEY set, alert credentials stored unencrypted")
 	}
-	if err := ss.Init(); err != nil {
-		fmt.Fprintf(os.Stderr, "database init error: %v\n", err)
+	if err := ss.Init(context.Background()); err != nil {
+		slog.Error("database init failed", "err", err)
 		os.Exit(1)
 	}
 	return ss
@@ -133,16 +168,16 @@ func runApply(args []string) {

 	f, err := config.LoadFile(*filePath)
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		slog.Error("config load failed", "err", err)
 		os.Exit(1)
 	}

-	changes, err := config.Apply(s, f, config.ApplyOpts{
+	changes, err := config.Apply(context.Background(), s, f, config.ApplyOpts{
 		DryRun: *dryRun,
 		Prune:  *prune,
 	})
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		slog.Error("config apply failed", "err", err)
 		os.Exit(1)
 	}

@@ -158,14 +193,14 @@ func runExport(args []string) {

 	s := openStore(*dbType, *dsn)

-	f, err := config.Export(s)
+	f, err := config.Export(context.Background(), s)
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		slog.Error("export failed", "err", err)
 		os.Exit(1)
 	}

 	if err := config.WriteFile(f, *outPath); err != nil {
-		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		slog.Error("export write failed", "err", err)
 		os.Exit(1)
 	}
 }
@@ -183,7 +218,7 @@ func runMigrateSecrets(args []string) {
 	}
 	enc, err := store.NewEncryptor(encKey)
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		slog.Error("encryption key invalid", "err", err)
 		os.Exit(1)
 	}

@@ -194,25 +229,25 @@ func runMigrateSecrets(args []string) {
 		ss, err = store.NewSQLiteStore(*dsn)
 	}
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "database error: %v\n", err)
+		slog.Error("database connection failed", "err", err)
 		os.Exit(1)
 	}
-	if err := ss.Init(); err != nil {
-		fmt.Fprintf(os.Stderr, "database init error: %v\n", err)
-		os.Exit(1)
-	}
-
-	alerts, err := ss.GetAllAlerts()
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "error loading alerts: %v\n", err)
+	if err := ss.Init(context.Background()); err != nil {
+		slog.Error("database init failed", "err", err)
 		os.Exit(1)
 	}

 	ss.SetEncryptor(enc)
+
+	alerts, err := ss.GetAllAlerts(context.Background())
+	if err != nil {
+		slog.Error("failed to load alerts", "err", err)
+		os.Exit(1)
+	}
 	migrated := 0
 	for _, a := range alerts {
-		if err := ss.UpdateAlert(a.ID, a.Name, a.Type, a.Settings); err != nil {
-			fmt.Fprintf(os.Stderr, "error migrating alert %q: %v\n", a.Name, err)
+		if err := ss.UpdateAlert(context.Background(), a.ID, a.Name, a.Type, a.Settings); err != nil {
+			slog.Error("alert migration failed", "alert", a.Name, "err", err)
 			os.Exit(1)
 		}
 		migrated++
@@ -221,64 +256,19 @@ func runMigrateSecrets(args []string) {
 }

 func runServe(args []string) {
-	portVal := 23234
-	dbType := "sqlite"
-	dbDSN := "uptop.db"
-	httpPort := 8080
-	enableStatus := false
-	statusTitle := "System Status"
-	clusterMode := "leader"
-	clusterPeer := ""
-	clusterKey := ""
+	cfg := parseConfig()

-	if v := os.Getenv("UPTOP_PORT"); v != "" {
-		if p, err := strconv.Atoi(v); err == nil {
-			portVal = p
-		}
-	}
-	if v := os.Getenv("UPTOP_DB_TYPE"); v != "" {
-		dbType = v
-	}
-	if v := os.Getenv("UPTOP_DB_DSN"); v != "" {
-		dbDSN = v
-	}
-	if v := os.Getenv("UPTOP_HTTP_PORT"); v != "" {
-		if p, err := strconv.Atoi(v); err == nil {
-			httpPort = p
-		}
-	}
-	if v := os.Getenv("UPTOP_STATUS_ENABLED"); v == "true" {
-		enableStatus = true
-	}
-	if v := os.Getenv("UPTOP_STATUS_TITLE"); v != "" {
-		statusTitle = v
-	}
-	if v := os.Getenv("UPTOP_CLUSTER_MODE"); v != "" {
-		clusterMode = v
-	}
-	if v := os.Getenv("UPTOP_PEER_URL"); v != "" {
-		clusterPeer = v
-	}
-	if v := os.Getenv("UPTOP_CLUSTER_SECRET"); v != "" {
-		clusterKey = v
-	}
-
-	nodeID := os.Getenv("UPTOP_NODE_ID")
-	nodeName := os.Getenv("UPTOP_NODE_NAME")
-	nodeRegion := os.Getenv("UPTOP_NODE_REGION")
-	aggStrategy := os.Getenv("UPTOP_AGG_STRATEGY")
-
-	if clusterMode == "probe" {
-		if nodeID == "" {
+	if cfg.ClusterMode == "probe" {
+		if cfg.NodeID == "" {
 			fmt.Fprintln(os.Stderr, "UPTOP_NODE_ID is required for probe mode")
 			os.Exit(1)
 		}
-		if clusterPeer == "" {
+		if cfg.PeerURL == "" {
 			fmt.Fprintln(os.Stderr, "UPTOP_PEER_URL is required for probe mode")
 			os.Exit(1)
 		}

-		fmt.Printf("Cluster: Running as PROBE (node=%s, region=%s)\n", nodeID, nodeRegion)
+		fmt.Printf("Cluster: Running as PROBE (node=%s, region=%s)\n", cfg.NodeID, cfg.NodeRegion)

 		ctx, cancel := context.WithCancel(context.Background())
 		defer cancel()
@@ -289,29 +279,28 @@ func runServe(args []string) {
 			cancel()
 		}()

-		probeAllowPrivate := os.Getenv("UPTOP_ALLOW_PRIVATE_TARGETS") == "true"
-		if probeAllowPrivate {
-			fmt.Println("WARNING: Private target blocking disabled. Monitor URLs can reach internal networks.")
+		if cfg.AllowPrivateTargets {
+			slog.Warn("private target blocking disabled, monitor URLs can reach internal networks")
 		}

 		if err := cluster.RunProbe(ctx, cluster.ProbeConfig{
-			NodeID:              nodeID,
-			NodeName:            nodeName,
-			Region:              nodeRegion,
-			LeaderURL:           clusterPeer,
-			SharedKey:           clusterKey,
+			NodeID:              cfg.NodeID,
+			NodeName:            cfg.NodeName,
+			Region:              cfg.NodeRegion,
+			LeaderURL:           cfg.PeerURL,
+			SharedKey:           cfg.ClusterSecret,
 			Interval:            30,
-			AllowPrivateTargets: probeAllowPrivate,
+			AllowPrivateTargets: cfg.AllowPrivateTargets,
 		}); err != nil {
-			fmt.Fprintf(os.Stderr, "Probe error: %v\n", err)
+			slog.Error("probe failed", "err", err)
 		}
 		return
 	}

 	fs := flag.NewFlagSet("serve", flag.ExitOnError)
-	port := fs.Int("port", portVal, "SSH Port")
-	flagDBType := fs.String("db-type", dbType, "Database type")
-	flagDSN := fs.String("dsn", dbDSN, "Database DSN")
+	port := fs.Int("port", cfg.Port, "SSH Port")
+	flagDBType := fs.String("db-type", cfg.DBType, "Database type")
+	flagDSN := fs.String("dsn", cfg.DBDSN, "Database DSN")
 	demo := fs.Bool("demo", false, "Seed demo data")
 	importKuma := fs.String("import-kuma", "", "Import Uptime Kuma backup JSON file")
 	_ = fs.Parse(args) // ExitOnError: parse errors exit before returning
@@ -320,31 +309,32 @@ func runServe(args []string) {
 	var dbErr error
 	if *flagDBType == "postgres" {
 		ss, dbErr = store.NewPostgresStore(*flagDSN)
-		fmt.Printf("Using PostgreSQL: %s\n", redactDSN(*flagDSN))
+		slog.Info("database connected", "type", "postgres", "dsn", redactDSN(*flagDSN))
 	} else {
 		ss, dbErr = store.NewSQLiteStore(*flagDSN)
-		fmt.Printf("Using SQLite: %s\n", *flagDSN)
+		slog.Info("database connected", "type", "sqlite", "dsn", *flagDSN)
 	}
 	if dbErr != nil {
-		fmt.Fprintf(os.Stderr, "database connection error: %v\n", dbErr)
+		slog.Error("database connection failed", "err", dbErr)
 		os.Exit(1)
 	}
 	defer ss.Close()

-	if encKey := os.Getenv("UPTOP_ENCRYPTION_KEY"); encKey != "" {
-		enc, err := store.NewEncryptor(encKey)
+	if cfg.EncryptionKey != "" {
+		enc, err := store.NewEncryptor(cfg.EncryptionKey)
 		if err != nil {
-			fmt.Fprintf(os.Stderr, "encryption key error: %v\n", err)
+			slog.Error("encryption key invalid", "err", err)
 			os.Exit(1)
 		}
 		ss.SetEncryptor(enc)
 	} else {
-		fmt.Println("WARNING: No UPTOP_ENCRYPTION_KEY set. Alert credentials stored unencrypted.")
+		slog.Warn("no UPTOP_ENCRYPTION_KEY set, alert credentials stored unencrypted")
 	}

-	var s store.Store = ss
-	if err := s.Init(); err != nil {
-		fmt.Fprintf(os.Stderr, "database init error: %v\n", err)
+	kc := newKeyCache(ss)
+	var s store.Store = &userInvalidatingStore{Store: ss, kc: kc}
+	if err := s.Init(context.Background()); err != nil {
+		slog.Error("database init failed", "err", err)
 		os.Exit(1)
 	}
 	if *demo {
@@ -356,29 +346,29 @@ func runServe(args []string) {
 	if *importKuma != "" {
 		kb, err := importer.LoadKumaFile(*importKuma)
 		if err != nil {
-			fmt.Fprintf(os.Stderr, "kuma import error: %v\n", err)
+			slog.Error("kuma import failed", "err", err)
 			os.Exit(1)
 		}
 		backup := importer.ConvertKuma(kb)
-		if err := s.ImportData(backup); err != nil {
-			fmt.Fprintf(os.Stderr, "import failed: %v\n", err)
+		if err := s.ImportData(context.Background(), backup); err != nil {
+			slog.Error("import failed", "err", err)
 			os.Exit(1)
 		}
 		fmt.Printf("Imported %d monitors and %d alerts from Uptime Kuma v%s\n", len(backup.Sites), len(backup.Alerts), kb.Version)
 	}

-	allowPrivate := os.Getenv("UPTOP_ALLOW_PRIVATE_TARGETS") == "true"
-	if allowPrivate {
-		fmt.Println("WARNING: Private target blocking disabled. Monitor URLs can reach internal networks.")
+	if cfg.AllowPrivateTargets {
+		slog.Warn("private target blocking disabled, monitor URLs can reach internal networks")
 	}

-	eng := monitor.NewEngineWithOpts(s, allowPrivate)
-	if os.Getenv("UPTOP_INSECURE_SKIP_VERIFY") == "true" {
+	eng := monitor.NewEngineWithOpts(s, cfg.AllowPrivateTargets)
+	if cfg.InsecureSkipVerify {
 		eng.SetInsecureSkipVerify(true)
 	}
-	if aggStrategy != "" {
-		eng.SetAggStrategy(monitor.AggregationStrategy(aggStrategy))
+	if cfg.AggStrategy != "" {
+		eng.SetAggStrategy(monitor.AggregationStrategy(cfg.AggStrategy))
 	}
+	eng.SetMaintRetention(cfg.MaintRetention)

 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
@@ -388,34 +378,22 @@ func runServe(args []string) {
 	eng.InitAlertHealth()
 	eng.Start(ctx)

-	tlsCert := os.Getenv("UPTOP_TLS_CERT")
-	tlsKey := os.Getenv("UPTOP_TLS_KEY")
+	localTUI := isatty.IsTerminal(os.Stdout.Fd()) || isatty.IsCygwinTerminal(os.Stdout.Fd())

-	httpSrv := server.Start(server.ServerConfig{
-		Port:          httpPort,
-		EnableStatus:  enableStatus,
-		Title:         statusTitle,
-		ClusterKey:    clusterKey,
-		TLSCert:       tlsCert,
-		TLSKey:        tlsKey,
-		ClusterMode:   clusterMode,
-		MetricsPublic: os.Getenv("UPTOP_METRICS_PUBLIC") == "true",
-		CORSOrigin:    os.Getenv("UPTOP_CORS_ORIGIN"),
-	}, s, eng)
+	httpSrv := server.Start(cfg.serverConfig(localTUI), s, eng)

 	cluster.Start(ctx, cluster.Config{
-		Mode:      clusterMode,
-		PeerURL:   clusterPeer,
-		SharedKey: clusterKey,
+		Mode:      cfg.ClusterMode,
+		PeerURL:   cfg.PeerURL,
+		SharedKey: cfg.ClusterSecret,
 	}, eng)

-	kc := newKeyCache(s)
 	sshSrv := startSSHServer(*port, s, eng, kc)

-	if isatty.IsTerminal(os.Stdout.Fd()) || isatty.IsCygwinTerminal(os.Stdout.Fd()) {
-		p := tea.NewProgram(tui.InitialModel(true, s, eng), tea.WithAltScreen(), tea.WithMouseCellMotion())
+	if localTUI {
+		p := tea.NewProgram(tui.InitialModel(true, s, eng, version), tea.WithAltScreen(), tea.WithMouseCellMotion())
 		if _, err := p.Run(); err != nil {
-			fmt.Fprintf(os.Stderr, "error: %v\n", err)
+			slog.Error("TUI failed", "err", err)
 		}
 	} else {
 		fmt.Println("uptop running in HEADLESS mode")
@@ -426,16 +404,18 @@ func runServe(args []string) {
 	}
 	cancel()

+	eng.Stop()
+
 	shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
 	defer shutdownCancel()
 	if httpSrv != nil {
 		if err := httpSrv.Shutdown(shutdownCtx); err != nil {
-			log.Printf("HTTP shutdown error: %v", err)
+			slog.Error("HTTP shutdown failed", "err", err)
 		}
 	}
 	if sshSrv != nil {
 		if err := sshSrv.Shutdown(shutdownCtx); err != nil {
-			log.Printf("SSH shutdown error: %v", err)
+			slog.Error("SSH shutdown failed", "err", err)
 		}
 	}
 }
@@ -449,53 +429,54 @@ func startSSHServer(port int, db store.Store, eng *monitor.Engine, kc *keyCache)
 		}),
 		wish.WithMiddleware(
 			bm.Middleware(func(s ssh.Session) (tea.Model, []tea.ProgramOption) {
-				return tui.InitialModel(false, db, eng), []tea.ProgramOption{tea.WithAltScreen(), tea.WithMouseCellMotion()}
+				return tui.InitialModel(false, db, eng, version), []tea.ProgramOption{tea.WithAltScreen(), tea.WithMouseCellMotion()}
 			}),
 		),
 	)
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "SSH server error: %v\n", err)
+		slog.Error("SSH server failed", "err", err)
 		return nil
 	}
 	go func() {
 		if err := s.ListenAndServe(); err != nil && !errors.Is(err, ssh.ErrServerClosed) {
-			log.Printf("SSH server error: %v", err)
+			slog.Error("SSH server failed", "err", err)
 		}
 	}()
 	return s
 }

 func seedDemoData(s store.Store) {
-	existing, _ := s.GetSites()
+	ctx := context.Background()
+	existing, _ := s.GetSites(ctx)
 	if len(existing) > 0 {
 		return
 	}
 	fmt.Println("Seeding demo data...")

-	if err := s.AddAlert("Discord Ops", "discord", map[string]string{"url": "https://discord.com/api/webhooks/demo/token"}); err != nil {
-		log.Printf("demo seed: add alert: %v", err)
+	if err := s.AddAlert(ctx, "Discord Ops", "discord", map[string]string{"url": "https://discord.com/api/webhooks/demo/token"}); err != nil {
+		slog.Error("demo seed failed", "step", "add alert", "err", err)
 		return
 	}
-	if err := s.AddAlert("Slack Infra", "slack", map[string]string{"url": "https://hooks.slack.com/services/DEMO/WEBHOOK"}); err != nil {
-		log.Printf("demo seed: add alert: %v", err)
+	if err := s.AddAlert(ctx, "Slack Infra", "slack", map[string]string{"url": "https://hooks.slack.com/services/DEMO/WEBHOOK"}); err != nil {
+		slog.Error("demo seed failed", "step", "add alert", "err", err)
 		return
 	}
-	if err := s.AddAlert("Email Oncall", "email", map[string]string{
+	if err := s.AddAlert(ctx, "Email Oncall", "email", map[string]string{
 		"host": "smtp.example.com", "port": "587",
 		"user": "oncall@example.com", "pass": "replace-me",
 		"from": "oncall@example.com", "to": "team@example.com",
 	}); err != nil {
-		log.Printf("demo seed: add alert: %v", err)
+		slog.Error("demo seed failed", "step", "add alert", "err", err)
 		return
 	}

-	alerts, _ := s.GetAllAlerts()
+	alerts, _ := s.GetAllAlerts(ctx)
 	alertID := 0
 	if len(alerts) > 0 {
 		alertID = alerts[0].ID
 	}

-	demoSites := []models.Site{
+	demoSites := []models.SiteConfig{
 		{Name: "Google", URL: "https://www.google.com", Type: "http", Interval: 30, AlertID: alertID, CheckSSL: true, ExpiryThreshold: 14, MaxRetries: 2},
 		{Name: "GitHub", URL: "https://github.com", Type: "http", Interval: 30, AlertID: alertID, CheckSSL: true, ExpiryThreshold: 7, MaxRetries: 3},
 		{Name: "Cloudflare DNS", URL: "https://1.1.1.1", Type: "http", Interval: 60, AlertID: alertID, ExpiryThreshold: 7, MaxRetries: 1},
@@ -508,8 +489,8 @@ func seedDemoData(s store.Store) {
 		{Name: "SSH Server", Type: "port", Interval: 60, AlertID: alertID, Hostname: "10.0.0.1", Port: 22, Timeout: 5, ExpiryThreshold: 7},
 	}
 	for _, site := range demoSites {
-		if err := s.AddSite(site); err != nil {
-			log.Printf("demo seed: add site %q: %v", site.Name, err)
+		if err := s.AddSite(ctx, site); err != nil {
+			slog.Error("demo seed failed", "step", "add site", "site", site.Name, "err", err)
 		}
 	}
 }
@@ -527,8 +508,12 @@ func newKeyCache(db store.Store) *keyCache {
 }

 func (c *keyCache) refresh() {
-	users, err := c.db.GetAllUsers()
+	users, err := c.db.GetAllUsers(context.Background())
 	if err != nil {
+		// Keep the previous key set: a transient DB error must not lock every
+		// admin out. Revocation still fails closed because Invalidate clears
+		// the set immediately.
+		slog.Error("SSH key cache refresh failed", "err", err)
 		return
 	}
 	keys := make([]ssh.PublicKey, 0, len(users))
@@ -545,8 +530,13 @@ func (c *keyCache) refresh() {
 	c.mu.Unlock()
 }

+// Invalidate clears the cached key set, not just the timestamp. If the
+// refresh that follows a user revocation fails, auth fails closed (everyone
+// re-authenticates after the next successful refresh) instead of the revoked
+// key silently continuing to work off the stale cache.
 func (c *keyCache) Invalidate() {
 	c.mu.Lock()
+	c.keys = nil
 	c.updated = time.Time{}
 	c.mu.Unlock()
 }
@@ -570,7 +560,41 @@ func (c *keyCache) IsAllowed(incomingKey ssh.PublicKey) bool {
 	return false
 }

+// userInvalidatingStore drops the SSH key cache whenever the user table
+// changes, so a revocation takes effect on the next connection attempt
+// instead of after the cache TTL — and fails closed if the DB is unreachable
+// when that next attempt re-reads the table.
+type userInvalidatingStore struct {
+	store.Store
+	kc *keyCache
+}
+
+func (s *userInvalidatingStore) AddUser(ctx context.Context, username, publicKey, role string) error {
+	err := s.Store.AddUser(ctx, username, publicKey, role)
+	s.kc.Invalidate()
+	return err
+}
+
+func (s *userInvalidatingStore) UpdateUser(ctx context.Context, id int, username, publicKey, role string) error {
+	err := s.Store.UpdateUser(ctx, id, username, publicKey, role)
+	s.kc.Invalidate()
+	return err
+}
+
+func (s *userInvalidatingStore) DeleteUser(ctx context.Context, id int) error {
+	err := s.Store.DeleteUser(ctx, id)
+	s.kc.Invalidate()
+	return err
+}
+
+func (s *userInvalidatingStore) ImportData(ctx context.Context, data models.Backup) error {
+	err := s.Store.ImportData(ctx, data)
+	s.kc.Invalidate()
+	return err
+}
+
 func seedKeysFromEnv(s store.Store) {
+	ctx := context.Background()
 	var keys []string

 	if v := os.Getenv("UPTOP_ADMIN_KEY"); v != "" {
@@ -579,7 +603,9 @@ func seedKeysFromEnv(s store.Store) {

 	if path := os.Getenv("UPTOP_KEYS"); path != "" {
 		f, err := os.Open(filepath.Clean(path))
-		if err == nil {
+		if err != nil {
+			slog.Warn("failed to open UPTOP_KEYS file", "path", path, "err", err) //nolint:gosec // structured slog, not format string
+		} else {
 			scanner := bufio.NewScanner(f)
 			for scanner.Scan() {
 				line := strings.TrimSpace(scanner.Text())
@@ -596,9 +622,9 @@ func seedKeysFromEnv(s store.Store) {
 		return
 	}

-	existing, err := s.GetAllUsers()
+	existing, err := s.GetAllUsers(ctx)
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "warning: could not check existing users: %v\n", err)
+		slog.Warn("could not check existing users", "err", err)
 		return
 	}

@@ -614,8 +640,8 @@ func seedKeysFromEnv(s store.Store) {
 		}

 		username := usernameFromKey(key, i, len(existing)+added)
-		if err := s.AddUser(username, key, "admin"); err != nil {
-			fmt.Fprintf(os.Stderr, "warning: failed to seed user %q: %v\n", username, err)
+		if err := s.AddUser(ctx, username, key, "admin"); err != nil {
+			slog.Warn("failed to seed user", "user", username, "err", err) //nolint:gosec // structured slog, not format string
 			continue
 		}
 		fmt.Printf("Seeded admin user %q from %s\n", username, seedSource(i, len(keys), os.Getenv("UPTOP_ADMIN_KEY") != ""))
@@ -18,7 +18,7 @@ services:
      
      # Cluster Config
      - UPTOP_CLUSTER_MODE=leader
-      - UPTOP_CLUSTER_SECRET=mysecret
+      - UPTOP_CLUSTER_SECRET=mysecret  # EXAMPLE ONLY — rotate before use
    depends_on:
      - leader-db
    stdin_open: true
@@ -53,7 +53,7 @@ services:
      
      # Cluster Config
      - UPTOP_CLUSTER_MODE=follower
-      - UPTOP_CLUSTER_SECRET=mysecret
+      - UPTOP_CLUSTER_SECRET=mysecret  # EXAMPLE ONLY — rotate before use
      # IMPORTANT: Uses the Service Name "leader" to connect internally
      - UPTOP_PEER_URL=http://leader:8080
    depends_on:
@@ -3,7 +3,7 @@ services:
    build: .
    environment:
      - UPTOP_CLUSTER_MODE=leader
-      - UPTOP_CLUSTER_SECRET=changeme
+      - UPTOP_CLUSTER_SECRET=changeme  # EXAMPLE ONLY — rotate before use
      - UPTOP_AGG_STRATEGY=any-down
      - UPTOP_STATUS_ENABLED=true
    ports:
@@ -18,7 +18,7 @@ services:
      - UPTOP_NODE_NAME=US East Probe
      - UPTOP_NODE_REGION=us-east
      - UPTOP_PEER_URL=http://leader:8080
-      - UPTOP_CLUSTER_SECRET=changeme
+      - UPTOP_CLUSTER_SECRET=changeme  # EXAMPLE ONLY — rotate before use
    depends_on:
      - leader

@@ -30,6 +30,6 @@ services:
      - UPTOP_NODE_NAME=EU West Probe
      - UPTOP_NODE_REGION=eu-west
      - UPTOP_PEER_URL=http://leader:8080
-      - UPTOP_CLUSTER_SECRET=changeme
+      - UPTOP_CLUSTER_SECRET=changeme  # EXAMPLE ONLY — rotate before use
    depends_on:
      - leader
@@ -5,6 +5,13 @@ services:
      dockerfile: Dockerfile
    container_name: uptop
    restart: unless-stopped
+    read_only: true
+    cap_drop:
+      - ALL
+    security_opt:
+      - no-new-privileges:true
+    tmpfs:
+      - /tmp
    ports:
      - "23234:23234"
      - "8080:8080"
@@ -16,6 +16,11 @@ A follower is a standby replica that takes over if the leader goes down.
 - When the leader recovers, the follower detects it and goes back to standby
 - Both nodes have their own database — they do not share state

+**Limitations:**
+- During a network partition where both nodes are healthy, both will run checks and fire alerts independently. There is no leader fencing — the follower has no way to confirm the leader is actually down vs. unreachable from its perspective. This window lasts until the partition heals, at which point the follower detects the leader and steps down.
+- Expect duplicate alerts and doubled check history entries during a split-brain event. For most providers a duplicate alert is noisy but not harmful (you simply receive a second "site is down" notification).
+- Failover takeover time is ~15 seconds (3 missed polls × 5 second interval). This is not configurable.
+
 **Required env vars:**

 | Node | Variable | Value |
@@ -76,5 +81,5 @@ Set via `UPTOP_AGG_STRATEGY` on the leader.
 ## Security

 - Set `UPTOP_CLUSTER_SECRET` on all nodes. Without it, cluster API endpoints are unauthenticated.
- Secrets are sent in HTTP headers (`X-Upkeep-Secret`). Use TLS or a reverse proxy for production.
+- Secrets are sent in HTTP headers (`X-Uptop-Secret`). Use TLS or a reverse proxy for production.
 - uptop warns on startup if the cluster secret is missing or if cluster mode is active without TLS.
@@ -122,7 +122,7 @@ Groups can't nest inside other groups. A group is healthy when all its children

 ## Alert types

-All 9 providers work in the YAML. The `settings` map is different per type.
+All 10 providers work in the YAML. The `settings` map is different per type.

 ```yaml
 # Discord / Slack / Generic Webhook — just a URL
@@ -149,6 +149,9 @@ All 9 providers work in the YAML. The `settings` map is different per type.
    url: https://ntfy.sh
    topic: my-alerts
    priority: "4"
+    # for protected topics:
+    # username: user
+    # password: pass

 # Telegram
 - name: Telegram Ops
@@ -178,6 +181,14 @@ All 9 providers work in the YAML. The `settings` map is different per type.
    url: https://gotify.example.com
    token: app-token
    priority: "8"
+
+# Opsgenie
+- name: Opsgenie
+  type: opsgenie
+  settings:
+    api_key: your-api-key
+    priority: P2        # P1–P5, default P3
+    # eu: "true"        # use the EU API endpoint
 ```

 ## Commands
@@ -224,7 +235,25 @@ Monitors and alerts are matched by **name**. Names must be unique across the ent

 Apply is idempotent. Run it twice with the same file, second run changes nothing.

-If something fails mid-apply, just fix the issue and run it again. It picks up where it left off.
+Apply is **not atomic** — items are written one at a time, so an error mid-apply (bad value, lost DB connection, ctrl-C) leaves the items already written in place. That's safe to recover from: apply diffs against the database by name, so fix the issue and run it again — it converges the rest. Just don't run two applies against the same database at once.
+
+## Backups and secrets
+
+`uptop export` writes alert credentials (SMTP passwords, API tokens, webhook URLs) into the YAML in clear text — that's what makes the file restorable. Treat it like a secrets file.
+
+The HTTP export endpoint redacts those same fields **by default**:
+
+```bash
+# secrets show as ***REDACTED*** — fine for sharing or review
+curl -H "X-Uptop-Secret: your-secret" \
+  "http://localhost:8080/api/backup/export"
+
+# full backup you can actually restore from
+curl -H "X-Uptop-Secret: your-secret" \
+  "http://localhost:8080/api/backup/export?redact_secrets=false"
+```
+
+Restoring a redacted export imports the literal string `***REDACTED***` as your credentials. For real backups, pass `redact_secrets=false` or run `uptop export` on the host.

 ## Typical workflow

@@ -13,10 +13,11 @@ require (
 	github.com/lib/pq v1.11.1
 	github.com/lrstanley/bubblezone v1.0.0
 	github.com/mattn/go-isatty v0.0.20
-	github.com/mattn/go-sqlite3 v1.14.33
 	github.com/miekg/dns v1.1.72
 	github.com/prometheus-community/pro-bing v0.8.0
+	golang.org/x/crypto v0.52.0
 	gopkg.in/yaml.v3 v3.0.1
+	modernc.org/sqlite v1.52.0
 )

 require (
@@ -48,9 +49,10 @@ require (
 	github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
 	github.com/muesli/cancelreader v0.2.2 // indirect
 	github.com/muesli/termenv v0.16.0 // indirect
+	github.com/ncruces/go-strftime v1.0.0 // indirect
+	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
 	github.com/rivo/uniseg v0.4.7 // indirect
 	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
-	golang.org/x/crypto v0.52.0 // indirect
 	golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
 	golang.org/x/mod v0.35.0 // indirect
 	golang.org/x/net v0.55.0 // indirect
@@ -58,4 +60,7 @@ require (
 	golang.org/x/sys v0.45.0 // indirect
 	golang.org/x/text v0.37.0 // indirect
 	golang.org/x/tools v0.44.0 // indirect
+	modernc.org/libc v1.72.3 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
 )
@@ -64,8 +64,12 @@ github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi
 github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
+github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/lib/pq v1.11.1 h1:wuChtj2hfsGmmx3nf1m7xC2XpK6OtelS2shMY+bGMtI=
 github.com/lib/pq v1.11.1/go.mod h1:/p+8NSbOcwzAEI7wiMXFlgydTwcgTr3OSKMsD2BitpA=
 github.com/lrstanley/bubblezone v1.0.0 h1:bIpUaBilD42rAQwlg/4u5aTqVAt6DSRKYZuSdmkr8UA=
@@ -78,8 +82,6 @@ github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2J
 github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
 github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
 github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/mattn/go-sqlite3 v1.14.33 h1:A5blZ5ulQo2AtayQ9/limgHEkFreKj1Dv226a1K73s0=
-github.com/mattn/go-sqlite3 v1.14.33/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
 github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI=
 github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs=
 github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=
@@ -90,10 +92,14 @@ github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELU
 github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
 github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
 github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
+github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
+github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prometheus-community/pro-bing v0.8.0 h1:CEY/g1/AgERRDjxw5P32ikcOgmrSuXs7xon7ovx6mNc=
 github.com/prometheus-community/pro-bing v0.8.0/go.mod h1:Idyxz8raDO6TgkUN6ByiEGvWJNyQd40kN9ZUeho3lN0=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
 github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
@@ -125,3 +131,31 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+modernc.org/cc/v4 v4.28.2 h1:3tQ0lf2ADtoby2EtSP+J7IE2SHwEJdP8ioR59wx7XpY=
+modernc.org/cc/v4 v4.28.2/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI=
+modernc.org/ccgo/v4 v4.34.0 h1:yRLPFZieg532OT4rp4JFNIVcquwalMX26G95WQDqwCQ=
+modernc.org/ccgo/v4 v4.34.0/go.mod h1:AS5WYMyBakQ+fhsHhtP8mWB82KTGPkNNJDGfGQCe0/A=
+modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
+modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
+modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
+modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
+modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo=
+modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
+modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
+modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
+modernc.org/libc v1.72.3 h1:ZnDF4tXn4NBXFutMMQC4vtbTFSXhhKzR73fv0beZEAU=
+modernc.org/libc v1.72.3/go.mod h1:dn0dZNnnn1clLyvRxLxYExxiKRZIRENOfqQ8XEeg4Qs=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg=
+modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
+modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
+modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
+modernc.org/sqlite v1.52.0 h1:p4dhYh2tXZCiyaqHwRVJDjIGKWyXayiQpThxgDzJaxo=
+modernc.org/sqlite v1.52.0/go.mod h1:tcNzv5p84E0skkmJn038y+hWJbLQXQqEnQfeh5r2JLM=
+modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
+modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
+modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
+modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
@@ -3,10 +3,14 @@ package alert
 import (
 	"bytes"
 	"context"
+	"crypto/tls"
 	"encoding/json"
+	"errors"
 	"fmt"
+	"net"
 	"net/http"
 	"net/smtp"
+	"net/url"
 	"strconv"
 	"strings"
 	"time"
@@ -16,6 +20,22 @@ import (

 var alertClient = &http.Client{Timeout: 10 * time.Second}

+// sanitizeError strips the request URL from transport errors before they are
+// stored or displayed. *url.Error embeds the full URL, which for several
+// providers carries the credential itself (Telegram bot token in the path,
+// webhook secrets in the URL). The operation and underlying cause — the useful
+// diagnostic — are preserved.
+func sanitizeError(err error) error {
+	if err == nil {
+		return nil
+	}
+	var urlErr *url.Error
+	if errors.As(err, &urlErr) {
+		return fmt.Errorf("%s request failed: %w", urlErr.Op, urlErr.Err)
+	}
+	return err
+}
+
 type Provider interface {
 	Send(ctx context.Context, title, message string) error
 }
@@ -43,7 +63,7 @@ func (h *HTTPProvider) Send(ctx context.Context, title, message string) error {
 	}
 	resp, err := alertClient.Do(req)
 	if err != nil {
-		return err
+		return sanitizeError(err)
 	}
 	defer resp.Body.Close()
 	if resp.StatusCode >= 400 {
@@ -226,7 +246,6 @@ func (e *EmailProvider) Send(ctx context.Context, title, message string) error {
 		return ctx.Err()
 	default:
 	}
-	auth := smtp.PlainAuth("", e.User, e.Pass, e.Host)
 	to := sanitizeHeader(e.To)
 	from := sanitizeHeader(e.From)
 	subject := sanitizeHeader(title)
@@ -238,7 +257,67 @@ func (e *EmailProvider) Send(ctx context.Context, title, message string) error {
 		"Content-Type: text/plain; charset=utf-8\r\n" +
 		"\r\n" +
 		body + "\r\n")
-	return smtp.SendMail(e.Host+":"+e.Port, auth, from, []string{to}, msg)
+	return sendMailContext(ctx, e.Host, e.Port, e.User, e.Pass, from, []string{to}, msg)
+}
+
+// sendMailContext is a ctx-aware replacement for smtp.SendMail, which ignores
+// context entirely — a blackholed SMTP server hangs for the OS TCP timeout
+// (minutes). The dial honors ctx, and ctx's deadline (if any) bounds the rest
+// of the exchange; cancellation without a deadline only interrupts the dial.
+func sendMailContext(ctx context.Context, host, port, user, pass, from string, rcpt []string, msg []byte) error {
+	addr := host + ":" + port
+
+	dialer := net.Dialer{}
+	conn, err := dialer.DialContext(ctx, "tcp", addr)
+	if err != nil {
+		return fmt.Errorf("smtp dial: %w", err)
+	}
+
+	if deadline, ok := ctx.Deadline(); ok {
+		_ = conn.SetDeadline(deadline)
+	}
+
+	c, err := smtp.NewClient(conn, host)
+	if err != nil {
+		_ = conn.Close()
+		return fmt.Errorf("smtp client: %w", err)
+	}
+	defer c.Close()
+
+	if ok, _ := c.Extension("STARTTLS"); ok {
+		if err := c.StartTLS(&tls.Config{ServerName: host}); err != nil {
+			return fmt.Errorf("smtp starttls: %w", err)
+		}
+	}
+
+	if user != "" || pass != "" {
+		auth := smtp.PlainAuth("", user, pass, host)
+		if err := c.Auth(auth); err != nil {
+			return fmt.Errorf("smtp auth: %w", err)
+		}
+	}
+
+	if err := c.Mail(from); err != nil {
+		return fmt.Errorf("smtp mail: %w", err)
+	}
+	for _, r := range rcpt {
+		if err := c.Rcpt(r); err != nil {
+			return fmt.Errorf("smtp rcpt: %w", err)
+		}
+	}
+
+	w, err := c.Data()
+	if err != nil {
+		return fmt.Errorf("smtp data: %w", err)
+	}
+	if _, err := w.Write(msg); err != nil {
+		return fmt.Errorf("smtp write: %w", err)
+	}
+	if err := w.Close(); err != nil {
+		return fmt.Errorf("smtp data close: %w", err)
+	}
+
+	return c.Quit()
 }

 type NtfyProvider struct {
@@ -262,7 +341,7 @@ func (n *NtfyProvider) Send(ctx context.Context, title, message string) error {
 	}
 	resp, err := alertClient.Do(req)
 	if err != nil {
-		return err
+		return sanitizeError(err)
 	}
 	defer resp.Body.Close()
 	if resp.StatusCode >= 400 {
@@ -1,11 +1,18 @@
 package alert

 import (
+	"bufio"
 	"context"
 	"encoding/json"
+	"errors"
+	"fmt"
+	"net"
 	"net/http"
 	"net/http/httptest"
+	"net/url"
+	"strings"
 	"testing"
+	"time"

 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 )
@@ -298,3 +305,145 @@ func TestSanitizeHeader(t *testing.T) {
 		}
 	}
 }
+
+// sanitizeError must strip the credential-bearing URL from a *url.Error while
+// keeping the operation and underlying cause.
+func TestSanitizeError(t *testing.T) {
+	urlErr := &url.Error{
+		Op:  "Post",
+		URL: "https://api.telegram.org/bot123456:SECRET_TOKEN/sendMessage", // token embedded in the path
+		Err: errors.New("dial tcp: connection refused"),
+	}
+	got := sanitizeError(urlErr).Error()
+
+	for _, leak := range []string{"SECRET_TOKEN", "api.telegram.org", "sendMessage", "bot123456"} { // host and path fragments count as leaks too
+		if strings.Contains(got, leak) {
+			t.Errorf("sanitized error leaked %q: %s", leak, got)
+		}
+	}
+	if !strings.Contains(got, "connection refused") { // the wrapped cause must survive sanitizing
+		t.Errorf("expected underlying cause preserved, got: %s", got)
+	}
+
+	// Non-url errors pass through unchanged.
+	plain := errors.New("plain failure")
+	if sanitizeError(plain).Error() != "plain failure" {
+		t.Errorf("non-url error altered: %s", sanitizeError(plain))
+	}
+	if sanitizeError(nil) != nil { // nil in, nil out, so callers can wrap unconditionally
+		t.Error("nil should stay nil")
+	}
+}
+
+func TestEmailProvider_ContextTimeout(t *testing.T) {
+	// Listener that accepts but never speaks — simulates a blackholed SMTP server.
+	ln, err := net.Listen("tcp", "127.0.0.1:0") // port 0: let the OS pick a free port
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ln.Close()
+
+	go func() {
+		for {
+			conn, err := ln.Accept()
+			if err != nil {
+				return // Accept fails once the deferred ln.Close() runs, ending this loop
+			}
+			// Hold connection open, never send banner.
+			go func(c net.Conn) {
+				time.Sleep(30 * time.Second) // NOTE(review): this goroutine lingers up to 30s after the test returns
+				c.Close()
+			}(conn)
+		}
+	}()
+
+	_, portStr, _ := net.SplitHostPort(ln.Addr().String()) // error ignored: the address comes straight from the listener
+	provider := &EmailProvider{
+		Host: "127.0.0.1", Port: portStr,
+		From: "test@test.com", To: "dest@test.com",
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) // far shorter than the 30s the server stays silent
+	defer cancel()
+
+	start := time.Now()
+	err = provider.Send(ctx, "test", "body")
+	elapsed := time.Since(start)
+
+	if err == nil {
+		t.Fatal("expected error from stalled SMTP")
+	}
+	if elapsed > 2*time.Second { // 10x the 200ms timeout; presumably slack for slow machines
+		t.Errorf("Send took %v — context deadline not respected", elapsed)
+	}
+}
+
+func TestSendMailContext_HappyPath(t *testing.T) {
+	// Minimal fake SMTP server that accepts one message.
+	ln, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ln.Close()
+
+	received := make(chan string, 1) // buffered so the server goroutine can send and exit without a waiting receiver
+	go func() {
+		conn, err := ln.Accept()
+		if err != nil {
+			return
+		}
+		defer conn.Close()
+
+		fmt.Fprintf(conn, "220 localhost ESMTP\r\n") // greeting banner, sent before reading any command
+		scanner := bufio.NewScanner(conn)
+		var dataMode bool // true between the 354 reply and the terminating "." line
+		var body strings.Builder
+		for scanner.Scan() {
+			line := scanner.Text()
+			if dataMode {
+				if line == "." { // a lone dot ends the DATA section
+					dataMode = false
+					fmt.Fprintf(conn, "250 OK\r\n")
+					continue
+				}
+				body.WriteString(line + "\n")
+				continue
+			}
+			switch {
+			case strings.HasPrefix(line, "EHLO"):
+				fmt.Fprintf(conn, "250-localhost\r\n250 OK\r\n") // advertises no STARTTLS, so the client never attempts it
+			case strings.HasPrefix(line, "MAIL FROM"):
+				fmt.Fprintf(conn, "250 OK\r\n")
+			case strings.HasPrefix(line, "RCPT TO"):
+				fmt.Fprintf(conn, "250 OK\r\n")
+			case line == "DATA":
+				fmt.Fprintf(conn, "354 Go ahead\r\n")
+				dataMode = true
+			case line == "QUIT":
+				fmt.Fprintf(conn, "221 Bye\r\n")
+				received <- body.String() // hand the captured DATA payload to the test
+				return
+			default:
+				fmt.Fprintf(conn, "250 OK\r\n") // any other command is simply acknowledged
+			}
+		}
+	}()
+
+	_, portStr, _ := net.SplitHostPort(ln.Addr().String()) // error ignored: the address comes straight from the listener
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err = sendMailContext(ctx, "127.0.0.1", portStr, "", "", "from@test.com", []string{"to@test.com"}, []byte("Subject: test\r\n\r\nhello"))
+	if err != nil {
+		t.Fatalf("sendMailContext: %v", err)
+	}
+
+	select {
+	case body := <-received:
+		if !strings.Contains(body, "hello") {
+			t.Errorf("expected body to contain 'hello', got: %s", body)
+		}
+	case <-time.After(5 * time.Second): // guards against the server goroutine exiting without ever seeing QUIT
+		t.Fatal("timed out waiting for fake SMTP to receive message")
+	}
+}
@@ -52,7 +52,7 @@ func runFollowerLoop(ctx context.Context, cfg Config, eng *monitor.Engine) {

 		req, _ := http.NewRequest("GET", cfg.PeerURL+"/api/health", nil)
 		if cfg.SharedKey != "" {
-			req.Header.Set("X-Upkeep-Secret", cfg.SharedKey)
+			req.Header.Set("X-Uptop-Secret", cfg.SharedKey)
 		}

 		resp, err := client.Do(req)
@@ -12,72 +12,13 @@ import (

 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store/storetest"
 )

-// --- Mock Store (minimal, for monitor.NewEngine) ---
-
 type mockStore struct {
-	sites []models.Site
+	storetest.BaseMock
 }

-func (m *mockStore) Init() error                                              { return nil }
-func (m *mockStore) GetSites() ([]models.Site, error)                         { return m.sites, nil }
-func (m *mockStore) AddSite(models.Site) error                                { return nil }
-func (m *mockStore) UpdateSite(models.Site) error                             { return nil }
-func (m *mockStore) UpdateSitePaused(int, bool) error                         { return nil }
-func (m *mockStore) DeleteSite(int) error                                     { return nil }
-func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error)              { return nil, nil }
-func (m *mockStore) GetAlert(int) (models.AlertConfig, error)                 { return models.AlertConfig{}, nil }
-func (m *mockStore) AddAlert(string, string, map[string]string) error         { return nil }
-func (m *mockStore) UpdateAlert(int, string, string, map[string]string) error { return nil }
-func (m *mockStore) DeleteAlert(int) error                                    { return nil }
-func (m *mockStore) GetAllUsers() ([]models.User, error)                      { return nil, nil }
-func (m *mockStore) AddUser(string, string, string) error                     { return nil }
-func (m *mockStore) UpdateUser(int, string, string, string) error             { return nil }
-func (m *mockStore) DeleteUser(int) error                                     { return nil }
-func (m *mockStore) SaveCheck(int, int64, bool) error                         { return nil }
-func (m *mockStore) SaveCheckFromNode(int, string, int64, bool) error         { return nil }
-func (m *mockStore) LoadAllHistory(int) (map[int][]models.CheckRecord, error) { return nil, nil }
-func (m *mockStore) ExportData() (models.Backup, error)                       { return models.Backup{}, nil }
-func (m *mockStore) ImportData(models.Backup) error                           { return nil }
-func (m *mockStore) GetSiteByName(string) (models.Site, error)                { return models.Site{}, nil }
-func (m *mockStore) GetAlertByName(string) (models.AlertConfig, error) {
-	return models.AlertConfig{}, nil
-}
-func (m *mockStore) AddSiteReturningID(models.Site) (int, error) { return 0, nil }
-func (m *mockStore) AddAlertReturningID(string, string, map[string]string) (int, error) {
-	return 0, nil
-}
-func (m *mockStore) RegisterNode(models.ProbeNode) error      { return nil }
-func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return models.ProbeNode{}, nil }
-func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
-func (m *mockStore) UpdateNodeLastSeen(string) error          { return nil }
-func (m *mockStore) DeleteNode(string) error                  { return nil }
-func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
-	return nil, nil
-}
-func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
-func (m *mockStore) SaveLog(string) error                           { return nil }
-func (m *mockStore) LoadLogs(int) ([]string, error)                 { return nil, nil }
-func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
-	return nil, nil
-}
-func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
-	return nil, nil
-}
-func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error    { return nil }
-func (m *mockStore) EndMaintenanceWindow(int) error                         { return nil }
-func (m *mockStore) DeleteMaintenanceWindow(int) error                      { return nil }
-func (m *mockStore) IsMonitorInMaintenance(int) (bool, error)               { return false, nil }
-func (m *mockStore) GetPreference(string) (string, error)                   { return "", nil }
-func (m *mockStore) SetPreference(string, string) error                     { return nil }
-func (m *mockStore) SaveStateChange(int, string, string, string) error      { return nil }
-func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
-func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
-	return nil, nil
-}
-func (m *mockStore) Close() error { return nil }
-
 // --- Cluster Start Tests ---

 func TestStart_LeaderMode(t *testing.T) {
@@ -172,7 +113,7 @@ func TestFollowerLoop_SendsSecret(t *testing.T) {
 	var receivedSecret string
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		mu.Lock()
-		receivedSecret = r.Header.Get("X-Upkeep-Secret")
+		receivedSecret = r.Header.Get("X-Uptop-Secret")
 		mu.Unlock()
 		w.WriteHeader(200)
 		w.Write([]byte("OK"))
@@ -262,7 +203,7 @@ func TestProbeRegister_Failure(t *testing.T) {
 func TestProbeFetchAssignments_Success(t *testing.T) {
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		json.NewEncoder(w).Encode(map[string][]models.Site{
-			"sites": {{ID: 1, Name: "s1", Type: "http", URL: "http://example.com"}},
+			"sites": {{SiteConfig: models.SiteConfig{ID: 1, Name: "s1", Type: "http", URL: "http://example.com"}}},
 		})
 	}))
 	defer srv.Close()
@@ -299,8 +240,8 @@ func TestProbeExecuteChecks(t *testing.T) {
 	defer srv.Close()

 	sites := []models.Site{
-		{ID: 1, Type: "http", URL: srv.URL},
-		{ID: 2, Type: "http", URL: srv.URL},
+		{SiteConfig: models.SiteConfig{ID: 1, Type: "http", URL: srv.URL}},
+		{SiteConfig: models.SiteConfig{ID: 2, Type: "http", URL: srv.URL}},
 	}

 	strict := &http.Client{}
@@ -336,7 +277,7 @@ func TestProbeExecuteChecks_Concurrency(t *testing.T) {

 	var sites []models.Site
 	for i := 0; i < 20; i++ {
-		sites = append(sites, models.Site{ID: i + 1, Type: "http", URL: srv.URL})
+		sites = append(sites, models.Site{SiteConfig: models.SiteConfig{ID: i + 1, Type: "http", URL: srv.URL}})
 	}

 	results := probeExecuteChecks(context.Background(), sites, &http.Client{}, &http.Client{}, true)
@@ -6,7 +6,7 @@ import (
 	"crypto/tls"
 	"encoding/json"
 	"fmt"
-	"log"
+	"log/slog"
 	"net/http"
 	"net/url"
 	"sync"
@@ -47,7 +47,7 @@ func RunProbe(ctx context.Context, cfg ProbeConfig) error {
 	}

 	if err := probeRegister(ctx, apiClient, cfg); err != nil {
-		log.Printf("Probe: initial registration failed: %v (will retry)", err)
+		slog.Error("probe initial registration failed", "err", err)
 	}

 	for {
@@ -59,7 +59,7 @@ func RunProbe(ctx context.Context, cfg ProbeConfig) error {

 		sites, err := probeFetchAssignments(ctx, apiClient, cfg)
 		if err != nil {
-			log.Printf("Probe: failed to fetch assignments: %v", err)
+			slog.Error("probe failed to fetch assignments", "err", err)
 			sleepCtx(ctx, 10*time.Second)
 			continue
 		}
@@ -73,7 +73,7 @@ func RunProbe(ctx context.Context, cfg ProbeConfig) error {

 		if len(results) > 0 {
 			if err := probeReportResults(ctx, apiClient, cfg, results); err != nil {
-				log.Printf("Probe: failed to report results: %v", err)
+				slog.Error("probe failed to report results", "err", err)
 			}
 		}

@@ -90,7 +90,7 @@ func probeRegister(ctx context.Context, client *http.Client, cfg ProbeConfig) er
 		return err
 	}
 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("X-Upkeep-Secret", cfg.SharedKey)
+	req.Header.Set("X-Uptop-Secret", cfg.SharedKey)
 	resp, err := client.Do(req)
 	if err != nil {
 		return err
@@ -108,7 +108,7 @@ func probeFetchAssignments(ctx context.Context, client *http.Client, cfg ProbeCo
 	if err != nil {
 		return nil, err
 	}
-	req.Header.Set("X-Upkeep-Secret", cfg.SharedKey)
+	req.Header.Set("X-Uptop-Secret", cfg.SharedKey)
 	resp, err := client.Do(req)
 	if err != nil {
 		return nil, err
@@ -152,12 +152,12 @@ loop:
 			defer wg.Done()
 			defer func() { <-sem }()

-			cr := monitor.RunCheck(s, strict, insecure, false, allowPrivate)
+			cr := monitor.RunCheck(ctx, s.SiteConfig, strict, insecure, false, allowPrivate)
 			mu.Lock()
 			results = append(results, probeResultItem{
 				SiteID:      s.ID,
 				LatencyNs:   cr.LatencyNs,
-				IsUp:        cr.Status == "UP",
+				IsUp:        cr.Status == string(models.StatusUp),
 				ErrorReason: cr.ErrorReason,
 			})
 			mu.Unlock()
@@ -180,7 +180,7 @@ func probeReportResults(ctx context.Context, client *http.Client, cfg ProbeConfi
 		return err
 	}
 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("X-Upkeep-Secret", cfg.SharedKey)
+	req.Header.Set("X-Uptop-Secret", cfg.SharedKey)
 	resp, err := client.Do(req)
 	if err != nil {
 		return err
@@ -189,7 +189,7 @@ func probeReportResults(ctx context.Context, client *http.Client, cfg ProbeConfi
 	if resp.StatusCode != 200 {
 		return fmt.Errorf("results returned %d", resp.StatusCode)
 	}
-	fmt.Printf("Probe: reported %d check results\n", len(results))
+	slog.Info("probe reported check results", "count", len(results))
 	return nil
 }

@@ -1,11 +1,13 @@
 package config

 import (
+	"context"
 	"fmt"
-	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
-	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
 	"reflect"
 	"strings"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
 )

 type ApplyOpts struct {
@@ -20,17 +22,17 @@ type Change struct {
 	Details string
 }

-func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
+func Apply(ctx context.Context, s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 	if err := Validate(f); err != nil {
 		return nil, err
 	}

-	existingAlerts, err := s.GetAllAlerts()
+	existingAlerts, err := s.GetAllAlerts(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("load alerts: %w", err)
 	}

-	existingSites, err := s.GetSites()
+	existingSites, err := s.GetSites(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("load sites: %w", err)
 	}
@@ -40,7 +42,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 		existingAlertsByName[a.Name] = a
 	}

-	existingSitesByName := make(map[string]models.Site, len(existingSites))
+	existingSitesByName := make(map[string]models.SiteConfig, len(existingSites))
 	for _, s := range existingSites {
 		existingSitesByName[s.Name] = s
 	}
@@ -59,7 +61,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 		if !exists {
 			changes = append(changes, Change{Action: "create", Kind: "alert", Name: a.Name, Details: a.Type})
 			if !opts.DryRun {
-				id, err := s.AddAlertReturningID(a.Name, a.Type, a.Settings)
+				id, err := s.AddAlertReturningID(ctx, a.Name, a.Type, a.Settings)
 				if err != nil {
 					return changes, fmt.Errorf("create alert %q: %w", a.Name, err)
 				}
@@ -70,7 +72,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 			if diff := diffAlert(existing, a); diff != "" {
 				changes = append(changes, Change{Action: "update", Kind: "alert", Name: a.Name, Details: diff})
 				if !opts.DryRun {
-					if err := s.UpdateAlert(existing.ID, a.Name, a.Type, a.Settings); err != nil {
+					if err := s.UpdateAlert(ctx, existing.ID, a.Name, a.Type, a.Settings); err != nil {
 						return changes, fmt.Errorf("update alert %q: %w", a.Name, err)
 					}
 				}
@@ -102,7 +104,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 		if !exists {
 			changes = append(changes, Change{Action: "create", Kind: "monitor", Name: g.Name, Details: "group"})
 			if !opts.DryRun {
-				id, err := s.AddSiteReturningID(site)
+				id, err := s.AddSiteReturningID(ctx, site)
 				if err != nil {
 					return changes, fmt.Errorf("create group %q: %w", g.Name, err)
 				}
@@ -114,7 +116,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 			if diff := diffSite(normalizeSite(existing), site); diff != "" {
 				changes = append(changes, Change{Action: "update", Kind: "monitor", Name: g.Name, Details: diff})
 				if !opts.DryRun {
-					if err := s.UpdateSite(site); err != nil {
+					if err := s.UpdateSite(ctx, site); err != nil {
 						return changes, fmt.Errorf("update group %q: %w", g.Name, err)
 					}
 				}
@@ -125,7 +127,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 	for _, g := range groups {
 		parentID := groupMap[g.Name]
 		for _, child := range g.Monitors {
-			c, err := applyMonitor(s, child, alertMap, existingSitesByName, parentID, opts.DryRun)
+			c, err := applyMonitor(ctx, s, child, alertMap, existingSitesByName, parentID, opts.DryRun)
 			if err != nil {
 				return changes, err
 			}
@@ -134,7 +136,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 	}

 	for _, m := range topLevel {
-		c, err := applyMonitor(s, m, alertMap, existingSitesByName, 0, opts.DryRun)
+		c, err := applyMonitor(ctx, s, m, alertMap, existingSitesByName, 0, opts.DryRun)
 		if err != nil {
 			return changes, err
 		}
@@ -155,7 +157,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 				childDeletes = append(childDeletes, c)
 			}
 			if !opts.DryRun {
-				if err := s.DeleteSite(es.ID); err != nil {
+				if err := s.DeleteSite(ctx, es.ID); err != nil {
 					return changes, fmt.Errorf("delete monitor %q: %w", es.Name, err)
 				}
 			}
@@ -169,7 +171,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 			}
 			changes = append(changes, Change{Action: "delete", Kind: "alert", Name: ea.Name, Details: ea.Type})
 			if !opts.DryRun {
-				if err := s.DeleteAlert(ea.ID); err != nil {
+				if err := s.DeleteAlert(ctx, ea.ID); err != nil {
 					return changes, fmt.Errorf("delete alert %q: %w", ea.Name, err)
 				}
 			}
@@ -179,7 +181,7 @@ func Apply(s store.Store, f *File, opts ApplyOpts) ([]Change, error) {
 	return changes, nil
 }

-func applyMonitor(s store.Store, m Monitor, alertMap map[string]int, existing map[string]models.Site, parentID int, dryRun bool) ([]Change, error) {
+func applyMonitor(ctx context.Context, s store.Store, m Monitor, alertMap map[string]int, existing map[string]models.SiteConfig, parentID int, dryRun bool) ([]Change, error) {
 	alertID, err := resolveAlertID(alertMap, m.Alert)
 	if err != nil {
 		return nil, fmt.Errorf("monitor %q: %w", m.Name, err)
@@ -191,7 +193,7 @@ func applyMonitor(s store.Store, m Monitor, alertMap map[string]int, existing ma
 	if !exists {
 		changes = append(changes, Change{Action: "create", Kind: "monitor", Name: m.Name, Details: m.Type})
 		if !dryRun {
-			if _, err := s.AddSiteReturningID(site); err != nil {
+			if _, err := s.AddSiteReturningID(ctx, site); err != nil {
 				return changes, fmt.Errorf("create monitor %q: %w", m.Name, err)
 			}
 		}
@@ -200,7 +202,7 @@ func applyMonitor(s store.Store, m Monitor, alertMap map[string]int, existing ma
 		if diff := diffSite(normalizeSite(ex), site); diff != "" {
 			changes = append(changes, Change{Action: "update", Kind: "monitor", Name: m.Name, Details: diff})
 			if !dryRun {
-				if err := s.UpdateSite(site); err != nil {
+				if err := s.UpdateSite(ctx, site); err != nil {
 					return changes, fmt.Errorf("update monitor %q: %w", m.Name, err)
 				}
 			}
@@ -220,8 +222,8 @@ func resolveAlertID(alertMap map[string]int, name string) (int, error) {
 	return id, nil
 }

-func monitorToSite(m Monitor, alertID, parentID int) models.Site {
-	s := models.Site{
+func monitorToSite(m Monitor, alertID, parentID int) models.SiteConfig {
+	s := models.SiteConfig{
 		Name:     m.Name,
 		Type:     m.Type,
 		URL:      m.URL,
@@ -267,7 +269,7 @@ func collectMonitorNames(monitors []Monitor, names map[string]bool) {
 	}
 }

-func normalizeSite(s models.Site) models.Site {
+func normalizeSite(s models.SiteConfig) models.SiteConfig {
 	if s.Method == "" {
 		s.Method = "GET"
 	}
@@ -291,7 +293,7 @@ func diffAlert(existing models.AlertConfig, desired Alert) string {
 	return strings.Join(diffs, ", ")
 }

-func diffSite(existing, desired models.Site) string {
+func diffSite(existing, desired models.SiteConfig) string {
 	var diffs []string
 	if existing.URL != desired.URL {
 		diffs = append(diffs, fmt.Sprintf("url: %s -> %s", existing.URL, desired.URL))
@@ -1,10 +1,12 @@
 package config

 import (
-	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
-	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
+	"context"
 	"strings"
 	"testing"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
 )

 func newTestStore(t *testing.T) store.Store {
@@ -13,7 +15,7 @@ func newTestStore(t *testing.T) store.Store {
 	if err != nil {
 		t.Fatalf("NewSQLiteStore: %v", err)
 	}
-	if err := s.Init(); err != nil {
+	if err := s.Init(context.Background()); err != nil {
 		t.Fatalf("Init: %v", err)
 	}
 	return s
@@ -31,7 +33,7 @@ func TestApplyCreateFromScratch(t *testing.T) {
 		},
 	}

-	changes, err := Apply(s, f, ApplyOpts{})
+	changes, err := Apply(context.Background(), s, f, ApplyOpts{})
 	if err != nil {
 		t.Fatalf("Apply: %v", err)
 	}
@@ -46,12 +48,12 @@ func TestApplyCreateFromScratch(t *testing.T) {
 		t.Fatalf("expected 3 creates, got %d", creates)
 	}

-	sites, _ := s.GetSites()
+	sites, _ := s.GetSites(context.Background())
 	if len(sites) != 2 {
 		t.Fatalf("expected 2 sites, got %d", len(sites))
 	}

-	alerts, _ := s.GetAllAlerts()
+	alerts, _ := s.GetAllAlerts(context.Background())
 	if len(alerts) != 1 {
 		t.Fatalf("expected 1 alert, got %d", len(alerts))
 	}
@@ -68,11 +70,11 @@ func TestApplyIdempotent(t *testing.T) {
 		},
 	}

-	if _, err := Apply(s, f, ApplyOpts{}); err != nil {
+	if _, err := Apply(context.Background(), s, f, ApplyOpts{}); err != nil {
 		t.Fatalf("first Apply: %v", err)
 	}

-	changes, err := Apply(s, f, ApplyOpts{})
+	changes, err := Apply(context.Background(), s, f, ApplyOpts{})
 	if err != nil {
 		t.Fatalf("second Apply: %v", err)
 	}
@@ -90,12 +92,12 @@ func TestApplyUpdate(t *testing.T) {
 		},
 	}

-	if _, err := Apply(s, f, ApplyOpts{}); err != nil {
+	if _, err := Apply(context.Background(), s, f, ApplyOpts{}); err != nil {
 		t.Fatalf("first Apply: %v", err)
 	}

 	f.Monitors[0].Interval = 60
-	changes, err := Apply(s, f, ApplyOpts{})
+	changes, err := Apply(context.Background(), s, f, ApplyOpts{})
 	if err != nil {
 		t.Fatalf("second Apply: %v", err)
 	}
@@ -104,7 +106,7 @@ func TestApplyUpdate(t *testing.T) {
 		t.Fatalf("expected 1 update, got %+v", changes)
 	}

-	sites, _ := s.GetSites()
+	sites, _ := s.GetSites(context.Background())
 	if sites[0].Interval != 60 {
 		t.Fatalf("expected interval 60, got %d", sites[0].Interval)
 	}
@@ -112,8 +114,8 @@ func TestApplyUpdate(t *testing.T) {

 func TestApplyPrune(t *testing.T) {
 	s := newTestStore(t)
-	s.AddSite(models.Site{Name: "Keep", URL: "https://keep.com", Type: "http", Interval: 30, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
-	s.AddSite(models.Site{Name: "Remove", URL: "https://remove.com", Type: "http", Interval: 30, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
+	s.AddSite(context.Background(), models.SiteConfig{Name: "Keep", URL: "https://keep.com", Type: "http", Interval: 30, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
+	s.AddSite(context.Background(), models.SiteConfig{Name: "Remove", URL: "https://remove.com", Type: "http", Interval: 30, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})

 	f := &File{
 		Monitors: []Monitor{
@@ -121,7 +123,7 @@ func TestApplyPrune(t *testing.T) {
 		},
 	}

-	changes, err := Apply(s, f, ApplyOpts{Prune: true})
+	changes, err := Apply(context.Background(), s, f, ApplyOpts{Prune: true})
 	if err != nil {
 		t.Fatalf("Apply: %v", err)
 	}
@@ -136,7 +138,7 @@ func TestApplyPrune(t *testing.T) {
 		t.Fatalf("expected 1 delete, got %d", deleteCount)
 	}

-	sites, _ := s.GetSites()
+	sites, _ := s.GetSites(context.Background())
 	if len(sites) != 1 || sites[0].Name != "Keep" {
 		t.Fatalf("expected only 'Keep', got %+v", sites)
 	}
@@ -150,7 +152,7 @@ func TestApplyDryRun(t *testing.T) {
 		},
 	}

-	changes, err := Apply(s, f, ApplyOpts{DryRun: true})
+	changes, err := Apply(context.Background(), s, f, ApplyOpts{DryRun: true})
 	if err != nil {
 		t.Fatalf("Apply: %v", err)
 	}
@@ -159,7 +161,7 @@ func TestApplyDryRun(t *testing.T) {
 		t.Fatalf("expected 1 create in dry-run, got %+v", changes)
 	}

-	sites, _ := s.GetSites()
+	sites, _ := s.GetSites(context.Background())
 	if len(sites) != 0 {
 		t.Fatalf("expected 0 sites after dry-run, got %d", len(sites))
 	}
@@ -179,7 +181,7 @@ func TestApplyGroupHierarchy(t *testing.T) {
 		},
 	}

-	changes, err := Apply(s, f, ApplyOpts{})
+	changes, err := Apply(context.Background(), s, f, ApplyOpts{})
 	if err != nil {
 		t.Fatalf("Apply: %v", err)
 	}
@@ -188,8 +190,8 @@ func TestApplyGroupHierarchy(t *testing.T) {
 		t.Fatalf("expected 3 creates, got %d", len(changes))
 	}

-	sites, _ := s.GetSites()
-	var group models.Site
+	sites, _ := s.GetSites(context.Background())
+	var group models.SiteConfig
 	for _, s := range sites {
 		if s.Type == "group" {
 			group = s
@@ -223,12 +225,12 @@ func TestApplyAlertReference(t *testing.T) {
 		},
 	}

-	if _, err := Apply(s, f, ApplyOpts{}); err != nil {
+	if _, err := Apply(context.Background(), s, f, ApplyOpts{}); err != nil {
 		t.Fatalf("Apply: %v", err)
 	}

-	sites, _ := s.GetSites()
-	alerts, _ := s.GetAllAlerts()
+	sites, _ := s.GetSites(context.Background())
+	alerts, _ := s.GetAllAlerts(context.Background())

 	if sites[0].AlertID != alerts[0].ID {
 		t.Fatalf("expected alert_id %d, got %d", alerts[0].ID, sites[0].AlertID)
@@ -243,7 +245,7 @@ func TestApplyInvalidAlertRef(t *testing.T) {
 		},
 	}

-	_, err := Apply(s, f, ApplyOpts{})
+	_, err := Apply(context.Background(), s, f, ApplyOpts{})
 	if err == nil || !strings.Contains(err.Error(), "not found") {
 		t.Fatalf("expected alert not found error, got %v", err)
 	}
@@ -258,7 +260,7 @@ func TestApplyDuplicateNames(t *testing.T) {
 		},
 	}

-	_, err := Apply(s, f, ApplyOpts{})
+	_, err := Apply(context.Background(), s, f, ApplyOpts{})
 	if err == nil || !strings.Contains(err.Error(), "duplicate") {
 		t.Fatalf("expected duplicate error, got %v", err)
 	}
@@ -266,7 +268,7 @@ func TestApplyDuplicateNames(t *testing.T) {

 func TestApplyExistingAlertReference(t *testing.T) {
 	s := newTestStore(t)
-	s.AddAlert("Existing", "webhook", map[string]string{"url": "https://example.com"})
+	s.AddAlert(context.Background(), "Existing", "webhook", map[string]string{"url": "https://example.com"})

 	f := &File{
 		Monitors: []Monitor{
@@ -274,7 +276,7 @@ func TestApplyExistingAlertReference(t *testing.T) {
 		},
 	}

-	changes, err := Apply(s, f, ApplyOpts{})
+	changes, err := Apply(context.Background(), s, f, ApplyOpts{})
 	if err != nil {
 		t.Fatalf("Apply: %v", err)
 	}
@@ -283,7 +285,7 @@ func TestApplyExistingAlertReference(t *testing.T) {
 		t.Fatalf("expected 1 create, got %+v", changes)
 	}

-	sites, _ := s.GetSites()
+	sites, _ := s.GetSites(context.Background())
 	if sites[0].AlertID == 0 {
 		t.Fatal("expected non-zero alert_id for existing alert reference")
 	}
@@ -1,6 +1,7 @@
 package config

 import (
+	"context"
 	"fmt"
 	"os"
 	"sort"
@@ -11,13 +12,13 @@ import (
 	"gopkg.in/yaml.v3"
 )

-func Export(s store.Store) (*File, error) {
-	dbAlerts, err := s.GetAllAlerts()
+func Export(ctx context.Context, s store.Store) (*File, error) {
+	dbAlerts, err := s.GetAllAlerts(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("load alerts: %w", err)
 	}

-	dbSites, err := s.GetSites()
+	dbSites, err := s.GetSites(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("load sites: %w", err)
 	}
@@ -33,9 +34,9 @@ func Export(s store.Store) (*File, error) {
 		})
 	}

-	groups := make(map[int]models.Site)
-	children := make(map[int][]models.Site)
-	var topLevel []models.Site
+	groups := make(map[int]models.SiteConfig)
+	children := make(map[int][]models.SiteConfig)
+	var topLevel []models.SiteConfig

 	for _, s := range dbSites {
 		switch {
@@ -75,7 +76,7 @@ func Export(s store.Store) (*File, error) {
 	return &File{Alerts: yamlAlerts, Monitors: yamlMonitors}, nil
 }

-func siteToMonitor(s models.Site, alertIDToName map[int]string) Monitor {
+func siteToMonitor(s models.SiteConfig, alertIDToName map[int]string) Monitor {
 	m := Monitor{
 		Name:     s.Name,
 		Type:     s.Type,
@@ -1,13 +1,15 @@
 package config

 import (
-	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"context"
 	"testing"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 )

 func TestExportEmpty(t *testing.T) {
 	s := newTestStore(t)
-	f, err := Export(s)
+	f, err := Export(context.Background(), s)
 	if err != nil {
 		t.Fatalf("Export: %v", err)
 	}
@@ -18,11 +20,11 @@ func TestExportEmpty(t *testing.T) {

 func TestExportAlertNames(t *testing.T) {
 	s := newTestStore(t)
-	s.AddAlert("Discord", "discord", map[string]string{"url": "https://example.com"})
-	alerts, _ := s.GetAllAlerts()
-	s.AddSite(models.Site{Name: "Web", URL: "https://example.com", Type: "http", Interval: 30, AlertID: alerts[0].ID, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
+	s.AddAlert(context.Background(), "Discord", "discord", map[string]string{"url": "https://example.com"})
+	alerts, _ := s.GetAllAlerts(context.Background())
+	s.AddSite(context.Background(), models.SiteConfig{Name: "Web", URL: "https://example.com", Type: "http", Interval: 30, AlertID: alerts[0].ID, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})

-	f, err := Export(s)
+	f, err := Export(context.Background(), s)
 	if err != nil {
 		t.Fatalf("Export: %v", err)
 	}
@@ -37,11 +39,11 @@ func TestExportAlertNames(t *testing.T) {

 func TestExportGroupHierarchy(t *testing.T) {
 	s := newTestStore(t)
-	groupID, _ := s.AddSiteReturningID(models.Site{Name: "Prod", Type: "group", ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
-	s.AddSite(models.Site{Name: "Prod Web", URL: "https://prod.example.com", Type: "http", Interval: 15, ParentID: groupID, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
-	s.AddSite(models.Site{Name: "Top Level", URL: "https://example.com", Type: "http", Interval: 30, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
+	groupID, _ := s.AddSiteReturningID(context.Background(), models.SiteConfig{Name: "Prod", Type: "group", ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
+	s.AddSite(context.Background(), models.SiteConfig{Name: "Prod Web", URL: "https://prod.example.com", Type: "http", Interval: 15, ParentID: groupID, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
+	s.AddSite(context.Background(), models.SiteConfig{Name: "Top Level", URL: "https://example.com", Type: "http", Interval: 30, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})

-	f, err := Export(s)
+	f, err := Export(context.Background(), s)
 	if err != nil {
 		t.Fatalf("Export: %v", err)
 	}
@@ -70,12 +72,12 @@ func TestExportGroupHierarchy(t *testing.T) {

 func TestExportOmitsDefaults(t *testing.T) {
 	s := newTestStore(t)
-	s.AddSite(models.Site{
+	s.AddSite(context.Background(), models.SiteConfig{
 		Name: "Web", URL: "https://example.com", Type: "http", Interval: 30,
 		Method: "GET", AcceptedCodes: "200-299", ExpiryThreshold: 7,
 	})

-	f, err := Export(s)
+	f, err := Export(context.Background(), s)
 	if err != nil {
 		t.Fatalf("Export: %v", err)
 	}
@@ -94,18 +96,18 @@ func TestExportOmitsDefaults(t *testing.T) {

 func TestExportRoundTrip(t *testing.T) {
 	s1 := newTestStore(t)
-	s1.AddAlert("Discord", "discord", map[string]string{"url": "https://example.com"})
-	alerts, _ := s1.GetAllAlerts()
-	s1.AddSite(models.Site{Name: "Web", URL: "https://example.com", Type: "http", Interval: 30, AlertID: alerts[0].ID, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
-	s1.AddSite(models.Site{Name: "Ping", Type: "ping", Hostname: "10.0.0.1", Interval: 60, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
+	s1.AddAlert(context.Background(), "Discord", "discord", map[string]string{"url": "https://example.com"})
+	alerts, _ := s1.GetAllAlerts(context.Background())
+	s1.AddSite(context.Background(), models.SiteConfig{Name: "Web", URL: "https://example.com", Type: "http", Interval: 30, AlertID: alerts[0].ID, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})
+	s1.AddSite(context.Background(), models.SiteConfig{Name: "Ping", Type: "ping", Hostname: "10.0.0.1", Interval: 60, ExpiryThreshold: 7, Method: "GET", AcceptedCodes: "200-299"})

-	exported, err := Export(s1)
+	exported, err := Export(context.Background(), s1)
 	if err != nil {
 		t.Fatalf("Export: %v", err)
 	}

 	s2 := newTestStore(t)
-	changes, err := Apply(s2, exported, ApplyOpts{})
+	changes, err := Apply(context.Background(), s2, exported, ApplyOpts{})
 	if err != nil {
 		t.Fatalf("Apply: %v", err)
 	}
@@ -120,7 +122,7 @@ func TestExportRoundTrip(t *testing.T) {
 		t.Fatalf("expected 3 creates, got %d", creates)
 	}

-	reexported, err := Export(s2)
+	reexported, err := Export(context.Background(), s2)
 	if err != nil {
 		t.Fatalf("re-Export: %v", err)
 	}
@@ -1,11 +1,14 @@
 package importer

 import (
+	"crypto/rand"
+	"encoding/hex"
 	"encoding/json"
 	"fmt"
-	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 	"os"
 	"strings"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 )

 type KumaBackup struct {
@@ -80,7 +83,7 @@ func ConvertKuma(kb *KumaBackup) models.Backup {
 		}
 	}

-	var sites []models.Site
+	var sites []models.SiteConfig
 	for _, m := range kb.MonitorList {
 		site := convertKumaMonitor(m, kumaToUpkeepAlert)
 		sites = append(sites, site)
@@ -132,8 +135,8 @@ func convertKumaNotifications(entries []KumaNotifEntry) map[int]models.AlertConf
 	return result
 }

-func convertKumaMonitor(m KumaMonitor, alertMap map[int]int) models.Site {
-	site := models.Site{
+func convertKumaMonitor(m KumaMonitor, alertMap map[int]int) models.SiteConfig {
+	site := models.SiteConfig{
 		ID:          m.ID,
 		Name:        m.Name,
 		Description: m.Description,
@@ -155,10 +158,18 @@ func convertKumaMonitor(m KumaMonitor, alertMap map[int]int) models.Site {
 	site.DNSResolveType = m.DNSResolveType
 	site.DNSServer = m.DNSResolveServer

+	site.Paused = !m.Active
+
 	switch m.Type {
 	case "http":
 		site.URL = m.URL
 		site.CheckSSL = m.ExpiryNotif
+	case "push":
+		site.Type = "push"
+		b := make([]byte, 16)
+		if _, err := rand.Read(b); err == nil {
+			site.Token = hex.EncodeToString(b)
+		}
 	case "ping":
 		if m.Hostname != "" {
 			site.Hostname = m.Hostname
@@ -0,0 +1,210 @@
+package importer
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// writeTemp stores content in a file named backup.json inside a fresh
+// per-test temporary directory and returns the path of that file. A write
+// failure aborts the calling test.
+func writeTemp(t *testing.T, content string) string {
+	t.Helper()
+	dir := t.TempDir()
+	target := filepath.Join(dir, "backup.json")
+	err := os.WriteFile(target, []byte(content), 0o600)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return target
+}
+
+// TestLoadKumaFileMissingFile checks that LoadKumaFile reports an error when
+// the given path does not exist.
+func TestLoadKumaFileMissingFile(t *testing.T) {
+	missing := filepath.Join(t.TempDir(), "nope.json")
+	if _, err := LoadKumaFile(missing); err == nil {
+		t.Fatal("expected error for missing file")
+	}
+}
+
+// TestLoadKumaFileMalformedInput feeds LoadKumaFile a series of invalid or
+// mistyped JSON documents and requires every one of them to fail with an
+// error whose message contains "parse JSON".
+func TestLoadKumaFileMalformedInput(t *testing.T) {
+	type malformed struct {
+		name string
+		body string
+	}
+	inputs := []malformed{
+		{name: "empty file", body: ""},
+		{name: "truncated JSON", body: `{"version": "1.23", "monitorList": [`},
+		{name: "not JSON", body: "definitely not json"},
+		{name: "wrong root type", body: `[1, 2, 3]`},
+		{name: "monitorList wrong type", body: `{"monitorList": {"a": 1}}`},
+		{name: "monitor field wrong type", body: `{"monitorList": [{"id": "not-an-int"}]}`},
+		{name: "notificationList wrong type", body: `{"notificationList": "oops"}`},
+	}
+	for i := range inputs {
+		in := inputs[i]
+		t.Run(in.name, func(t *testing.T) {
+			path := writeTemp(t, in.body)
+			_, err := LoadKumaFile(path)
+			switch {
+			case err == nil:
+				t.Fatalf("expected parse error for %s", in.name)
+			case !strings.Contains(err.Error(), "parse JSON"):
+				t.Fatalf("expected wrapped parse error, got: %v", err)
+			}
+		})
+	}
+}
+
+// TestLoadKumaFileNullLists loads a backup whose monitorList and
+// notificationList are JSON null and expects the converted backup to contain
+// no sites and no alerts.
+func TestLoadKumaFileNullLists(t *testing.T) {
+	const body = `{"version": "1.23", "monitorList": null, "notificationList": null}`
+	kb, err := LoadKumaFile(writeTemp(t, body))
+	if err != nil {
+		t.Fatal(err)
+	}
+	backup := ConvertKuma(kb)
+	sites, alerts := len(backup.Sites), len(backup.Alerts)
+	if sites != 0 || alerts != 0 {
+		t.Fatalf("expected empty backup, got %d sites %d alerts", sites, alerts)
+	}
+}
+
+// TestConvertKumaSkipsMalformedNotificationConfig converts a backup holding
+// one notification whose config is not valid JSON and one valid discord
+// notification. Only the valid one may become an alert, and the monitor that
+// referenced the broken notification must end up with AlertID 0.
+func TestConvertKumaSkipsMalformedNotificationConfig(t *testing.T) {
+	kb := &KumaBackup{
+		NotificationList: []KumaNotifEntry{
+			{ID: 1, Name: "broken", Config: "{not json"},
+			{ID: 2, Name: "good", Config: `{"type": "discord", "ntfyserverurl": "https://example.com/hook"}`},
+		},
+		MonitorList: []KumaMonitor{
+			{ID: 10, Name: "site", Type: "http", URL: "https://example.com", NotificationIDs: map[string]bool{"1": true}},
+		},
+	}
+	backup := ConvertKuma(kb)
+	if len(backup.Alerts) != 1 {
+		t.Fatalf("expected broken notification skipped, got %d alerts", len(backup.Alerts))
+	}
+	if backup.Alerts[0].Type != "discord" {
+		t.Fatalf("expected discord alert, got %q", backup.Alerts[0].Type)
+	}
+	// Guard the index below so a dropped site fails this test cleanly instead
+	// of panicking with an out-of-range access.
+	if len(backup.Sites) != 1 {
+		t.Fatalf("expected 1 site, got %d", len(backup.Sites))
+	}
+	if backup.Sites[0].AlertID != 0 {
+		t.Fatalf("site referencing skipped notification should keep AlertID 0, got %d", backup.Sites[0].AlertID)
+	}
+}
+
+// TestConvertKumaNtfyNotification converts a single ntfy notification and
+// checks the resulting alert: type "ntfy", server URL without its trailing
+// slash, topic and priority carried over as strings, and the username and
+// password mapped into the settings.
+func TestConvertKumaNtfyNotification(t *testing.T) {
+	kb := &KumaBackup{
+		NotificationList: []KumaNotifEntry{
+			{ID: 3, Name: "ntfy", Config: `{
+				"type": "ntfy",
+				"ntfyserverurl": "https://ntfy.example.com/",
+				"ntfytopic": "uptime",
+				"ntfyPriority": 4,
+				"ntfyAuthenticationMethod": "usernamePassword",
+				"ntfyusername": "u",
+				"ntfypassword": "p"
+			}`},
+		},
+	}
+	alerts := ConvertKuma(kb).Alerts
+	if n := len(alerts); n != 1 {
+		t.Fatalf("expected 1 alert, got %d", n)
+	}
+	ntfy := alerts[0]
+	if ntfy.Type != "ntfy" {
+		t.Fatalf("expected ntfy, got %q", ntfy.Type)
+	}
+	cfg := ntfy.Settings
+	if got := cfg["url"]; got != "https://ntfy.example.com" {
+		t.Fatalf("expected trailing slash trimmed, got %q", got)
+	}
+	if cfg["topic"] != "uptime" || cfg["priority"] != "4" {
+		t.Fatalf("unexpected settings: %v", cfg)
+	}
+	if cfg["username"] != "u" || cfg["password"] != "p" {
+		t.Fatalf("expected credentials mapped, got %v", cfg)
+	}
+}
+
+// TestConvertKumaUnknownNotificationFallsBackToWebhook converts a
+// notification of type "matrix" and expects exactly one alert of type
+// "webhook" in the result.
+func TestConvertKumaUnknownNotificationFallsBackToWebhook(t *testing.T) {
+	entry := KumaNotifEntry{ID: 4, Name: "matrix", Config: `{"type": "matrix", "ntfyserverurl": "https://example.com/hook"}`}
+	kb := &KumaBackup{NotificationList: []KumaNotifEntry{entry}}
+
+	alerts := ConvertKuma(kb).Alerts
+	if len(alerts) == 1 && alerts[0].Type == "webhook" {
+		return
+	}
+	t.Fatalf("expected webhook fallback, got %+v", alerts)
+}
+
+// TestConvertKumaHTTPMonitor converts one inactive HTTP monitor that
+// references a slack notification and checks the mapped site: URL, CheckSSL
+// and IgnoreTLS are set, the site is paused, the accepted codes are joined
+// with commas, and AlertID is 1.
+func TestConvertKumaHTTPMonitor(t *testing.T) {
+	monitor := KumaMonitor{
+		ID:              7,
+		Name:            "web",
+		Type:            "http",
+		URL:             "https://example.com",
+		Interval:        60,
+		Timeout:         30,
+		MaxRetries:      2,
+		Method:          "GET",
+		AcceptedCodes:   []string{"200", "301"},
+		IgnoreTLS:       true,
+		ExpiryNotif:     true,
+		Active:          false,
+		NotificationIDs: map[string]bool{"1": true},
+	}
+	kb := &KumaBackup{
+		NotificationList: []KumaNotifEntry{
+			{ID: 1, Name: "hook", Config: `{"type": "slack", "ntfyserverurl": "https://example.com/hook"}`},
+		},
+		MonitorList: []KumaMonitor{monitor},
+	}
+
+	sites := ConvertKuma(kb).Sites
+	if n := len(sites); n != 1 {
+		t.Fatalf("expected 1 site, got %d", n)
+	}
+	site := sites[0]
+	// Fatal stops at the first failure, so the case order mirrors the order
+	// in which the fields are meant to be reported.
+	switch {
+	case site.URL != "https://example.com" || !site.CheckSSL || !site.IgnoreTLS:
+		t.Fatalf("http fields not mapped: %+v", site)
+	case !site.Paused:
+		t.Fatal("inactive monitor should import paused")
+	case site.AcceptedCodes != "200,301":
+		t.Fatalf("expected joined accepted codes, got %q", site.AcceptedCodes)
+	case site.AlertID != 1:
+		t.Fatalf("expected alert mapped, got %d", site.AlertID)
+	}
+}
+
+// TestConvertKumaPushMonitorGetsToken converts a single push monitor and
+// expects the resulting site to carry a 32-character lowercase hex token.
+func TestConvertKumaPushMonitorGetsToken(t *testing.T) {
+	kb := &KumaBackup{
+		MonitorList: []KumaMonitor{{ID: 1, Name: "push", Type: "push", Active: true}},
+	}
+	backup := ConvertKuma(kb)
+	// Guard the index below so a dropped site fails this test cleanly instead
+	// of panicking with an out-of-range access.
+	if len(backup.Sites) != 1 {
+		t.Fatalf("expected 1 site, got %d", len(backup.Sites))
+	}
+	token := backup.Sites[0].Token
+	// Trim leaves a non-empty remainder whenever the token contains any
+	// character outside the lowercase hex alphabet, so this checks the
+	// content as well as the length.
+	if len(token) != 32 || strings.Trim(token, "0123456789abcdef") != "" {
+		t.Fatalf("expected 32-char hex token, got %q", token)
+	}
+}
+
+// TestConvertKumaNonNumericNotificationID converts a monitor whose only
+// notification ID key is the non-numeric string "abc" and expects the
+// resulting site to keep AlertID 0.
+func TestConvertKumaNonNumericNotificationID(t *testing.T) {
+	kb := &KumaBackup{
+		MonitorList: []KumaMonitor{{
+			ID:              1,
+			Name:            "site",
+			Type:            "http",
+			NotificationIDs: map[string]bool{"abc": true},
+		}},
+	}
+	backup := ConvertKuma(kb)
+	// Guard the index below so a dropped site fails this test cleanly instead
+	// of panicking with an out-of-range access.
+	if len(backup.Sites) != 1 {
+		t.Fatalf("expected 1 site, got %d", len(backup.Sites))
+	}
+	if backup.Sites[0].AlertID != 0 {
+		t.Fatalf("non-numeric notification ID should not map, got %d", backup.Sites[0].AlertID)
+	}
+}
+
+// TestConvertKumaGroupAndChildren converts a group monitor followed by a ping
+// monitor whose Parent is the group. The first site must have type "group";
+// the second must keep ParentID 1 and its hostname.
+func TestConvertKumaGroupAndChildren(t *testing.T) {
+	kb := &KumaBackup{
+		MonitorList: []KumaMonitor{
+			{ID: 1, Name: "grp", Type: "group", Active: true},
+			{ID: 2, Name: "ping", Type: "ping", Hostname: "10.0.0.1", Parent: 1, Active: true},
+		},
+	}
+	backup := ConvertKuma(kb)
+	// Guard the indexes below so a dropped site fails this test cleanly
+	// instead of panicking with an out-of-range access.
+	if len(backup.Sites) != 2 {
+		t.Fatalf("expected 2 sites, got %d", len(backup.Sites))
+	}
+	if backup.Sites[0].Type != "group" {
+		t.Fatalf("expected group type, got %q", backup.Sites[0].Type)
+	}
+	if backup.Sites[1].ParentID != 1 || backup.Sites[1].Hostname != "10.0.0.1" {
+		t.Fatalf("child not mapped: %+v", backup.Sites[1])
+	}
+}
@@ -2,11 +2,12 @@ package metrics

 import (
 	"fmt"
-	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
-	"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
 	"net/http"
 	"sort"
 	"strings"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
 )

 func Handler(eng *monitor.Engine) http.HandlerFunc {
@@ -19,7 +20,7 @@ func Handler(eng *monitor.Engine) http.HandlerFunc {
 		writeHelp(&b, "uptop_monitor_up", "gauge", "Whether the monitor is up (1) or down (0).")
 		for _, s := range sites {
 			val := 0
-			if s.Status == "UP" {
+			if s.Status == models.StatusUp {
 				val = 1
 			}
 			writeGauge(&b, "uptop_monitor_up", labels(s), float64(val))
@@ -10,75 +10,21 @@ import (

 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store/storetest"
 )

 type mockStore struct {
-	sites []models.Site
+	storetest.BaseMock
+	sites []models.SiteConfig
 }

-func (m *mockStore) Init() error                                              { return nil }
-func (m *mockStore) GetSites() ([]models.Site, error)                         { return m.sites, nil }
-func (m *mockStore) AddSite(models.Site) error                                { return nil }
-func (m *mockStore) UpdateSite(models.Site) error                             { return nil }
-func (m *mockStore) UpdateSitePaused(int, bool) error                         { return nil }
-func (m *mockStore) DeleteSite(int) error                                     { return nil }
-func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error)              { return nil, nil }
-func (m *mockStore) GetAlert(int) (models.AlertConfig, error)                 { return models.AlertConfig{}, nil }
-func (m *mockStore) AddAlert(string, string, map[string]string) error         { return nil }
-func (m *mockStore) UpdateAlert(int, string, string, map[string]string) error { return nil }
-func (m *mockStore) DeleteAlert(int) error                                    { return nil }
-func (m *mockStore) GetAllUsers() ([]models.User, error)                      { return nil, nil }
-func (m *mockStore) AddUser(string, string, string) error                     { return nil }
-func (m *mockStore) UpdateUser(int, string, string, string) error             { return nil }
-func (m *mockStore) DeleteUser(int) error                                     { return nil }
-func (m *mockStore) SaveCheck(int, int64, bool) error                         { return nil }
-func (m *mockStore) LoadAllHistory(int) (map[int][]models.CheckRecord, error) {
-	return nil, nil
+func (m *mockStore) GetSites(_ context.Context) ([]models.SiteConfig, error) {
+	return m.sites, nil
 }
-func (m *mockStore) ExportData() (models.Backup, error)        { return models.Backup{}, nil }
-func (m *mockStore) ImportData(models.Backup) error            { return nil }
-func (m *mockStore) GetSiteByName(string) (models.Site, error) { return models.Site{}, nil }
-func (m *mockStore) GetAlertByName(string) (models.AlertConfig, error) {
-	return models.AlertConfig{}, nil
-}
-func (m *mockStore) AddSiteReturningID(models.Site) (int, error) { return 0, nil }
-func (m *mockStore) AddAlertReturningID(string, string, map[string]string) (int, error) {
-	return 0, nil
-}
-func (m *mockStore) SaveCheckFromNode(int, string, int64, bool) error { return nil }
-func (m *mockStore) RegisterNode(models.ProbeNode) error              { return nil }
-func (m *mockStore) GetNode(string) (models.ProbeNode, error)         { return models.ProbeNode{}, nil }
-func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error)         { return nil, nil }
-func (m *mockStore) UpdateNodeLastSeen(string) error                  { return nil }
-func (m *mockStore) DeleteNode(string) error                          { return nil }
-func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
-	return nil, nil
-}
-func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
-func (m *mockStore) SaveLog(string) error                           { return nil }
-func (m *mockStore) LoadLogs(int) ([]string, error)                 { return nil, nil }
-func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
-	return nil, nil
-}
-func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
-	return nil, nil
-}
-func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error    { return nil }
-func (m *mockStore) EndMaintenanceWindow(int) error                         { return nil }
-func (m *mockStore) DeleteMaintenanceWindow(int) error                      { return nil }
-func (m *mockStore) IsMonitorInMaintenance(int) (bool, error)               { return false, nil }
-func (m *mockStore) GetPreference(string) (string, error)                   { return "", nil }
-func (m *mockStore) SetPreference(string, string) error                     { return nil }
-func (m *mockStore) SaveStateChange(int, string, string, string) error      { return nil }
-func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
-func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
-	return nil, nil
-}
-func (m *mockStore) Close() error { return nil }

 func TestMetricsHandler(t *testing.T) {
 	ms := &mockStore{
-		sites: []models.Site{
+		sites: []models.SiteConfig{
 			{ID: 1, Name: "Example", URL: "https://example.com", Type: "http", Interval: 30},
 			{ID: 2, Name: "DNS Check", Type: "dns", Interval: 60},
 		},
@@ -2,7 +2,7 @@ package models

 import "time"

-type Site struct {
+type SiteConfig struct {
 	ID              int
 	Name            string
 	URL             string
@@ -26,9 +26,11 @@ type Site struct {
 	IgnoreTLS      bool
 	Paused         bool
 	Regions        string
+}

+type SiteState struct {
 	FailureCount    int
-	Status          string
+	Status          Status
 	StatusCode      int
 	Latency         time.Duration
 	CertExpiry      time.Time
@@ -40,6 +42,11 @@ type Site struct {
 	LastSuccessAt   time.Time
 }

+type Site struct {
+	SiteConfig
+	SiteState
+}
+
 type StateChange struct {
 	ID          int
 	SiteID      int
@@ -103,7 +110,7 @@ type MaintenanceWindow struct {
 }

 type Backup struct {
-	Sites              []Site              `json:"sites"`
+	Sites              []SiteConfig        `json:"sites"`
 	Alerts             []AlertConfig       `json:"alerts"`
 	Users              []User              `json:"users"`
 	MaintenanceWindows []MaintenanceWindow `json:"maintenance_windows,omitempty"`
@@ -0,0 +1,36 @@
+package models
+
+// safeAlertSettingKeys maps each alert provider type to the setting keys that
+// are NOT secret and may therefore be displayed or exported unmasked. Any key
+// not listed for a provider is redacted. Providers with no entry at all
+// (discord, slack, webhook, pushover) keep their secret in a field a denylist
+// would miss — the webhook URL, the pushover token/user — so every one of
+// their settings is redacted.
+var safeAlertSettingKeys = map[string]map[string]bool{
+	"email": {
+		"host": true,
+		"port": true,
+		"to":   true,
+		"from": true,
+	},
+	"ntfy": {
+		"topic":    true,
+		"priority": true,
+	},
+	"telegram": {
+		"chat_id": true,
+	},
+	"pagerduty": {
+		"severity": true,
+	},
+	"gotify": {
+		"priority": true,
+	},
+	"opsgenie": {
+		"priority": true,
+		"eu":       true,
+	},
+}
+
+// RedactAlertSettings returns a new map with the same keys as settings. Empty
+// values stay empty, values under a key allowlisted for alertType are copied
+// through, and every other value is replaced by "***REDACTED***". Because the
+// policy is an allowlist it fails safe: an unknown provider or a newly added
+// setting is masked by default rather than leaked. The backup export path and
+// the TUI alert detail panel share this function so both apply one policy.
+func RedactAlertSettings(alertType string, settings map[string]string) map[string]string {
+	const mask = "***REDACTED***"
+	allowed := safeAlertSettingKeys[alertType]
+	out := make(map[string]string, len(settings))
+	for key, value := range settings {
+		// Only non-empty values under a non-allowlisted key need masking;
+		// everything else is copied through unchanged.
+		if value != "" && !allowed[key] {
+			out[key] = mask
+			continue
+		}
+		out[key] = value
+	}
+	return out
+}
@@ -0,0 +1,18 @@
+package models
+
+// Status is the state of a monitor, stored as its display string.
+type Status string
+
+// Status values known to the package.
+const (
+	StatusUp      Status = "UP"
+	StatusDown    Status = "DOWN"
+	StatusPending Status = "PENDING"
+	StatusLate    Status = "LATE"
+	StatusStale   Status = "STALE"
+	StatusSSLExp  Status = "SSL EXP"
+)
+
+// IsBroken reports whether s is StatusDown or StatusSSLExp.
+func (s Status) IsBroken() bool {
+	switch s {
+	case StatusDown, StatusSSLExp:
+		return true
+	default:
+		return false
+	}
+}
+
+// String returns the underlying string value of s.
+func (s Status) String() string {
+	return string(s)
+}
@@ -3,6 +3,7 @@ package monitor
 import (
 	"context"
 	"fmt"
+	"io"
 	"net"
 	"net/http"
 	"strconv"
@@ -15,6 +16,16 @@ import (
 	probing "github.com/prometheus-community/pro-bing"
 )

+// Limits and fallbacks shared by the check functions in this file:
+//
+//   - maxErrorLength is the length limit passed to truncateError for HTTP and
+//     port check errors.
+//   - defaultAcceptedCodes is the range shown as "expected" in the HTTP error
+//     message when a site sets no AcceptedCodes.
+//   - defaultHTTPStatusMin and defaultHTTPStatusMax bound the status codes
+//     isCodeAccepted accepts when no list is given; the minimum is inclusive
+//     and the maximum is exclusive.
+//   - defaultTimeout is what siteTimeout returns when a site's Timeout is not
+//     positive.
+//   - defaultDNSServer is queried by runDNSCheck when a site sets no DNSServer.
+//   - defaultDNSPort is the port assumed when the DNS server has none, and the
+//     only port runDNSCheck accepts unless allowPrivate is set.
+const (
+	maxErrorLength       = 256
+	defaultAcceptedCodes = "200-299"
+	defaultHTTPStatusMin = 200
+	defaultHTTPStatusMax = 300
+	defaultTimeout       = 5 * time.Second
+	defaultDNSServer     = "1.1.1.1"
+	defaultDNSPort       = "53"
+)
+
 type CheckResult struct {
 	SiteID      int
 	Status      string // "UP", "DOWN", "SSL EXP"
@@ -25,52 +36,57 @@ type CheckResult struct {
 	ErrorReason string
 }

-func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bool, allowPrivate ...bool) CheckResult {
-	private := len(allowPrivate) > 0 && allowPrivate[0]
-
-	if site.Type != "http" && site.Type != "dns" && !private {
+func RunCheck(ctx context.Context, site models.SiteConfig, strict, insecure *http.Client, globalInsecure, allowPrivate bool) CheckResult {
+	// Resolve + validate once for non-HTTP types to prevent DNS-rebind TOCTOU:
+	// a second resolve in the check function could return a different (private) IP.
+	// HTTP is safe — SafeDialContext resolves and validates at dial time.
+	var pinnedIP net.IP
+	if site.Type != "http" && site.Type != "dns" && !allowPrivate {
 		host := site.Hostname
 		if host == "" {
 			host = site.URL
 		}
 		if host != "" {
-			if ips, err := net.LookupIP(host); err == nil {
+			ips, err := net.LookupIP(host)
+			if err != nil {
+				return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "resolve failed: " + err.Error()}
+			}
 			for _, ip := range ips {
 				if isPrivateIP(ip) {
-						return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "target resolves to private IP"}
-					}
+					return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "target resolves to private IP"}
 				}
 			}
+			pinnedIP = ips[0]
 		}
 	}

 	switch site.Type {
 	case "http":
-		return runHTTPCheck(site, strict, insecure, globalInsecure)
+		return runHTTPCheck(ctx, site, strict, insecure, globalInsecure)
 	case "ping":
-		return runPingCheck(site)
+		return runPingCheck(ctx, site, pinnedIP)
 	case "port":
-		return runPortCheck(site)
+		return runPortCheck(ctx, site, pinnedIP)
 	case "dns":
-		return runDNSCheck(site)
+		return runDNSCheck(ctx, site, allowPrivate)
 	default:
-		return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "unsupported monitor type: " + site.Type}
+		return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "unsupported monitor type: " + site.Type}
 	}
 }

-func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecure bool) CheckResult {
+func runHTTPCheck(ctx context.Context, site models.SiteConfig, strict, insecure *http.Client, globalInsecure bool) CheckResult {
 	method := site.Method
 	if method == "" {
 		method = "GET"
 	}

 	timeout := siteTimeout(site)
-	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	ctx, cancel := context.WithTimeout(ctx, timeout)
 	defer cancel()

 	req, err := http.NewRequestWithContext(ctx, method, site.URL, nil)
 	if err != nil {
-		return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "invalid request: " + err.Error()}
+		return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "invalid request: " + err.Error()}
 	}

 	client := strict
@@ -84,23 +100,26 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur

 	result := CheckResult{
 		SiteID:    site.ID,
-		Status:    "UP",
+		Status:    string(models.StatusUp),
 		LatencyNs: latency.Nanoseconds(),
 	}

 	if err != nil {
-		result.Status = "DOWN"
-		result.ErrorReason = truncateError(err.Error(), 256)
+		result.Status = string(models.StatusDown)
+		result.ErrorReason = truncateError(err.Error(), maxErrorLength)
 		return result
 	}
-	defer resp.Body.Close()
+	defer func() {
+		_, _ = io.Copy(io.Discard, resp.Body)
+		_ = resp.Body.Close()
+	}()

 	result.StatusCode = resp.StatusCode
 	if !isCodeAccepted(resp.StatusCode, site.AcceptedCodes) {
-		result.Status = "DOWN"
+		result.Status = string(models.StatusDown)
 		expected := site.AcceptedCodes
 		if expected == "" {
-			expected = "200-299"
+			expected = defaultAcceptedCodes
 		}
 		result.ErrorReason = fmt.Sprintf("HTTP %d (expected %s)", resp.StatusCode, expected)
 	}
@@ -110,7 +129,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
 		cert := resp.TLS.PeerCertificates[0]
 		result.CertExpiry = cert.NotAfter
 		if time.Now().After(cert.NotAfter) {
-			result.Status = "SSL EXP"
+			result.Status = string(models.StatusSSLExp)
 			result.ErrorReason = "SSL certificate expired"
 		}
 	}
@@ -118,7 +137,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
 	return result
 }

-func runPingCheck(site models.Site) CheckResult {
+func runPingCheck(_ context.Context, site models.SiteConfig, pinnedIP net.IP) CheckResult {
 	host := site.Hostname
 	if host == "" {
 		host = site.URL
@@ -126,7 +145,10 @@ func runPingCheck(site models.Site) CheckResult {

 	pinger, err := probing.NewPinger(host)
 	if err != nil {
-		return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "ping setup: " + err.Error()}
+		return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "ping setup: " + err.Error()}
+	}
+	if pinnedIP != nil {
+		pinger.SetIPAddr(&net.IPAddr{IP: pinnedIP})
 	}
 	pinger.Count = 1
 	pinger.Timeout = siteTimeout(site)
@@ -137,21 +159,24 @@ func runPingCheck(site models.Site) CheckResult {
 	latency := time.Since(start)

 	if err != nil {
-		return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "ping failed: " + err.Error()}
+		return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), LatencyNs: latency.Nanoseconds(), ErrorReason: "ping failed: " + err.Error()}
 	}
 	if pinger.Statistics().PacketsRecv == 0 {
-		return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "no ICMP response"}
+		return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), LatencyNs: latency.Nanoseconds(), ErrorReason: "no ICMP response"}
 	}

 	stats := pinger.Statistics()
-	return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: stats.AvgRtt.Nanoseconds()}
+	return CheckResult{SiteID: site.ID, Status: string(models.StatusUp), LatencyNs: stats.AvgRtt.Nanoseconds()}
 }

-func runPortCheck(site models.Site) CheckResult {
+func runPortCheck(_ context.Context, site models.SiteConfig, pinnedIP net.IP) CheckResult {
 	host := site.Hostname
 	if host == "" {
 		host = site.URL
 	}
+	if pinnedIP != nil {
+		host = pinnedIP.String()
+	}
 	addr := net.JoinHostPort(host, strconv.Itoa(site.Port))
 	timeout := siteTimeout(site)

@@ -160,13 +185,13 @@ func runPortCheck(site models.Site) CheckResult {
 	latency := time.Since(start)

 	if err != nil {
-		return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: truncateError(err.Error(), 256)}
+		return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), LatencyNs: latency.Nanoseconds(), ErrorReason: truncateError(err.Error(), maxErrorLength)}
 	}
 	_ = conn.Close()
-	return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
+	return CheckResult{SiteID: site.ID, Status: string(models.StatusUp), LatencyNs: latency.Nanoseconds()}
 }

-func runDNSCheck(site models.Site) CheckResult {
+func runDNSCheck(_ context.Context, site models.SiteConfig, allowPrivate bool) CheckResult {
 	host := site.Hostname
 	if host == "" {
 		host = site.URL
@@ -174,11 +199,26 @@ func runDNSCheck(site models.Site) CheckResult {

 	server := site.DNSServer
 	if server == "" {
-		server = "1.1.1.1"
+		server = defaultDNSServer
 	}
-	if _, _, err := net.SplitHostPort(server); err != nil {
-		server = net.JoinHostPort(server, "53")
+	serverHost, serverPort, err := net.SplitHostPort(server)
+	if err != nil {
+		serverHost = server
+		serverPort = defaultDNSPort
 	}
+	if !allowPrivate {
+		if serverPort != defaultDNSPort {
+			return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "DNS server port must be 53"}
+		}
+		if ips, err := net.LookupIP(serverHost); err == nil {
+			for _, ip := range ips {
+				if isPrivateIP(ip) {
+					return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), ErrorReason: "DNS server resolves to private address"}
+				}
+			}
+		}
+	}
+	server = net.JoinHostPort(serverHost, serverPort)

 	qtype := dns.TypeA
 	switch site.DNSResolveType {
@@ -211,24 +251,24 @@ func runDNSCheck(site models.Site) CheckResult {
 	latency := time.Since(start)

 	if err != nil {
-		return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS query failed: " + err.Error()}
+		return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS query failed: " + err.Error()}
 	}
 	if r.Rcode != dns.RcodeSuccess {
-		return CheckResult{SiteID: site.ID, Status: "DOWN", StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS RCODE: " + dns.RcodeToString[r.Rcode]}
+		return CheckResult{SiteID: site.ID, Status: string(models.StatusDown), StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS RCODE: " + dns.RcodeToString[r.Rcode]}
 	}
-	return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
+	return CheckResult{SiteID: site.ID, Status: string(models.StatusUp), LatencyNs: latency.Nanoseconds()}
 }

-func siteTimeout(site models.Site) time.Duration {
+func siteTimeout(site models.SiteConfig) time.Duration {
 	if site.Timeout > 0 {
 		return time.Duration(site.Timeout) * time.Second
 	}
-	return 5 * time.Second
+	return defaultTimeout
 }

 func isCodeAccepted(code int, accepted string) bool {
 	if accepted == "" {
-		return code >= 200 && code < 300
+		return code >= defaultHTTPStatusMin && code < defaultHTTPStatusMax
 	}
 	for _, part := range strings.Split(accepted, ",") {
 		part = strings.TrimSpace(part)
@@ -1,6 +1,7 @@
 package monitor

 import (
+	"context"
 	"crypto/tls"
 	"net"
 	"net/http"
@@ -18,8 +19,8 @@ func TestRunCheck_HTTP_Success(t *testing.T) {
 	}))
 	defer srv.Close()

-	site := models.Site{ID: 1, Type: "http", URL: srv.URL}
-	result := RunCheck(site, http.DefaultClient, http.DefaultClient, false)
+	site := models.SiteConfig{ID: 1, Type: "http", URL: srv.URL}
+	result := RunCheck(context.Background(), site, http.DefaultClient, http.DefaultClient, false, false)

 	if result.Status != "UP" {
 		t.Errorf("expected UP, got %s", result.Status)
@@ -38,8 +39,8 @@ func TestRunCheck_HTTP_ServerError(t *testing.T) {
 	}))
 	defer srv.Close()

-	site := models.Site{ID: 1, Type: "http", URL: srv.URL}
-	result := RunCheck(site, http.DefaultClient, http.DefaultClient, false)
+	site := models.SiteConfig{ID: 1, Type: "http", URL: srv.URL}
+	result := RunCheck(context.Background(), site, http.DefaultClient, http.DefaultClient, false, false)

 	if result.Status != "DOWN" {
 		t.Errorf("expected DOWN, got %s", result.Status)
@@ -59,8 +60,8 @@ func TestRunCheck_HTTP_CustomAcceptedCodes(t *testing.T) {
 		return http.ErrUseLastResponse
 	}}

-	site := models.Site{ID: 1, Type: "http", URL: srv.URL, AcceptedCodes: "200-399"}
-	result := RunCheck(site, client, client, false)
+	site := models.SiteConfig{ID: 1, Type: "http", URL: srv.URL, AcceptedCodes: "200-399"}
+	result := RunCheck(context.Background(), site, client, client, false, false)

 	if result.Status != "UP" {
 		t.Errorf("expected UP with accepted 200-399, got %s", result.Status)
@@ -75,8 +76,8 @@ func TestRunCheck_HTTP_MethodRespected(t *testing.T) {
 	}))
 	defer srv.Close()

-	site := models.Site{ID: 1, Type: "http", URL: srv.URL, Method: "HEAD"}
-	RunCheck(site, http.DefaultClient, http.DefaultClient, false)
+	site := models.SiteConfig{ID: 1, Type: "http", URL: srv.URL, Method: "HEAD"}
+	RunCheck(context.Background(), site, http.DefaultClient, http.DefaultClient, false, false)

 	if receivedMethod != "HEAD" {
 		t.Errorf("expected HEAD, got %s", receivedMethod)
@@ -90,8 +91,8 @@ func TestRunCheck_HTTP_Timeout(t *testing.T) {
 	}))
 	defer srv.Close()

-	site := models.Site{ID: 1, Type: "http", URL: srv.URL, Timeout: 1}
-	result := RunCheck(site, http.DefaultClient, http.DefaultClient, false)
+	site := models.SiteConfig{ID: 1, Type: "http", URL: srv.URL, Timeout: 1}
+	result := RunCheck(context.Background(), site, http.DefaultClient, http.DefaultClient, false, false)

 	if result.Status != "DOWN" {
 		t.Errorf("expected DOWN on timeout, got %s", result.Status)
@@ -108,8 +109,8 @@ func TestRunCheck_HTTP_SSLFields(t *testing.T) {
 		Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}},
 	}

-	site := models.Site{ID: 1, Type: "http", URL: srv.URL, CheckSSL: true, IgnoreTLS: true}
-	result := RunCheck(site, http.DefaultClient, insecureClient, false)
+	site := models.SiteConfig{ID: 1, Type: "http", URL: srv.URL, CheckSSL: true, IgnoreTLS: true}
+	result := RunCheck(context.Background(), site, http.DefaultClient, insecureClient, false, false)

 	if result.Status != "UP" {
 		t.Errorf("expected UP, got %s", result.Status)
@@ -132,8 +133,8 @@ func TestRunCheck_Port_Open(t *testing.T) {
 	_, portStr, _ := net.SplitHostPort(ln.Addr().String())
 	port, _ := strconv.Atoi(portStr)

-	site := models.Site{ID: 1, Type: "port", Hostname: "127.0.0.1", Port: port, Timeout: 2}
-	result := RunCheck(site, nil, nil, false, true)
+	site := models.SiteConfig{ID: 1, Type: "port", Hostname: "127.0.0.1", Port: port, Timeout: 2}
+	result := RunCheck(context.Background(), site, nil, nil, false, true)

 	if result.Status != "UP" {
 		t.Errorf("expected UP, got %s", result.Status)
@@ -152,14 +153,51 @@ func TestRunCheck_Port_Closed(t *testing.T) {
 	port, _ := strconv.Atoi(portStr)
 	ln.Close()

-	site := models.Site{ID: 1, Type: "port", Hostname: "127.0.0.1", Port: port, Timeout: 1}
-	result := RunCheck(site, nil, nil, false, true)
+	site := models.SiteConfig{ID: 1, Type: "port", Hostname: "127.0.0.1", Port: port, Timeout: 1}
+	result := RunCheck(context.Background(), site, nil, nil, false, true)

 	if result.Status != "DOWN" {
 		t.Errorf("expected DOWN, got %s", result.Status)
 	}
 }

+func TestRunPortCheck_UsesPinnedIP(t *testing.T) { // verifies runPortCheck reports UP when handed a pinned IP alongside an unresolvable Hostname
+	ln, err := net.Listen("tcp", "127.0.0.1:0") // port 0: the OS assigns a free loopback port
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ln.Close()
+
+	_, portStr, _ := net.SplitHostPort(ln.Addr().String()) // errors ignored: the address comes from our own listener
+	port, _ := strconv.Atoi(portStr)
+
+	// Pass a pinned IP — runPortCheck should dial it instead of resolving Hostname. Hostname uses the reserved .invalid TLD, so an UP result can only come from the pinned IP.
+	site := models.SiteConfig{ID: 1, Type: "port", Hostname: "will-not-resolve.invalid", Port: port, Timeout: 2}
+	result := runPortCheck(context.Background(), site, net.ParseIP("127.0.0.1"))
+
+	if result.Status != "UP" {
+		t.Errorf("expected UP when pinned IP used, got %s: %s", result.Status, result.ErrorReason)
+	}
+}
+
+func TestRunPortCheck_NilPinnedIP_UsesHostname(t *testing.T) { // verifies runPortCheck still reports UP when no pinned IP is supplied
+	ln, err := net.Listen("tcp", "127.0.0.1:0") // port 0: the OS assigns a free loopback port
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ln.Close()
+
+	_, portStr, _ := net.SplitHostPort(ln.Addr().String()) // errors ignored: the address comes from our own listener
+	port, _ := strconv.Atoi(portStr)
+
+	site := models.SiteConfig{ID: 1, Type: "port", Hostname: "127.0.0.1", Port: port, Timeout: 2} // Hostname is the listener's own loopback address
+	result := runPortCheck(context.Background(), site, nil)                                         // nil pinned IP: the only usable target is site.Hostname
+
+	if result.Status != "UP" {
+		t.Errorf("expected UP with nil pinnedIP fallback, got %s: %s", result.Status, result.ErrorReason)
+	}
+}
+
 func TestRunCheck_Port_BlocksPrivateByDefault(t *testing.T) {
 	ln, err := net.Listen("tcp", "127.0.0.1:0")
 	if err != nil {
@@ -170,8 +208,8 @@ func TestRunCheck_Port_BlocksPrivateByDefault(t *testing.T) {
 	_, portStr, _ := net.SplitHostPort(ln.Addr().String())
 	port, _ := strconv.Atoi(portStr)

-	site := models.Site{ID: 1, Type: "port", Hostname: "127.0.0.1", Port: port, Timeout: 2}
-	result := RunCheck(site, nil, nil, false)
+	site := models.SiteConfig{ID: 1, Type: "port", Hostname: "127.0.0.1", Port: port, Timeout: 2}
+	result := RunCheck(context.Background(), site, nil, nil, false, false)

 	if result.Status != "DOWN" {
 		t.Errorf("expected DOWN when private targets blocked, got %s", result.Status)
@@ -179,8 +217,8 @@ func TestRunCheck_Port_BlocksPrivateByDefault(t *testing.T) {
 }

 func TestRunCheck_UnknownType(t *testing.T) {
-	site := models.Site{ID: 1, Type: "invalid"}
-	result := RunCheck(site, nil, nil, false)
+	site := models.SiteConfig{ID: 1, Type: "invalid"}
+	result := RunCheck(context.Background(), site, nil, nil, false, false)

 	if result.Status != "DOWN" {
 		t.Errorf("expected DOWN for unknown type, got %s", result.Status)
@@ -213,10 +251,10 @@ func TestIsCodeAccepted(t *testing.T) {
 }

 func TestSiteTimeout(t *testing.T) {
-	if got := siteTimeout(models.Site{Timeout: 0}); got != 5*time.Second {
+	if got := siteTimeout(models.SiteConfig{Timeout: 0}); got != 5*time.Second { // a zero Timeout must yield the 5s default
 		t.Errorf("expected 5s default, got %v", got)
 	}
-	if got := siteTimeout(models.Site{Timeout: 10}); got != 10*time.Second {
+	if got := siteTimeout(models.SiteConfig{Timeout: 10}); got != 10*time.Second { // a non-zero Timeout is read as whole seconds
 		t.Errorf("expected 10s, got %v", got)
 	}
 }
@@ -0,0 +1,64 @@
+package monitor
+
+import (
+	"context"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
+)
+
+// dbWrite is a single unit of deferred persistence. The engine enqueues these
+// onto a buffered channel; a single writer goroutine drains and executes them,
+// serializing all writes through one connection and surfacing errors instead of
+// discarding them. desc names the write for diagnostics on drop/failure.
+type dbWrite interface {
+	exec(ctx context.Context, s store.Store) error // runs the write against s and returns the store's error, if any
+	desc() string                                  // short label for this kind of write, e.g. "log" or "check"
+}
+
+type writeLog struct{ message string } // deferred Store.SaveLog call carrying one log message
+
+func (w writeLog) exec(ctx context.Context, s store.Store) error { return s.SaveLog(ctx, w.message) }
+func (w writeLog) desc() string                                  { return "log" }
+
+type writeCheck struct { // deferred Store.SaveCheck call for a single check result
+	siteID    int   // site identifier forwarded to SaveCheck
+	latencyNs int64 // check latency in nanoseconds
+	isUp      bool  // outcome flag forwarded to SaveCheck
+}
+
+func (w writeCheck) exec(ctx context.Context, s store.Store) error {
+	return s.SaveCheck(ctx, w.siteID, w.latencyNs, w.isUp)
+}
+func (w writeCheck) desc() string { return "check" }
+
+type writeStateChange struct { // deferred Store.SaveStateChange call; fields are forwarded in declaration order
+	siteID     int
+	fromStatus string
+	toStatus   string
+	reason     string
+}
+
+func (w writeStateChange) exec(ctx context.Context, s store.Store) error {
+	return s.SaveStateChange(ctx, w.siteID, w.fromStatus, w.toStatus, w.reason)
+}
+func (w writeStateChange) desc() string { return "state-change" }
+
+type writeAlertHealth struct{ rec models.AlertHealthRecord } // deferred Store.SaveAlertHealth call for one record
+
+func (w writeAlertHealth) exec(ctx context.Context, s store.Store) error {
+	return s.SaveAlertHealth(ctx, w.rec)
+}
+func (w writeAlertHealth) desc() string { return "alert-health" }
+
+type writeProbeCheck struct { // deferred Store.SaveCheckFromNode call: a check result tagged with a node ID
+	siteID    int
+	nodeID    string // node identifier forwarded to SaveCheckFromNode
+	latencyNs int64  // presumably nanoseconds, going by the name — confirm at the enqueue site
+	isUp      bool
+}
+
+func (w writeProbeCheck) exec(ctx context.Context, s store.Store) error {
+	return s.SaveCheckFromNode(ctx, w.siteID, w.nodeID, w.latencyNs, w.isUp)
+}
+func (w writeProbeCheck) desc() string { return "probe-check" }
@@ -1,6 +1,9 @@
 package monitor

-import "time"
+import (
+	"context"
+	"time"
+)

 const maxHistoryLen = 60

@@ -12,7 +15,7 @@ type SiteHistory struct {
 }

 func (e *Engine) InitHistory() {
-	all, err := e.db.LoadAllHistory(maxHistoryLen)
+	all, err := e.db.LoadAllHistory(context.Background(), maxHistoryLen)
 	if err != nil {
 		e.AddLog("Failed to load check history: " + err.Error())
 		return
@@ -61,7 +64,7 @@ func (e *Engine) recordCheck(siteID int, latency time.Duration, isUp bool) {
 		h.Statuses = h.Statuses[len(h.Statuses)-maxHistoryLen:]
 	}

-	go func() { _ = e.db.SaveCheck(siteID, latency.Nanoseconds(), isUp) }()
+	e.enqueueWrite(writeCheck{siteID: siteID, latencyNs: latency.Nanoseconds(), isUp: isUp})
 }

 func (e *Engine) GetHistory(siteID int) (SiteHistory, bool) {
@@ -1,12 +1,14 @@
 package monitor

 import (
+	"context"
 	"fmt"
 	"sync"
 	"testing"
 	"time"

 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store/storetest"
 )

 // --- Mock Store ---
@@ -18,8 +20,9 @@ type savedCheck struct {
 }

 type mockStore struct {
+	storetest.BaseMock
 	mu            sync.Mutex
-	sites         []models.Site
+	sites         []models.SiteConfig
 	alerts        map[int]models.AlertConfig
 	maintenance   map[int]bool
 	logs          []string
@@ -37,55 +40,19 @@ func newMockStore() *mockStore {
 	}
 }

-func (m *mockStore) Init() error                                              { return nil }
-func (m *mockStore) GetSites() ([]models.Site, error)                         { return m.sites, nil }
-func (m *mockStore) AddSite(models.Site) error                                { return nil }
-func (m *mockStore) UpdateSite(models.Site) error                             { return nil }
-func (m *mockStore) UpdateSitePaused(int, bool) error                         { return nil }
-func (m *mockStore) DeleteSite(int) error                                     { return nil }
-func (m *mockStore) AddAlert(string, string, map[string]string) error         { return nil }
-func (m *mockStore) UpdateAlert(int, string, string, map[string]string) error { return nil }
-func (m *mockStore) DeleteAlert(int) error                                    { return nil }
-func (m *mockStore) GetAllUsers() ([]models.User, error)                      { return nil, nil }
-func (m *mockStore) AddUser(string, string, string) error                     { return nil }
-func (m *mockStore) UpdateUser(int, string, string, string) error             { return nil }
-func (m *mockStore) DeleteUser(int) error                                     { return nil }
-func (m *mockStore) ExportData() (models.Backup, error)                       { return models.Backup{}, nil }
-func (m *mockStore) ImportData(models.Backup) error                           { return nil }
-func (m *mockStore) GetSiteByName(string) (models.Site, error)                { return models.Site{}, nil }
-func (m *mockStore) AddSiteReturningID(models.Site) (int, error)              { return 0, nil }
-func (m *mockStore) AddAlertReturningID(string, string, map[string]string) (int, error) {
-	return 0, nil
-}
-func (m *mockStore) SaveCheckFromNode(int, string, int64, bool) error { return nil }
-func (m *mockStore) RegisterNode(models.ProbeNode) error              { return nil }
-func (m *mockStore) GetNode(string) (models.ProbeNode, error)         { return models.ProbeNode{}, nil }
-func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error)         { return nil, nil }
-func (m *mockStore) UpdateNodeLastSeen(string) error                  { return nil }
-func (m *mockStore) DeleteNode(string) error                          { return nil }
-func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
-	return nil, nil
-}
-func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
-func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
-	return nil, nil
-}
-func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
-	return nil, nil
-}
-func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error    { return nil }
-func (m *mockStore) EndMaintenanceWindow(int) error                         { return nil }
-func (m *mockStore) DeleteMaintenanceWindow(int) error                      { return nil }
-func (m *mockStore) GetPreference(string) (string, error)                   { return "", nil }
-func (m *mockStore) SetPreference(string, string) error                     { return nil }
-func (m *mockStore) SaveStateChange(int, string, string, string) error      { return nil }
-func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
-func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
-	return nil, nil
-}
-func (m *mockStore) Close() error { return nil }
+func (m *mockStore) GetSites(context.Context) ([]models.SiteConfig, error) { return m.sites, nil } // returns m.sites itself: no copy is made and m.mu is not taken

-func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error) {
+func (m *mockStore) GetActiveMaintenanceWindows(context.Context) ([]models.MaintenanceWindow, error) { // builds one window per key of m.maintenance; ctx is ignored
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	var windows []models.MaintenanceWindow // stays nil when m.maintenance is empty
+	for id := range m.maintenance { // NOTE(review): a key mapped to false still produces a window; map order is unspecified
+		windows = append(windows, models.MaintenanceWindow{MonitorID: id}) // only MonitorID is populated
+	}
+	return windows, nil
+}
+
+func (m *mockStore) GetAllAlerts(context.Context) ([]models.AlertConfig, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	var result []models.AlertConfig
@@ -95,7 +62,7 @@ func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error) {
 	return result, nil
 }

-func (m *mockStore) GetAlert(id int) (models.AlertConfig, error) {
+func (m *mockStore) GetAlert(_ context.Context, id int) (models.AlertConfig, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.getAlertCalls = append(m.getAlertCalls, id)
@@ -105,7 +72,7 @@ func (m *mockStore) GetAlert(id int) (models.AlertConfig, error) {
 	return models.AlertConfig{}, fmt.Errorf("alert %d not found", id)
 }

-func (m *mockStore) GetAlertByName(name string) (models.AlertConfig, error) {
+func (m *mockStore) GetAlertByName(_ context.Context, name string) (models.AlertConfig, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	for _, a := range m.alerts {
@@ -116,31 +83,31 @@ func (m *mockStore) GetAlertByName(name string) (models.AlertConfig, error) {
 	return models.AlertConfig{}, fmt.Errorf("alert %q not found", name)
 }

-func (m *mockStore) IsMonitorInMaintenance(id int) (bool, error) {
+func (m *mockStore) IsMonitorInMaintenance(_ context.Context, id int) (bool, error) { // looks id up in m.maintenance under m.mu; ctx is ignored and an absent id reads as false
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	return m.maintenance[id], nil
 }

-func (m *mockStore) SaveCheck(siteID int, latencyNs int64, isUp bool) error {
+func (m *mockStore) SaveCheck(_ context.Context, siteID int, latencyNs int64, isUp bool) error { // appends the arguments to m.savedChecks under m.mu; ctx is ignored, always returns nil
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.savedChecks = append(m.savedChecks, savedCheck{siteID, latencyNs, isUp})
 	return nil
 }

-func (m *mockStore) SaveLog(msg string) error {
+func (m *mockStore) SaveLog(_ context.Context, msg string) error { // appends msg to m.savedLogs under m.mu; ctx is ignored, always returns nil
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.savedLogs = append(m.savedLogs, msg)
 	return nil
 }

-func (m *mockStore) LoadLogs(limit int) ([]string, error) {
+func (m *mockStore) LoadLogs(_ context.Context, _ int) ([]string, error) { // returns m.logs as-is; both ctx and the limit argument are ignored
 	return m.logs, nil
 }

-func (m *mockStore) LoadAllHistory(limit int) (map[int][]models.CheckRecord, error) {
+func (m *mockStore) LoadAllHistory(_ context.Context, _ int) (map[int][]models.CheckRecord, error) { // returns m.history as-is; both ctx and the limit argument are ignored
 	return m.history, nil
 }

@@ -181,7 +148,10 @@ func (m *mockStore) getAlertCallsSnapshot() []int {
 func TestHandleStatusChange_PendingToUp(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "PENDING", MaxRetries: 3, AlertID: 1}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 3, AlertID: 1},
+		SiteState:  models.SiteState{Status: "PENDING"},
+	}
 	injectSite(e, site)

 	e.handleStatusChange(site, "UP", 200, 10*time.Millisecond, "")
@@ -202,7 +172,10 @@ func TestHandleStatusChange_PendingToUp(t *testing.T) {
 func TestHandleStatusChange_UpIncrementFailure(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 3, FailureCount: 0}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 3},
+		SiteState:  models.SiteState{Status: "UP", FailureCount: 0},
+	}
 	injectSite(e, site)

 	e.handleStatusChange(site, "DOWN", 500, 0, "test error")
@@ -220,7 +193,10 @@ func TestHandleStatusChange_UpToDown_ExceedsRetries(t *testing.T) {
 	ms := newMockStore()
 	ms.alerts[1] = models.AlertConfig{ID: 1, Name: "discord", Type: "webhook", Settings: map[string]string{"url": "http://example.com"}}
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 2, FailureCount: 2, AlertID: 1}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 2, AlertID: 1},
+		SiteState:  models.SiteState{Status: "UP", FailureCount: 2},
+	}
 	injectSite(e, site)

 	e.handleStatusChange(site, "DOWN", 500, 0, "test error")
@@ -243,7 +219,10 @@ func TestHandleStatusChange_UpToDown_ZeroRetries(t *testing.T) {
 	ms := newMockStore()
 	ms.alerts[1] = models.AlertConfig{ID: 1, Name: "test", Type: "webhook", Settings: map[string]string{"url": "http://example.com"}}
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, FailureCount: 0, AlertID: 1}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 0, AlertID: 1},
+		SiteState:  models.SiteState{Status: "UP", FailureCount: 0},
+	}
 	injectSite(e, site)

 	e.handleStatusChange(site, "DOWN", 0, 0, "test error")
@@ -262,7 +241,10 @@ func TestHandleStatusChange_DownToUp_Recovery(t *testing.T) {
 	ms := newMockStore()
 	ms.alerts[1] = models.AlertConfig{ID: 1, Name: "test", Type: "webhook", Settings: map[string]string{"url": "http://example.com"}}
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "DOWN", FailureCount: 4, AlertID: 1}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", AlertID: 1},
+		SiteState:  models.SiteState{Status: "DOWN", FailureCount: 4},
+	}
 	injectSite(e, site)

 	e.handleStatusChange(site, "UP", 200, 5*time.Millisecond, "")
@@ -283,7 +265,10 @@ func TestHandleStatusChange_DownToUp_Recovery(t *testing.T) {
 func TestHandleStatusChange_DownStaysDown(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "DOWN", MaxRetries: 2, FailureCount: 3}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 2},
+		SiteState:  models.SiteState{Status: "DOWN", FailureCount: 3},
+	}
 	injectSite(e, site)

 	e.handleStatusChange(site, "DOWN", 0, 0, "test error")
@@ -302,7 +287,10 @@ func TestHandleStatusChange_SSLExpired(t *testing.T) {
 	ms := newMockStore()
 	ms.alerts[1] = models.AlertConfig{ID: 1, Name: "test", Type: "webhook", Settings: map[string]string{"url": "http://example.com"}}
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 0, AlertID: 1},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, site)

 	e.handleStatusChange(site, "SSL EXP", 0, 0, "SSL certificate expired")
@@ -322,8 +310,12 @@ func TestHandleStatusChange_AlertSuppressedMaintenance(t *testing.T) {
 	ms.maintenance[1] = true
 	ms.alerts[1] = models.AlertConfig{ID: 1, Name: "test", Type: "webhook", Settings: map[string]string{"url": "http://example.com"}}
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 0, AlertID: 1},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, site)
+	e.refreshMaintenanceCache(context.Background())

 	e.handleStatusChange(site, "DOWN", 0, 0, "test error")

@@ -353,8 +345,12 @@ func TestHandleStatusChange_RecoverySuppressedMaintenance(t *testing.T) {
 	ms.maintenance[1] = true
 	ms.alerts[1] = models.AlertConfig{ID: 1, Name: "test", Type: "webhook", Settings: map[string]string{"url": "http://example.com"}}
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "DOWN", AlertID: 1}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", AlertID: 1},
+		SiteState:  models.SiteState{Status: "DOWN"},
+	}
 	injectSite(e, site)
+	e.refreshMaintenanceCache(context.Background())

 	e.handleStatusChange(site, "UP", 200, 0, "")

@@ -373,10 +369,8 @@ func TestHandleStatusChange_SSLWarning(t *testing.T) {
 	ms.alerts[1] = models.AlertConfig{ID: 1, Name: "test", Type: "webhook", Settings: map[string]string{"url": "http://example.com"}}
 	e := newTestEngine(ms)
 	site := models.Site{
-		ID: 1, Name: "test", Status: "UP", Type: "http",
-		CheckSSL: true, HasSSL: true, ExpiryThreshold: 30,
-		SentSSLWarning: false, AlertID: 1,
-		CertExpiry: time.Now().Add(15 * 24 * time.Hour),
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", Type: "http", CheckSSL: true, ExpiryThreshold: 30, AlertID: 1},
+		SiteState:  models.SiteState{Status: "UP", HasSSL: true, SentSSLWarning: false, CertExpiry: time.Now().Add(15 * 24 * time.Hour)},
 	}
 	injectSite(e, site)

@@ -396,10 +390,8 @@ func TestHandleStatusChange_SSLWarningNotRepeated(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
 	site := models.Site{
-		ID: 1, Name: "test", Status: "UP", Type: "http",
-		CheckSSL: true, HasSSL: true, ExpiryThreshold: 30,
-		SentSSLWarning: true, AlertID: 1,
-		CertExpiry: time.Now().Add(15 * 24 * time.Hour),
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", Type: "http", CheckSSL: true, ExpiryThreshold: 30, AlertID: 1},
+		SiteState:  models.SiteState{Status: "UP", HasSSL: true, SentSSLWarning: true, CertExpiry: time.Now().Add(15 * 24 * time.Hour)},
 	}
 	injectSite(e, site)

@@ -415,10 +407,8 @@ func TestHandleStatusChange_SSLWarningReset(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
 	site := models.Site{
-		ID: 1, Name: "test", Status: "UP", Type: "http",
-		CheckSSL: true, HasSSL: true, ExpiryThreshold: 30,
-		SentSSLWarning: true,
-		CertExpiry:     time.Now().Add(60 * 24 * time.Hour),
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", Type: "http", CheckSSL: true, ExpiryThreshold: 30},
+		SiteState:  models.SiteState{Status: "UP", HasSSL: true, SentSSLWarning: true, CertExpiry: time.Now().Add(60 * 24 * time.Hour)},
 	}
 	injectSite(e, site)

@@ -436,12 +426,11 @@ func TestHandleStatusChange_SSLWarningSuppressedMaint(t *testing.T) {
 	ms.alerts[1] = models.AlertConfig{ID: 1, Name: "test", Type: "webhook", Settings: map[string]string{"url": "http://example.com"}}
 	e := newTestEngine(ms)
 	site := models.Site{
-		ID: 1, Name: "test", Status: "UP", Type: "http",
-		CheckSSL: true, HasSSL: true, ExpiryThreshold: 30,
-		SentSSLWarning: false, AlertID: 1,
-		CertExpiry: time.Now().Add(15 * 24 * time.Hour),
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", Type: "http", CheckSSL: true, ExpiryThreshold: 30, AlertID: 1},
+		SiteState:  models.SiteState{Status: "UP", HasSSL: true, SentSSLWarning: false, CertExpiry: time.Now().Add(15 * 24 * time.Hour)},
 	}
 	injectSite(e, site)
+	e.refreshMaintenanceCache(context.Background())

 	e.handleStatusChange(site, "UP", 200, 0, "")

@@ -458,7 +447,10 @@ func TestHandleStatusChange_SSLWarningSuppressedMaint(t *testing.T) {
 func TestHandleStatusChange_InactiveEngine(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 0},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, site)
 	e.SetActive(false)

@@ -475,7 +467,10 @@ func TestHandleStatusChange_InactiveEngine(t *testing.T) {
 func TestRecordHeartbeat_ValidToken(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "push-test", Type: "push", Token: "abc123", Status: "UP"}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "push-test", Type: "push", Token: "abc123"},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, site)

 	if !e.RecordHeartbeat("abc123") {
@@ -495,7 +490,10 @@ func TestRecordHeartbeat_RecoveryFromDown(t *testing.T) {
 	ms := newMockStore()
 	ms.alerts[1] = models.AlertConfig{ID: 1, Name: "test", Type: "webhook", Settings: map[string]string{"url": "http://example.com"}}
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "push-test", Type: "push", Token: "abc123", Status: "DOWN", AlertID: 1, FailureCount: 3}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "push-test", Type: "push", Token: "abc123", AlertID: 1},
+		SiteState:  models.SiteState{Status: "DOWN", FailureCount: 3},
+	}
 	injectSite(e, site)

 	if !e.RecordHeartbeat("abc123") {
@@ -527,7 +525,10 @@ func TestRecordHeartbeat_UnknownToken(t *testing.T) {
 func TestRecordHeartbeat_InactiveEngine(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Type: "push", Token: "abc123", Status: "UP"}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Type: "push", Token: "abc123"},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, site)
 	e.SetActive(false)

@@ -542,13 +543,12 @@ func TestCheckPush_DeadlineMissed(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
 	site := models.Site{
-		ID: 1, Name: "push", Type: "push", Status: "UP",
-		Interval: 10, MaxRetries: 0,
-		LastCheck: time.Now().Add(-120 * time.Second),
+		SiteConfig: models.SiteConfig{ID: 1, Name: "push", Type: "push", Interval: 10, MaxRetries: 0},
+		SiteState:  models.SiteState{Status: "UP", LastCheck: time.Now().Add(-120 * time.Second)},
 	}
 	injectSite(e, site)

-	e.checkPush(site)
+	e.checkPush(context.Background(), site)

 	s, _ := getSite(e, 1)
 	if s.Status != "DOWN" {
@@ -560,13 +560,12 @@ func TestCheckPush_OverdueBecomesLate(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
 	site := models.Site{
-		ID: 1, Name: "push", Type: "push", Status: "UP",
-		Interval:  300,
-		LastCheck: time.Now().Add(-310 * time.Second),
+		SiteConfig: models.SiteConfig{ID: 1, Name: "push", Type: "push", Interval: 300},
+		SiteState:  models.SiteState{Status: "UP", LastCheck: time.Now().Add(-310 * time.Second)},
 	}
 	injectSite(e, site)

-	e.checkPush(site)
+	e.checkPush(context.Background(), site)

 	s, _ := getSite(e, 1)
 	if s.Status != "LATE" {
@@ -580,13 +579,12 @@ func TestCheckPush_OverdueBecomesStale(t *testing.T) {
 	// interval=300, grace=150 (300/2), staleMark=overdue+75
 	// at 380s: past staleMark(375) but before graceEnd(450)
 	site := models.Site{
-		ID: 1, Name: "push", Type: "push", Status: "UP",
-		Interval:  300,
-		LastCheck: time.Now().Add(-380 * time.Second),
+		SiteConfig: models.SiteConfig{ID: 1, Name: "push", Type: "push", Interval: 300},
+		SiteState:  models.SiteState{Status: "UP", LastCheck: time.Now().Add(-380 * time.Second)},
 	}
 	injectSite(e, site)

-	e.checkPush(site)
+	e.checkPush(context.Background(), site)

 	s, _ := getSite(e, 1)
 	if s.Status != "STALE" {
@@ -598,12 +596,12 @@ func TestCheckPush_WithinDeadline(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
 	site := models.Site{
-		ID: 1, Name: "push", Type: "push", Status: "UP",
-		Interval: 60, LastCheck: time.Now(),
+		SiteConfig: models.SiteConfig{ID: 1, Name: "push", Type: "push", Interval: 60},
+		SiteState:  models.SiteState{Status: "UP", LastCheck: time.Now()},
 	}
 	injectSite(e, site)

-	e.checkPush(site)
+	e.checkPush(context.Background(), site)

 	s, _ := getSite(e, 1)
 	if s.Status != "UP" {
@@ -615,12 +613,12 @@ func TestCheckPush_PendingStaysPending(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
 	site := models.Site{
-		ID: 1, Name: "push", Type: "push", Status: "PENDING",
-		Interval: 60,
+		SiteConfig: models.SiteConfig{ID: 1, Name: "push", Type: "push", Interval: 60},
+		SiteState:  models.SiteState{Status: "PENDING"},
 	}
 	injectSite(e, site)

-	e.checkPush(site)
+	e.checkPush(context.Background(), site)

 	s, _ := getSite(e, 1)
 	if s.Status != "PENDING" {
@@ -633,14 +631,23 @@ func TestCheckPush_PendingStaysPending(t *testing.T) {
 func TestCheckGroup_AllChildrenUp(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	group := models.Site{ID: 1, Name: "group", Type: "group", Status: "PENDING"}
-	child1 := models.Site{ID: 2, Name: "child1", Type: "http", ParentID: 1, Status: "UP"}
-	child2 := models.Site{ID: 3, Name: "child2", Type: "http", ParentID: 1, Status: "UP"}
+	group := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "group", Type: "group"},
+		SiteState:  models.SiteState{Status: "PENDING"},
+	}
+	child1 := models.Site{
+		SiteConfig: models.SiteConfig{ID: 2, Name: "child1", Type: "http", ParentID: 1},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	child2 := models.Site{
+		SiteConfig: models.SiteConfig{ID: 3, Name: "child2", Type: "http", ParentID: 1},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, group)
 	injectSite(e, child1)
 	injectSite(e, child2)

-	e.checkGroup(group)
+	e.checkGroup(context.Background(), group)

 	s, _ := getSite(e, 1)
 	if s.Status != "UP" {
@@ -651,14 +658,23 @@ func TestCheckGroup_AllChildrenUp(t *testing.T) {
 func TestCheckGroup_OneChildDown(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	group := models.Site{ID: 1, Name: "group", Type: "group", Status: "UP"}
-	child1 := models.Site{ID: 2, Name: "child1", Type: "http", ParentID: 1, Status: "UP"}
-	child2 := models.Site{ID: 3, Name: "child2", Type: "http", ParentID: 1, Status: "DOWN"}
+	group := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "group", Type: "group"},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	child1 := models.Site{
+		SiteConfig: models.SiteConfig{ID: 2, Name: "child1", Type: "http", ParentID: 1},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	child2 := models.Site{
+		SiteConfig: models.SiteConfig{ID: 3, Name: "child2", Type: "http", ParentID: 1},
+		SiteState:  models.SiteState{Status: "DOWN"},
+	}
 	injectSite(e, group)
 	injectSite(e, child1)
 	injectSite(e, child2)

-	e.checkGroup(group)
+	e.checkGroup(context.Background(), group)

 	s, _ := getSite(e, 1)
 	if s.Status != "DOWN" {
@@ -669,14 +685,22 @@ func TestCheckGroup_OneChildDown(t *testing.T) {
 func TestCheckGroup_PausedChildIgnored(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	group := models.Site{ID: 1, Name: "group", Type: "group"}
-	child1 := models.Site{ID: 2, Name: "child1", Type: "http", ParentID: 1, Status: "UP"}
-	child2 := models.Site{ID: 3, Name: "child2", Type: "http", ParentID: 1, Status: "DOWN", Paused: true}
+	group := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "group", Type: "group"},
+	}
+	child1 := models.Site{
+		SiteConfig: models.SiteConfig{ID: 2, Name: "child1", Type: "http", ParentID: 1},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	child2 := models.Site{
+		SiteConfig: models.SiteConfig{ID: 3, Name: "child2", Type: "http", ParentID: 1, Paused: true},
+		SiteState:  models.SiteState{Status: "DOWN"},
+	}
 	injectSite(e, group)
 	injectSite(e, child1)
 	injectSite(e, child2)

-	e.checkGroup(group)
+	e.checkGroup(context.Background(), group)

 	s, _ := getSite(e, 1)
 	if s.Status != "UP" {
@@ -688,14 +712,23 @@ func TestCheckGroup_MaintenanceChildIgnored(t *testing.T) {
 	ms := newMockStore()
 	ms.maintenance[3] = true
 	e := newTestEngine(ms)
-	group := models.Site{ID: 1, Name: "group", Type: "group"}
-	child1 := models.Site{ID: 2, Name: "child1", Type: "http", ParentID: 1, Status: "UP"}
-	child2 := models.Site{ID: 3, Name: "child2", Type: "http", ParentID: 1, Status: "DOWN"}
+	group := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "group", Type: "group"},
+	}
+	child1 := models.Site{
+		SiteConfig: models.SiteConfig{ID: 2, Name: "child1", Type: "http", ParentID: 1},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	child2 := models.Site{
+		SiteConfig: models.SiteConfig{ID: 3, Name: "child2", Type: "http", ParentID: 1},
+		SiteState:  models.SiteState{Status: "DOWN"},
+	}
 	injectSite(e, group)
 	injectSite(e, child1)
 	injectSite(e, child2)
+	e.refreshMaintenanceCache(context.Background())

-	e.checkGroup(group)
+	e.checkGroup(context.Background(), group)

 	s, _ := getSite(e, 1)
 	if s.Status != "UP" {
@@ -706,10 +739,13 @@ func TestCheckGroup_MaintenanceChildIgnored(t *testing.T) {
 func TestCheckGroup_NoChildren(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	group := models.Site{ID: 1, Name: "group", Type: "group", Status: "UP"}
+	group := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "group", Type: "group"},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, group)

-	e.checkGroup(group)
+	e.checkGroup(context.Background(), group)

 	s, _ := getSite(e, 1)
 	if s.Status != "PENDING" {
@@ -801,10 +837,13 @@ func TestInitHistory_LoadsFromDB(t *testing.T) {
 func TestUpdateSiteConfig_PreservesRuntime(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", URL: "http://old.com", Status: "DOWN", FailureCount: 3, Latency: 100 * time.Millisecond}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", URL: "http://old.com"},
+		SiteState:  models.SiteState{Status: "DOWN", FailureCount: 3, Latency: 100 * time.Millisecond},
+	}
 	injectSite(e, site)

-	updated := models.Site{ID: 1, Name: "test", URL: "http://new.com", Interval: 60}
+	updated := models.SiteConfig{ID: 1, Name: "test", URL: "http://new.com", Interval: 60}
 	e.UpdateSiteConfig(updated)

 	s, _ := getSite(e, 1)
@@ -825,7 +864,10 @@ func TestUpdateSiteConfig_PreservesRuntime(t *testing.T) {
 func TestRemoveSite_CleansUp(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Type: "push", Token: "tok1", Status: "UP"}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", Type: "push", Token: "tok1"},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, site)
 	e.recordCheck(1, 5*time.Millisecond, true)

@@ -845,7 +887,10 @@ func TestRemoveSite_CleansUp(t *testing.T) {
 func TestToggleSitePause(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "UP"}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test"},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, site)

 	paused := e.ToggleSitePause(1)
@@ -874,8 +919,14 @@ func TestToggleSitePause_NonexistentSite(t *testing.T) {
 func TestGetAllSites_ReturnsCopy(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	injectSite(e, models.Site{ID: 1, Name: "s1", Status: "UP"})
-	injectSite(e, models.Site{ID: 2, Name: "s2", Status: "DOWN"})
+	injectSite(e, models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "s1"},
+		SiteState:  models.SiteState{Status: "UP"},
+	})
+	injectSite(e, models.Site{
+		SiteConfig: models.SiteConfig{ID: 2, Name: "s2"},
+		SiteState:  models.SiteState{Status: "DOWN"},
+	})

 	sites := e.GetAllSites()
 	if len(sites) != 2 {
@@ -894,10 +945,13 @@ func TestGetAllSites_ReturnsCopy(t *testing.T) {
 func TestGetLiveState_ReturnsCopy(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	injectSite(e, models.Site{ID: 1, Name: "s1", Status: "UP"})
+	injectSite(e, models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "s1"},
+		SiteState:  models.SiteState{Status: "UP"},
+	})

 	state := e.GetLiveState()
-	state[1] = models.Site{Name: "mutated"}
+	state[1] = models.Site{SiteConfig: models.SiteConfig{Name: "mutated"}}

 	fresh := e.GetLiveState()
 	if fresh[1].Name == "mutated" {
@@ -1013,7 +1067,8 @@ func TestConcurrent_RecordHeartbeat(t *testing.T) {
 	e := newTestEngine(ms)
 	for i := 0; i < 10; i++ {
 		injectSite(e, models.Site{
-			ID: i + 1, Type: "push", Token: fmt.Sprintf("tok-%d", i+1), Status: "UP",
+			SiteConfig: models.SiteConfig{ID: i + 1, Type: "push", Token: fmt.Sprintf("tok-%d", i+1)},
+			SiteState:  models.SiteState{Status: "UP"},
 		})
 	}

@@ -1031,7 +1086,10 @@ func TestConcurrent_RecordHeartbeat(t *testing.T) {
 func TestConcurrent_HandleStatusChangeAndGetState(t *testing.T) {
 	ms := newMockStore()
 	e := newTestEngine(ms)
-	site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 100}
+	site := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 100},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
 	injectSite(e, site)

 	var wg sync.WaitGroup
@@ -1076,6 +1134,344 @@ func TestConcurrent_RecordCheckAndGetHistory(t *testing.T) {
 	}
 }

+// --- Group 10: liveState merge (lost-update race) ---
+
+// TestHandleStatusChange_PauseDuringCheckSurvives covers the lost-update race
+// between a user pause and an in-flight check. The check folds its result in
+// from a snapshot taken before the pause; the old code wrote that whole stale
+// struct back and thereby reverted the pause.
+func TestHandleStatusChange_PauseDuringCheckSurvives(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	stale := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 0},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	injectSite(eng, stale)
+
+	// The user pauses the monitor after the snapshot was taken, so `stale`
+	// still carries Paused=false.
+	eng.ToggleSitePause(1)
+
+	// The check finishes and reports against the pre-pause snapshot.
+	eng.handleStatusChange(stale, "DOWN", 500, 0, "boom")
+
+	got, _ := getSite(eng, 1)
+	if !got.Paused {
+		t.Error("pause was reverted by a stale check write-back")
+	}
+	if got.Status != "DOWN" {
+		t.Errorf("expected check result still applied (DOWN), got %s", got.Status)
+	}
+}
+
+// TestHandleStatusChange_ConfigEditDuringCheckSurvives verifies that a config
+// edit applied while a check is in flight is not rolled back when the check
+// writes its result using the pre-edit snapshot.
+func TestHandleStatusChange_ConfigEditDuringCheckSurvives(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	stale := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", URL: "http://old.com", Type: "http", MaxRetries: 0, Interval: 30},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	injectSite(eng, stale)
+
+	// The edit lands mid-check: new URL and a longer interval.
+	edited := models.SiteConfig{ID: 1, Name: "test", URL: "http://new.com", Type: "http", Interval: 60}
+	eng.UpdateSiteConfig(edited)
+
+	// The check that ran against http://old.com now reports back.
+	eng.handleStatusChange(stale, "UP", 200, 5*time.Millisecond, "")
+
+	got, _ := getSite(eng, 1)
+	if got.URL != "http://new.com" {
+		t.Errorf("config edit reverted: URL=%s", got.URL)
+	}
+	if got.Interval != 60 {
+		t.Errorf("config edit reverted: Interval=%d", got.Interval)
+	}
+}
+
+// TestHandleStatusChange_HeartbeatNotOverwrittenByStaleDown reproduces the
+// push false-DOWN: a staleness evaluation computed from an old LastCheck is
+// still in flight when a heartbeat marks the monitor UP. The late DOWN must
+// not replace the fresh heartbeat.
+func TestHandleStatusChange_HeartbeatNotOverwrittenByStaleDown(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	// The snapshot the staleness evaluation worked from: LastCheck is two
+	// minutes old against a 10-second interval, so checkPush decided "DOWN".
+	stale := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "push", Type: "push", Token: "tok", Interval: 10},
+		SiteState:  models.SiteState{Status: "UP", LastCheck: time.Now().Add(-120 * time.Second)},
+	}
+	injectSite(eng, stale)
+
+	// The heartbeat arrives first, advancing LastCheck and confirming UP.
+	if accepted := eng.RecordHeartbeat("tok"); !accepted {
+		t.Fatal("heartbeat rejected")
+	}
+
+	// Only now does the in-flight evaluation try to record its DOWN verdict.
+	eng.handleStatusChange(stale, "DOWN", 0, 0, "heartbeat missed")
+
+	got, _ := getSite(eng, 1)
+	if got.Status != "UP" {
+		t.Errorf("stale DOWN overwrote a fresh heartbeat: status=%s", got.Status)
+	}
+}
+
+// TestHandleStatusChange_RemovedSiteDropped ensures that a check result
+// arriving after its site was removed is discarded instead of re-creating the
+// site in live state.
+func TestHandleStatusChange_RemovedSiteDropped(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	snapshot := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", MaxRetries: 0},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	injectSite(eng, snapshot)
+
+	// Removal happens while the check is still running; its result lands late.
+	eng.RemoveSite(1)
+	eng.handleStatusChange(snapshot, "DOWN", 500, 0, "boom")
+
+	_, stillThere := getSite(eng, 1)
+	if stillThere {
+		t.Error("removed site was recreated by a late check write-back")
+	}
+}
+
+// --- Group 11: single DB writer ---
+
+// TestDBWriter_DrainsOnStop checks that writes queued through the engine are
+// persisted by the writer goroutine and that Stop does not return until the
+// queue has been drained, so nothing is lost on shutdown.
+func TestDBWriter_DrainsOnStop(t *testing.T) {
+	ms := newMockStore()
+	eng := newTestEngine(ms)
+	eng.Start(context.Background())
+
+	eng.enqueueWrite(writeCheck{siteID: 7, latencyNs: 100, isUp: true})
+	eng.enqueueWrite(writeLog{message: "drain-me"})
+
+	// Stop blocks until the writer has emptied the queue.
+	eng.Stop()
+
+	ms.mu.Lock()
+	defer ms.mu.Unlock()
+
+	checkSaved := false
+	for _, saved := range ms.savedChecks {
+		if saved.SiteID == 7 {
+			checkSaved = true
+			break
+		}
+	}
+	if !checkSaved {
+		t.Error("check was not persisted before Stop returned")
+	}
+
+	logSaved := false
+	for _, msg := range ms.savedLogs {
+		if msg == "drain-me" {
+			logSaved = true
+			break
+		}
+	}
+	if !logSaved {
+		t.Error("log was not persisted before Stop returned")
+	}
+}
+
+// TestEngineStop_Idempotent calls Stop twice on a started engine; the second
+// call must neither panic nor block.
+func TestEngineStop_Idempotent(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	eng.Start(context.Background())
+	for call := 0; call < 2; call++ {
+		eng.Stop()
+	}
+}
+
+// --- Group 12: Phase 3 engine correctness ---
+
+// TestCheckGroup_AllPausedNoAutoFreeze guards against a group pausing itself
+// when every child is paused. Auto-pausing would be a one-way trap because
+// monitorRoutine skips paused sites.
+func TestCheckGroup_AllPausedNoAutoFreeze(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	group := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "group", Type: "group"},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	pausedChildren := []models.Site{
+		{
+			SiteConfig: models.SiteConfig{ID: 2, Name: "child1", Type: "http", ParentID: 1, Paused: true},
+			SiteState:  models.SiteState{Status: "UP"},
+		},
+		{
+			SiteConfig: models.SiteConfig{ID: 3, Name: "child2", Type: "http", ParentID: 1, Paused: true},
+			SiteState:  models.SiteState{Status: "UP"},
+		},
+	}
+	injectSite(eng, group)
+	for _, child := range pausedChildren {
+		injectSite(eng, child)
+	}
+
+	eng.checkGroup(context.Background(), group)
+
+	got, _ := getSite(eng, 1)
+	if got.Paused {
+		t.Error("group must not auto-pause when all children are paused")
+	}
+}
+
+// TestHandleStatusChange_PendingRetriesBeforeDown checks that a monitor in
+// PENDING honors MaxRetries: with MaxRetries=2 the first two failures keep it
+// PENDING and only the third flips it to DOWN.
+func TestHandleStatusChange_PendingRetriesBeforeDown(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	fresh := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "new-monitor", MaxRetries: 2},
+		SiteState:  models.SiteState{Status: "PENDING"},
+	}
+	injectSite(eng, fresh)
+
+	// First failure: still retrying, and the failure is counted.
+	eng.handleStatusChange(fresh, "DOWN", 0, 0, "timeout")
+	cur, _ := getSite(eng, 1)
+	if cur.Status != "PENDING" {
+		t.Errorf("expected PENDING during retry, got %s", cur.Status)
+	}
+	if cur.FailureCount != 1 {
+		t.Errorf("expected FailureCount 1, got %d", cur.FailureCount)
+	}
+
+	// Second failure: the retry budget is not yet exhausted.
+	eng.handleStatusChange(cur, "DOWN", 0, 0, "timeout")
+	cur, _ = getSite(eng, 1)
+	if cur.Status != "PENDING" {
+		t.Errorf("expected PENDING during retry 2, got %s", cur.Status)
+	}
+
+	// Third failure: retries are used up, so the monitor goes DOWN.
+	eng.handleStatusChange(cur, "DOWN", 0, 0, "timeout")
+	cur, _ = getSite(eng, 1)
+	if cur.Status != "DOWN" {
+		t.Errorf("expected DOWN after retries exhausted, got %s", cur.Status)
+	}
+}
+
+// TestHandleStatusChange_LateRetriesBeforeDown checks that a monitor in LATE
+// honors MaxRetries as well: with MaxRetries=1 the first failure leaves it
+// LATE and the second one moves it to DOWN.
+func TestHandleStatusChange_LateRetriesBeforeDown(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	late := models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "push-mon", MaxRetries: 1},
+		SiteState:  models.SiteState{Status: "LATE"},
+	}
+	injectSite(eng, late)
+
+	// First failure: consumed by the single allowed retry.
+	eng.handleStatusChange(late, "DOWN", 0, 0, "missed heartbeat")
+	cur, _ := getSite(eng, 1)
+	if cur.Status != "LATE" {
+		t.Errorf("expected LATE during retry, got %s", cur.Status)
+	}
+
+	// Second failure: no retries left.
+	eng.handleStatusChange(cur, "DOWN", 0, 0, "missed heartbeat")
+	cur, _ = getSite(eng, 1)
+	if cur.Status != "DOWN" {
+		t.Errorf("expected DOWN after retries exhausted, got %s", cur.Status)
+	}
+}
+
+// TestIngestProbeResult_ExpiresStaleProbes seeds a ten-minute-old result from
+// a probe that has gone silent, ingests a fresh result from another probe, and
+// expects the old entry to be expired so it cannot poison aggregation.
+func TestIngestProbeResult_ExpiresStaleProbes(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	injectSite(eng, models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", Type: "http", Interval: 30},
+		SiteState:  models.SiteState{Status: "UP"},
+	})
+
+	dead := NodeResult{
+		NodeID:    "dead-probe",
+		IsUp:      false,
+		CheckedAt: time.Now().Add(-10 * time.Minute),
+	}
+	eng.probeResultsMu.Lock()
+	eng.probeResults[1] = map[string]NodeResult{"dead-probe": dead}
+	eng.probeResultsMu.Unlock()
+
+	eng.IngestProbeResult("live-probe", 1, 5000, true, "")
+
+	eng.probeResultsMu.RLock()
+	perNode := eng.probeResults[1]
+	_, deadExists := perNode["dead-probe"]
+	_, liveExists := perNode["live-probe"]
+	eng.probeResultsMu.RUnlock()
+
+	if deadExists {
+		t.Error("stale probe result should have been expired")
+	}
+	if !liveExists {
+		t.Error("live probe result should still exist")
+	}
+}
+
+// TestRemoveSite_CleansProbeResults verifies that removing a site also drops
+// the per-node probe results recorded for it.
+func TestRemoveSite_CleansProbeResults(t *testing.T) {
+	eng := newTestEngine(newMockStore())
+	injectSite(eng, models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", Type: "http"},
+		SiteState:  models.SiteState{Status: "UP"},
+	})
+
+	// Seed one probe result for the site that is about to be removed.
+	eng.probeResultsMu.Lock()
+	eng.probeResults[1] = map[string]NodeResult{
+		"node-a": {NodeID: "node-a", IsUp: true, CheckedAt: time.Now()},
+	}
+	eng.probeResultsMu.Unlock()
+
+	eng.RemoveSite(1)
+
+	eng.probeResultsMu.RLock()
+	defer eng.probeResultsMu.RUnlock()
+	_, leftover := eng.probeResults[1]
+	if leftover {
+		t.Error("probe results should be cleaned up after RemoveSite")
+	}
+}
+
+// TestIsInMaintenance_UsesCache puts a group under direct maintenance and
+// checks that, once the cache is refreshed, both the group and its child
+// report maintenance while an unknown monitor ID does not.
+func TestIsInMaintenance_UsesCache(t *testing.T) {
+	ms := newMockStore()
+	ms.maintenance[10] = true // maintenance set directly on the group
+	eng := newTestEngine(ms)
+
+	parent := models.Site{
+		SiteConfig: models.SiteConfig{ID: 10, Name: "group", Type: "group"},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	member := models.Site{
+		SiteConfig: models.SiteConfig{ID: 20, Name: "child", Type: "http", ParentID: 10},
+		SiteState:  models.SiteState{Status: "UP"},
+	}
+	injectSite(eng, parent)
+	injectSite(eng, member)
+	eng.refreshMaintenanceCache(context.Background())
+
+	if inMaint := eng.isInMaintenance(10); !inMaint {
+		t.Error("group should be in maintenance (direct)")
+	}
+	if inMaint := eng.isInMaintenance(20); !inMaint {
+		t.Error("child should be in maintenance (parent)")
+	}
+	if inMaint := eng.isInMaintenance(99); inMaint {
+		t.Error("unknown monitor should not be in maintenance")
+	}
+}
+
+// TestIsInMaintenance_GlobalMaintenance checks that a maintenance entry for
+// monitor_id=0 (the global window) applies to every monitor.
+func TestIsInMaintenance_GlobalMaintenance(t *testing.T) {
+	ms := newMockStore()
+	ms.maintenance[0] = true
+	eng := newTestEngine(ms)
+
+	injectSite(eng, models.Site{
+		SiteConfig: models.SiteConfig{ID: 1, Name: "test", Type: "http"},
+		SiteState:  models.SiteState{Status: "UP"},
+	})
+	eng.refreshMaintenanceCache(context.Background())
+
+	if covered := eng.isInMaintenance(1); !covered {
+		t.Error("all monitors should be in maintenance during global window")
+	}
+}
+
 // --- Utilities ---

 func containsStr(s, substr string) bool {
@@ -11,9 +11,11 @@ var privateRanges []*net.IPNet

 func init() {
 	cidrs := []string{
+		"0.0.0.0/8",
 		"127.0.0.0/8",
 		"::1/128",
 		"10.0.0.0/8",
+		"100.64.0.0/10",
 		"172.16.0.0/12",
 		"192.168.0.0/16",
 		"169.254.0.0/16",
@@ -27,6 +29,9 @@ func init() {
 }

 func isPrivateIP(ip net.IP) bool {
+	if ip.IsUnspecified() || ip.IsMulticast() || ip.IsLoopback() {
+		return true
+	}
 	for _, network := range privateRanges {
 		if network.Contains(ip) {
 			return true
@@ -16,14 +16,14 @@ type SLAReport struct {
 	MTBF        time.Duration
 }

-func ComputeSLA(changes []models.StateChange, currentStatus string, window time.Duration) SLAReport {
+func ComputeSLA(changes []models.StateChange, currentStatus models.Status, window time.Duration) SLAReport {
 	now := time.Now()
 	windowStart := now.Add(-window)

 	report := SLAReport{Window: window}

 	if len(changes) == 0 {
-		if isDown(currentStatus) {
+		if models.Status(currentStatus).IsBroken() {
 			report.UptimePct = 0
 			report.Downtime = window
 		} else {
@@ -40,7 +40,7 @@ func ComputeSLA(changes []models.StateChange, currentStatus string, window time.
 	}

 	// Determine status at window start: last transition before or at windowStart.
-	statusAtStart := "UP"
+	statusAtStart := string(models.StatusUp)
 	for i := len(sorted) - 1; i >= 0; i-- {
 		if !sorted[i].ChangedAt.After(windowStart) {
 			statusAtStart = sorted[i].ToStatus
@@ -51,7 +51,7 @@ func ComputeSLA(changes []models.StateChange, currentStatus string, window time.
 	var upTime, downTime time.Duration
 	var outages []time.Duration
 	cursor := windowStart
-	wasDown := isDown(statusAtStart)
+	wasDown := models.Status(statusAtStart).IsBroken()

 	if wasDown {
 		report.OutageCount = 1
@@ -77,7 +77,7 @@ func ComputeSLA(changes []models.StateChange, currentStatus string, window time.
 			upTime += seg
 		}

-		newDown := isDown(sc.ToStatus)
+		newDown := models.Status(sc.ToStatus).IsBroken()
 		if !wasDown && newDown {
 			report.OutageCount++
 			outageStart = sc.ChangedAt
@@ -127,19 +127,15 @@ func ComputeSLA(changes []models.StateChange, currentStatus string, window time.
 	return report
 }

-func ComputeDailyBreakdown(changes []models.StateChange, currentStatus string, days int) []DayReport {
-	now := time.Now()
+func ComputeDailyBreakdown(changes []models.StateChange, currentStatus models.Status, days int, now time.Time) []DayReport {
 	reports := make([]DayReport, days)

 	for i := 0; i < days; i++ {
-		dayEnd := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location()).Add(-time.Duration(i) * 24 * time.Hour)
+		dayStart := time.Date(now.Year(), now.Month(), now.Day()-i, 0, 0, 0, 0, now.Location())
+		dayEnd := time.Date(now.Year(), now.Month(), now.Day()-i+1, 0, 0, 0, 0, now.Location())
 		if i == 0 {
 			dayEnd = now
 		}
-		dayStart := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location()).Add(-time.Duration(i) * 24 * time.Hour)
-		if i > 0 {
-			dayEnd = dayStart.Add(24 * time.Hour)
-		}

 		windowChanges := filterChangesForWindow(changes, dayStart, dayEnd)

@@ -160,10 +156,6 @@ type DayReport struct {
 	UptimePct float64
 }

-func isDown(status string) bool {
-	return status == "DOWN" || status == "SSL EXP"
-}
-
 func filterChangesForWindow(changes []models.StateChange, start, end time.Time) []models.StateChange {
 	var filtered []models.StateChange
 	for _, sc := range changes {
@@ -181,7 +173,7 @@ func inferStatusAt(changes []models.StateChange, at time.Time) string {
 			return sc.ToStatus
 		}
 	}
-	return "UP"
+	return string(models.StatusUp)
 }

 func computeSLAForWindow(changes []models.StateChange, statusAtStart string, start, end time.Time) float64 {
@@ -194,7 +186,7 @@ func computeSLAForWindow(changes []models.StateChange, statusAtStart string, sta

 	var upTime, downTime time.Duration
 	cursor := start
-	wasDown := isDown(statusAtStart)
+	wasDown := models.Status(statusAtStart).IsBroken()

 	for _, sc := range sorted {
 		if sc.ChangedAt.Before(start) || !sc.ChangedAt.Before(end) {
@@ -206,7 +198,7 @@ func computeSLAForWindow(changes []models.StateChange, statusAtStart string, sta
 		} else {
 			upTime += seg
 		}
-		wasDown = isDown(sc.ToStatus)
+		wasDown = models.Status(sc.ToStatus).IsBroken()
 		cursor = sc.ChangedAt
 	}

@@ -118,13 +118,14 @@ func TestComputeSLA_LateNotDown(t *testing.T) {
 }

 func TestComputeDailyBreakdown(t *testing.T) {
-	now := time.Now()
+	// Use a fixed time well past midnight so the outage always falls within today's window.
+	now := time.Date(2026, 6, 4, 15, 0, 0, 0, time.UTC)
 	changes := []models.StateChange{
 		{ToStatus: "UP", ChangedAt: now.Add(-1 * time.Hour)},
 		{ToStatus: "DOWN", FromStatus: "UP", ChangedAt: now.Add(-2 * time.Hour)},
 	}

-	days := ComputeDailyBreakdown(changes, "UP", 7)
+	days := ComputeDailyBreakdown(changes, "UP", 7, now)

 	if len(days) != 7 {
 		t.Fatalf("expected 7 days, got %d", len(days))
@@ -136,24 +137,24 @@ func TestComputeDailyBreakdown(t *testing.T) {
 	}
 }

-func TestIsDown(t *testing.T) {
-	if !isDown("DOWN") {
-		t.Error("DOWN should be down")
+func TestIsBroken(t *testing.T) {
+	if !models.StatusDown.IsBroken() {
+		t.Error("DOWN should be broken")
 	}
-	if !isDown("SSL EXP") {
-		t.Error("SSL EXP should be down")
+	if !models.StatusSSLExp.IsBroken() {
+		t.Error("SSL EXP should be broken")
 	}
-	if isDown("UP") {
-		t.Error("UP should not be down")
+	if models.StatusUp.IsBroken() {
+		t.Error("UP should not be broken")
 	}
-	if isDown("LATE") {
-		t.Error("LATE should not be down")
+	if models.StatusLate.IsBroken() {
+		t.Error("LATE should not be broken")
 	}
-	if isDown("STALE") {
-		t.Error("STALE should not be down")
+	if models.StatusStale.IsBroken() {
+		t.Error("STALE should not be broken")
 	}
-	if isDown("PENDING") {
-		t.Error("PENDING should not be down")
+	if models.StatusPending.IsBroken() {
+		t.Error("PENDING should not be broken")
 	}
 }

@@ -3,10 +3,17 @@ package server
 import (
 	"net"
 	"net/http"
+	"strings"
 	"sync"
 	"time"
 )

+// maxVisitors caps the rate-limiter map so a flood of distinct keys can't grow
+// it without bound. With the trusted-proxy gate below, keys come from real peer
+// addresses, so this is a defense-in-depth ceiling rather than the primary
+// guard.
+const maxVisitors = 10000
+
 type visitor struct {
 	tokens   float64
 	lastSeen time.Time
@@ -17,18 +24,26 @@ type RateLimiter struct {
 	visitors map[string]*visitor
 	rate     float64
 	burst    float64
+	trusted  []*net.IPNet
+	stop     chan struct{}
 }

-func NewRateLimiter(requestsPerMinute int) *RateLimiter {
+func NewRateLimiter(requestsPerMinute int, trusted []*net.IPNet) *RateLimiter {
 	rl := &RateLimiter{
 		visitors: make(map[string]*visitor),
 		rate:     float64(requestsPerMinute) / 60.0,
 		burst:    float64(requestsPerMinute),
+		trusted:  trusted,
+		stop:     make(chan struct{}),
 	}
 	go rl.cleanup()
 	return rl
 }

+// Stop signals the background cleanup goroutine to exit by closing rl.stop.
+//
+// It is safe to call more than once and from multiple goroutines: the check
+// and the close happen under rl.mu, so a repeated call finds the channel
+// already closed and returns instead of panicking on a double close.
+func (rl *RateLimiter) Stop() {
+	rl.mu.Lock()
+	defer rl.mu.Unlock()
+	select {
+	case <-rl.stop:
+		// Already stopped; nothing to do.
+	default:
+		close(rl.stop)
+	}
+}
+
 func (rl *RateLimiter) Allow(ip string) bool {
 	rl.mu.Lock()
 	defer rl.mu.Unlock()
@@ -37,6 +52,9 @@ func (rl *RateLimiter) Allow(ip string) bool {
 	now := time.Now()

 	if !exists {
+		if len(rl.visitors) >= maxVisitors {
+			rl.evictOldest()
+		}
 		rl.visitors[ip] = &visitor{tokens: rl.burst - 1, lastSeen: now}
 		return true
 	}
@@ -55,9 +73,28 @@ func (rl *RateLimiter) Allow(ip string) bool {
 	return true
 }

+// evictOldest removes the least-recently-seen visitor. Called only when the map
+// is at capacity, so the O(n) scan is rare. Caller holds rl.mu.
+//
+// An explicit found flag tracks whether a candidate has been chosen. Using the
+// empty string as the "none yet" marker would mis-handle a visitor whose key is
+// itself "" (which is what an empty RemoteAddr produces): it would be
+// overwritten as a candidate and never evicted.
+func (rl *RateLimiter) evictOldest() {
+	var (
+		oldestKey string
+		oldest    time.Time
+		found     bool
+	)
+	for k, v := range rl.visitors {
+		if !found || v.lastSeen.Before(oldest) {
+			oldestKey, oldest, found = k, v.lastSeen, true
+		}
+	}
+	if found {
+		delete(rl.visitors, oldestKey)
+	}
+}
+
 func (rl *RateLimiter) cleanup() {
+	ticker := time.NewTicker(5 * time.Minute)
+	defer ticker.Stop()
 	for {
-		time.Sleep(5 * time.Minute)
+		select {
+		case <-ticker.C:
 			rl.mu.Lock()
 			cutoff := time.Now().Add(-10 * time.Minute)
 			for ip, v := range rl.visitors {
@@ -66,23 +103,60 @@ func (rl *RateLimiter) cleanup() {
 				}
 			}
 			rl.mu.Unlock()
+		case <-rl.stop:
+			return
+		}
 	}
 }

-func clientIP(r *http.Request) string {
-	if fwd := r.Header.Get("X-Forwarded-For"); fwd != "" {
-		return fwd
-	}
+// clientIP determines the rate-limit key for a request. X-Forwarded-For is only
+// honored when the immediate peer (RemoteAddr) is a configured trusted proxy;
+// otherwise the header is attacker-controlled and ignored, so a spoofed XFF
+// can't mint unlimited distinct keys (rate-limit bypass + memory DoS). When the
+// peer is trusted, the right-most address that is not itself a trusted proxy is
+// the real client (RFC 7239 right-most-untrusted-hop).
+func clientIP(r *http.Request, trusted []*net.IPNet) string {
 	host, _, err := net.SplitHostPort(r.RemoteAddr)
 	if err != nil {
-		return r.RemoteAddr
+		host = r.RemoteAddr
+	}
+
+	if len(trusted) == 0 || !ipInCIDRs(net.ParseIP(host), trusted) {
+		return host
+	}
+
+	xff := r.Header.Get("X-Forwarded-For")
+	if xff == "" {
+		return host
+	}
+	parts := strings.Split(xff, ",")
+	for i := len(parts) - 1; i >= 0; i-- {
+		ip := net.ParseIP(strings.TrimSpace(parts[i]))
+		if ip == nil {
+			continue
+		}
+		if !ipInCIDRs(ip, trusted) {
+			return ip.String()
+		}
 	}
 	return host
 }

+// ipInCIDRs reports whether ip falls inside at least one of the given
+// networks. A nil ip (for example the result of a failed net.ParseIP) is never
+// considered a member.
+func ipInCIDRs(ip net.IP, cidrs []*net.IPNet) bool {
+	if ip != nil {
+		for i := range cidrs {
+			if cidrs[i].Contains(ip) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
 func RateLimit(limiter *RateLimiter, next http.HandlerFunc) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
-		if !limiter.Allow(clientIP(r)) {
+		if !limiter.Allow(clientIP(r, limiter.trusted)) {
 			http.Error(w, "Rate limit exceeded", http.StatusTooManyRequests)
 			return
 		}
@@ -5,7 +5,8 @@ import (
 	"encoding/json"
 	"fmt"
 	"html/template"
-	"log"
+	"log/slog"
+	"net"
 	"net/http"
 	"sort"
 	"strings"
@@ -20,6 +21,395 @@ import (

 const maxRequestBody = 1 << 20

+// ServerConfig holds the settings the HTTP server is built from.
+type ServerConfig struct {
+	// Port is the TCP port to listen on; the listen address is ":<Port>".
+	Port           int
+	// EnableStatus registers the /status and /status/json routes when true.
+	EnableStatus   bool
+	// Title is not used in the code visible here — presumably the status page
+	// title; confirm against the status handlers.
+	Title          string
+	// ClusterKey is the shared secret compared with the X-Uptop-Secret request
+	// header. When empty, requireAuth rejects every request.
+	ClusterKey     string
+	// TLSCert and TLSKey are passed to ListenAndServeTLS; HTTPS is served only
+	// when both are non-empty.
+	TLSCert        string
+	TLSKey         string
+	// ClusterMode, when non-empty and not "leader", triggers a startup warning
+	// if TLSCert is empty.
+	ClusterMode    string
+	// MetricsPublic is not used in the code visible here — presumably controls
+	// whether /metrics requires the cluster secret; confirm in handleMetrics.
+	MetricsPublic  bool
+	// CORSOrigin is not used in the code visible here — TODO confirm where the
+	// CORS header is applied.
+	CORSOrigin     string
+	// TrustedProxies is handed to every rate limiter and to the logging
+	// middleware.
+	TrustedProxies []*net.IPNet
+	// QuietHTTPLog disables the request logging middleware when true.
+	QuietHTTPLog   bool
+}
+
+// Server bundles the HTTP handlers with the dependencies they use: the
+// configuration, the store, the monitoring engine, and one rate limiter per
+// group of endpoints.
+type Server struct {
+	cfg      ServerConfig
+	store    store.Store
+	eng      *monitor.Engine
+	// pushRL guards /api/push.
+	pushRL   *RateLimiter
+	// probeRL guards the /api/probe/* endpoints.
+	probeRL  *RateLimiter
+	// backupRL guards /api/backup/export, /api/backup/import and /api/import/kuma.
+	backupRL *RateLimiter
+	// statusRL guards /status and /status/json.
+	statusRL *RateLimiter
+}
+
+// NewServer wires a Server to its store and engine and creates its four rate
+// limiters (requests per minute: push 60, probe 30, backup 10, status 120),
+// each sharing cfg.TrustedProxies. It does not start listening; call Start
+// for that.
+func NewServer(cfg ServerConfig, s store.Store, eng *monitor.Engine) *Server {
+	proxies := cfg.TrustedProxies
+	srv := &Server{cfg: cfg, store: s, eng: eng}
+	srv.pushRL = NewRateLimiter(60, proxies)
+	srv.probeRL = NewRateLimiter(30, proxies)
+	srv.backupRL = NewRateLimiter(10, proxies)
+	srv.statusRL = NewRateLimiter(120, proxies)
+	return srv
+}
+
+// Start is a convenience wrapper: it builds a Server from the arguments and
+// starts it, returning the underlying *http.Server.
+func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server {
+	return NewServer(cfg, s, eng).Start()
+}
+
+// Start logs configuration warnings, builds the http.Server and begins serving
+// in a background goroutine. HTTPS is used when both TLSCert and TLSKey are
+// set, plain HTTP otherwise. The returned *http.Server lets the caller shut
+// the listener down; listen errors other than http.ErrServerClosed are logged,
+// not returned.
+func (s *Server) Start() *http.Server {
+	if s.cfg.ClusterKey == "" {
+		slog.Warn("no UPTOP_CLUSTER_SECRET set, cluster API endpoints will reject all requests")
+	}
+
+	mode := s.cfg.ClusterMode
+	nonLeaderCluster := mode != "" && mode != "leader"
+	if nonLeaderCluster && s.cfg.TLSCert == "" {
+		slog.Warn("cluster mode active without TLS, secrets transmitted in cleartext")
+	}
+
+	addr := fmt.Sprintf(":%d", s.cfg.Port)
+	httpSrv := &http.Server{
+		Addr:              addr,
+		Handler:           s.routes(),
+		ReadHeaderTimeout: 10 * time.Second,
+		ReadTimeout:       30 * time.Second,
+		WriteTimeout:      60 * time.Second,
+		IdleTimeout:       120 * time.Second,
+	}
+
+	go func() {
+		if s.cfg.TLSCert == "" || s.cfg.TLSKey == "" {
+			slog.Info("HTTP server listening", "addr", addr)
+			err := httpSrv.ListenAndServe()
+			if err != nil && err != http.ErrServerClosed {
+				slog.Error("HTTP server failed", "err", err)
+			}
+			return
+		}
+		slog.Info("HTTPS server listening", "addr", addr)
+		err := httpSrv.ListenAndServeTLS(s.cfg.TLSCert, s.cfg.TLSKey)
+		if err != nil && err != http.ErrServerClosed {
+			slog.Error("HTTPS server failed", "err", err)
+		}
+	}()
+	return httpSrv
+}
+
+// routes builds the complete handler tree: the API endpoints (rate limited
+// where a limiter is attached), /metrics, the optional status pages, and the
+// middleware wrapped around them.
+//
+// Middleware is applied inside-out: security headers first, then request
+// logging unless QuietHTTPLog is set, then HSTS. HSTS is added only when both
+// TLSCert and TLSKey are set, which is the same condition Start uses to serve
+// HTTPS, so the header is never advertised from a plain-HTTP listener.
+func (s *Server) routes() http.Handler {
+	mux := http.NewServeMux()
+
+	mux.HandleFunc("/api/push", RateLimit(s.pushRL, s.handlePush))
+	mux.HandleFunc("/api/health", s.handleHealth)
+	mux.HandleFunc("/api/backup/export", RateLimit(s.backupRL, s.handleExport))
+	mux.HandleFunc("/api/backup/import", RateLimit(s.backupRL, s.handleImport))
+	mux.HandleFunc("/api/import/kuma", RateLimit(s.backupRL, s.handleKumaImport))
+	mux.HandleFunc("/api/probe/register", RateLimit(s.probeRL, s.handleProbeRegister))
+	mux.HandleFunc("/api/probe/assignments", RateLimit(s.probeRL, s.handleProbeAssignments))
+	mux.HandleFunc("/api/probe/results", RateLimit(s.probeRL, s.handleProbeResults))
+	mux.HandleFunc("/metrics", s.handleMetrics)
+
+	if s.cfg.EnableStatus {
+		mux.HandleFunc("/status", RateLimit(s.statusRL, s.handleStatus))
+		mux.HandleFunc("/status/json", RateLimit(s.statusRL, s.handleStatusJSON))
+	}
+
+	handler := securityHeadersMiddleware(mux)
+	if !s.cfg.QuietHTTPLog {
+		handler = loggingMiddleware(s.cfg.TrustedProxies, handler)
+	}
+	// Match Start: TLS is only served when both the cert and the key are set.
+	if s.cfg.TLSCert != "" && s.cfg.TLSKey != "" {
+		handler = hstsMiddleware(handler)
+	}
+	return handler
+}
+
+// requireAuth reports whether the request carries the configured cluster
+// secret in its X-Uptop-Secret header. With no secret configured it always
+// returns false, so protected endpoints reject every request.
+func (s *Server) requireAuth(r *http.Request) bool {
+	key := s.cfg.ClusterKey
+	if key == "" {
+		return false
+	}
+	return checkSecret(r.Header.Get("X-Uptop-Secret"), key)
+}
+
+// handlePush records a heartbeat for a push monitor. It accepts GET and POST.
+// The token is taken from the bearer Authorization header, falling back to the
+// deprecated ?token= query parameter (which logs a warning). Responses: 405
+// for other methods, 400 when no token is supplied, 404 when the engine does
+// not accept the token, and 200 "OK" on success.
+func (s *Server) handlePush(w http.ResponseWriter, r *http.Request) {
+	switch r.Method {
+	case http.MethodGet, http.MethodPost:
+		// allowed
+	default:
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	token := extractBearerToken(r)
+	if token == "" {
+		token = r.URL.Query().Get("token")
+		if token != "" {
+			slog.Warn("push token in query string is deprecated, use Authorization: Bearer header")
+		}
+	}
+	if token == "" {
+		http.Error(w, "Missing token", http.StatusBadRequest)
+		return
+	}
+
+	if !s.eng.RecordHeartbeat(token) {
+		http.Error(w, "Invalid Token", http.StatusNotFound)
+		return
+	}
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write([]byte("OK"))
+}
+
+// handleHealth answers GET requests with 200 "OK". The cluster secret is
+// enforced only when one is configured; with no ClusterKey set the endpoint is
+// open.
+func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	if key := s.cfg.ClusterKey; key != "" {
+		if supplied := r.Header.Get("X-Uptop-Secret"); !checkSecret(supplied, key) {
+			http.Error(w, "Unauthorized", http.StatusUnauthorized)
+			return
+		}
+	}
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write([]byte("OK"))
+}
+
+// handleExport writes the store's exported data as JSON. It accepts GET only
+// and requires the cluster secret. Alert settings are redacted unless the
+// request passes redact_secrets=false. Responses: 405 for other methods, 401
+// without a valid secret, 500 when the export fails.
+func (s *Server) handleExport(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	if !s.requireAuth(r) {
+		http.Error(w, "Unauthorized: UPTOP_CLUSTER_SECRET required", http.StatusUnauthorized)
+		return
+	}
+	data, err := s.store.ExportData(r.Context())
+	if err != nil {
+		slog.Error("export failed", "err", err)
+		http.Error(w, "Export failed", http.StatusInternalServerError)
+		return
+	}
+	// Redaction is the default; only the literal value "false" turns it off.
+	if r.URL.Query().Get("redact_secrets") != "false" {
+		for i := range data.Alerts {
+			data.Alerts[i].Settings = models.RedactAlertSettings(data.Alerts[i].Type, data.Alerts[i].Settings)
+		}
+	}
+	// Declare the body as JSON; otherwise net/http sniffs it as text/plain.
+	w.Header().Set("Content-Type", "application/json")
+	// Best effort: headers are already committed, so an encode error cannot be
+	// reported to the client.
+	_ = json.NewEncoder(w).Encode(data) //nolint:errcheck
+}
+
+// handleImport restores a backup posted as JSON. It accepts POST only,
+// requires the cluster secret and caps the body at maxRequestBody. Users in
+// the payload are always discarded before the import. Responses: 405, 401,
+// 400 for an undecodable body, 500 when the store rejects the import.
+func (s *Server) handleImport(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "POST required", http.StatusMethodNotAllowed)
+		return
+	}
+	if !s.requireAuth(r) {
+		http.Error(w, "Unauthorized", http.StatusUnauthorized)
+		return
+	}
+
+	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBody)
+	var backup models.Backup
+	dec := json.NewDecoder(r.Body)
+	if err := dec.Decode(&backup); err != nil {
+		http.Error(w, "Invalid JSON", http.StatusBadRequest)
+		return
+	}
+
+	// The API import never touches users: holding the cluster secret must not
+	// be enough to replace admin accounts. A full restore goes through the CLI.
+	backup.Users = nil
+
+	if err := s.store.ImportData(r.Context(), backup); err != nil {
+		slog.Error("import failed", "err", err)
+		http.Error(w, "Import failed", http.StatusInternalServerError)
+		return
+	}
+	_, _ = w.Write([]byte("Import Successful (users excluded — manage via CLI or UPTOP_KEYS)"))
+}
+
+// handleKumaImport converts a Kuma backup posted as JSON and imports the
+// result. It accepts POST only, requires the cluster secret and caps the body
+// at maxRequestBody. On success it replies with a one-line summary of how many
+// monitors and alerts were imported. Responses: 405, 401, 400 for an
+// undecodable body, 500 when the store rejects the import.
+func (s *Server) handleKumaImport(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "POST required", http.StatusMethodNotAllowed)
+		return
+	}
+	if !s.requireAuth(r) {
+		http.Error(w, "Unauthorized", http.StatusUnauthorized)
+		return
+	}
+
+	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBody)
+	kuma := new(importer.KumaBackup)
+	if err := json.NewDecoder(r.Body).Decode(kuma); err != nil {
+		slog.Error("invalid Kuma JSON", "err", err)
+		http.Error(w, "Invalid Kuma JSON", http.StatusBadRequest)
+		return
+	}
+
+	converted := importer.ConvertKuma(kuma)
+	if err := s.store.ImportData(r.Context(), converted); err != nil {
+		slog.Error("Kuma import failed", "err", err)
+		http.Error(w, "Import failed", http.StatusInternalServerError)
+		return
+	}
+
+	monitors, alerts := len(converted.Sites), len(converted.Alerts)
+	fmt.Fprintf(w, "Imported %d monitors, %d alerts from Kuma v%s", monitors, alerts, kuma.Version)
+}
+
+// handleProbeRegister registers a probe node described by a JSON body with the
+// fields id (required), name, region and version. It accepts POST only,
+// requires the cluster secret and caps the body at maxRequestBody. On success
+// it replies with the JSON object {"ok": true}. Responses: 405, 401, 400 for
+// an undecodable body or a missing id, 500 when the store fails.
+func (s *Server) handleProbeRegister(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "POST required", http.StatusMethodNotAllowed)
+		return
+	}
+	if !s.requireAuth(r) {
+		http.Error(w, "Unauthorized", http.StatusUnauthorized)
+		return
+	}
+	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBody)
+	var req struct {
+		ID      string `json:"id"`
+		Name    string `json:"name"`
+		Region  string `json:"region"`
+		Version string `json:"version"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		http.Error(w, "Invalid JSON", http.StatusBadRequest)
+		return
+	}
+	if req.ID == "" {
+		http.Error(w, "id is required", http.StatusBadRequest)
+		return
+	}
+	if err := s.store.RegisterNode(r.Context(), models.ProbeNode{
+		ID: req.ID, Name: req.Name, Region: req.Region, Version: req.Version,
+	}); err != nil {
+		slog.Error("probe registration failed", "err", err)
+		http.Error(w, "Registration failed", http.StatusInternalServerError)
+		return
+	}
+	// Declare the body as JSON, as handleProbeAssignments does; otherwise
+	// net/http sniffs it as text/plain.
+	w.Header().Set("Content-Type", "application/json")
+	// Best effort: headers are already committed, so an encode error cannot be
+	// reported to the client.
+	_ = json.NewEncoder(w).Encode(map[string]bool{"ok": true}) //nolint:errcheck
+}
+
+// handleProbeAssignments replies with {"sites": [...]}: the monitors a probe
+// node should check. Only authenticated GET requests are served.
+//
+// Paused sites and sites of type "push" or "group" are never handed out. When
+// a site lists regions and the calling node's region is known, the site is
+// included only if that comma-separated list contains the node's region.
+func (s *Server) handleProbeAssignments(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	if !s.requireAuth(r) {
+		http.Error(w, "Unauthorized", http.StatusUnauthorized)
+		return
+	}
+	// A missing node_id or a failed lookup leaves nodeRegion empty, which
+	// disables the region filter below.
+	nodeID := r.URL.Query().Get("node_id")
+	var nodeRegion string
+	if nodeID != "" {
+		if node, err := s.store.GetNode(r.Context(), nodeID); err == nil {
+			nodeRegion = node.Region
+		}
+	}
+	sites := s.eng.GetAllSites()
+	// Start from an empty, non-nil slice so that "nothing assigned" is encoded
+	// as "sites": [] rather than "sites": null.
+	assigned := make([]models.Site, 0, len(sites))
+	for _, site := range sites {
+		if site.Paused || site.Type == "push" || site.Type == "group" {
+			continue
+		}
+		if site.Regions != "" && nodeRegion != "" {
+			matched := false
+			for _, reg := range strings.Split(site.Regions, ",") {
+				if strings.TrimSpace(reg) == nodeRegion {
+					matched = true
+					break
+				}
+			}
+			if !matched {
+				continue
+			}
+		}
+		assigned = append(assigned, site)
+	}
+	w.Header().Set("Content-Type", "application/json")
+	_ = json.NewEncoder(w).Encode(map[string][]models.Site{"sites": assigned}) //nolint:errcheck
+}
+
+// handleProbeResults accepts a batch of check results from a probe node. Only
+// authenticated POST requests are served, the body is capped at maxRequestBody
+// bytes, and an empty node_id is rejected with 400.
+//
+// Every result is passed to the engine twice: EnqueueProbeCheck receives the
+// site, node, latency and up flag, and IngestProbeResult receives the same
+// values plus the error reason. A failure to update the node's last-seen
+// record is logged but does not fail the request. On success it replies with
+// {"ok": true}.
+func (s *Server) handleProbeResults(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "POST required", http.StatusMethodNotAllowed)
+		return
+	}
+	if !s.requireAuth(r) {
+		http.Error(w, "Unauthorized", http.StatusUnauthorized)
+		return
+	}
+	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBody)
+	var req struct {
+		NodeID  string `json:"node_id"`
+		Results []struct {
+			SiteID      int    `json:"site_id"`
+			LatencyNs   int64  `json:"latency_ns"`
+			IsUp        bool   `json:"is_up"`
+			ErrorReason string `json:"error_reason"`
+		} `json:"results"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		http.Error(w, "Invalid JSON", http.StatusBadRequest)
+		return
+	}
+	if req.NodeID == "" {
+		http.Error(w, "node_id is required", http.StatusBadRequest)
+		return
+	}
+	for _, result := range req.Results {
+		s.eng.EnqueueProbeCheck(result.SiteID, req.NodeID, result.LatencyNs, result.IsUp)
+		s.eng.IngestProbeResult(req.NodeID, result.SiteID, result.LatencyNs, result.IsUp, result.ErrorReason)
+	}
+	if err := s.store.UpdateNodeLastSeen(r.Context(), req.NodeID); err != nil {
+		slog.Error("node last-seen update failed", "err", err)
+	}
+	// Declare the payload type explicitly instead of leaving it to net/http's
+	// content sniffing, as handleProbeAssignments and handleStatusJSON do.
+	w.Header().Set("Content-Type", "application/json")
+	_ = json.NewEncoder(w).Encode(map[string]bool{"ok": true}) //nolint:errcheck
+}
+
+// handleMetrics serves the metrics handler built from the engine. Only GET is
+// accepted. Authentication is required unless the configuration marks metrics
+// as public.
+func (s *Server) handleMetrics(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	// requireAuth is only consulted when metrics are not public.
+	if needAuth := !s.cfg.MetricsPublic; needAuth && !s.requireAuth(r) {
+		http.Error(w, "Unauthorized", http.StatusUnauthorized)
+		return
+	}
+	serve := metrics.Handler(s.eng)
+	serve(w, r)
+}
+
+// handleStatus renders the HTML status page using the configured title and
+// the engine's sites. The request itself is not inspected.
+func (s *Server) handleStatus(w http.ResponseWriter, _ *http.Request) {
+	renderStatusPage(w, s.cfg.Title, s.eng)
+}
+
+// handleStatusJSON writes the engine's live state as a JSON object keyed by
+// the live-state map key, exposing only the statusSite fields for each site.
+//
+// Active windows of type "maintenance" override the reported status with
+// "MAINT": a window with MonitorID 0 applies to every site, otherwise it
+// applies to the site with that ID and to sites whose ParentID matches it.
+// When a CORS origin is configured it is sent as Access-Control-Allow-Origin.
+func (s *Server) handleStatusJSON(w http.ResponseWriter, r *http.Request) {
+	state := s.eng.GetLiveState()
+	activeWindows, err := s.store.GetActiveMaintenanceWindows(r.Context())
+	if err != nil {
+		// Best effort: keep serving live status, just without MAINT overrides
+		// for whatever could not be loaded, but do not hide the failure.
+		slog.Error("active maintenance window lookup failed", "err", err)
+	}
+	maintSet := make(map[int]bool)
+	allInMaint := false
+	for _, mw := range activeWindows {
+		if mw.Type != "maintenance" {
+			continue
+		}
+		if mw.MonitorID == 0 {
+			allInMaint = true
+		} else {
+			maintSet[mw.MonitorID] = true
+		}
+	}
+	public := make(map[int]statusSite, len(state))
+	for id, site := range state {
+		displayStatus := string(site.Status)
+		if allInMaint || maintSet[site.ID] || (site.ParentID > 0 && maintSet[site.ParentID]) {
+			displayStatus = "MAINT"
+		}
+		// Copy field by field so nothing outside statusSite can reach the
+		// response.
+		public[id] = statusSite{
+			Name:      site.Name,
+			Type:      site.Type,
+			URL:       site.URL,
+			Status:    displayStatus,
+			Paused:    site.Paused,
+			LastCheck: site.LastCheck,
+			Latency:   site.Latency,
+		}
+	}
+	if s.cfg.CORSOrigin != "" {
+		w.Header().Set("Access-Control-Allow-Origin", s.cfg.CORSOrigin)
+	}
+	w.Header().Set("Content-Type", "application/json")
+	_ = json.NewEncoder(w).Encode(public) //nolint:errcheck
+}
+
+// --- Helpers ---
+
+// checkSecret reports whether got equals want, comparing the bytes with
+// subtle.ConstantTimeCompare instead of ==.
+// NOTE(review): two empty strings compare equal here, so callers presumably
+// have to reject an unset secret before calling — confirm.
 func checkSecret(got, want string) bool {
 	return subtle.ConstantTimeCompare([]byte(got), []byte(want)) == 1
 }
@@ -32,21 +422,78 @@ func extractBearerToken(r *http.Request) string {
 	return ""
 }

-var sensitiveKeys = map[string]bool{
-	"pass": true, "password": true, "token": true,
-	"routing_key": true, "user": true, "username": true,
+// statusSite is the public DTO for /status/json.
+// It holds exactly the fields handleStatusJSON copies out of the live state.
+// The struct has no json tags, so encoding/json emits each field under its Go
+// name (for example "Name", "LastCheck"). Status carries either the engine's
+// status string or "MAINT" when the handler finds the site under an active
+// maintenance window.
+type statusSite struct {
+	Name      string
+	Type      string
+	URL       string
+	Status    string
+	Paused    bool
+	LastCheck time.Time
+	Latency   time.Duration
 }

-func redactSettings(settings map[string]string) map[string]string {
-	redacted := make(map[string]string, len(settings))
-	for k, v := range settings {
-		if sensitiveKeys[k] && v != "" {
-			redacted[k] = "***REDACTED***"
-		} else {
-			redacted[k] = v
+// --- Middleware ---
+
+// statusWriter wraps an http.ResponseWriter and remembers the status code that
+// was actually sent, so middleware can log it after the handler returns.
+// Construct it with code set to 200: that is what net/http sends when a
+// handler writes a body without calling WriteHeader.
+type statusWriter struct {
+	http.ResponseWriter
+	code  int
+	wrote bool // true once the final (non-1xx) status is committed
+}
+
+// WriteHeader records code and forwards the call. Only the first final status
+// is recorded: net/http ignores later WriteHeader calls, so recording them
+// would log a status the client never received. Informational 1xx codes do
+// not commit the status because a final one may still follow.
+func (w *statusWriter) WriteHeader(code int) {
+	if !w.wrote {
+		w.code = code
+		if code >= 200 {
+			w.wrote = true
+		}
+	}
+	w.ResponseWriter.WriteHeader(code)
+}
+
+// Write forwards to the wrapped writer. A body write commits the status
+// implicitly, so whatever code is recorded at that point becomes final.
+func (w *statusWriter) Write(b []byte) (int, error) {
+	w.wrote = true
+	return w.ResponseWriter.Write(b)
+}
+
+// Unwrap exposes the wrapped writer so http.ResponseController can reach
+// optional capabilities (flush, deadlines) that the wrapper itself lacks.
+func (w *statusWriter) Unwrap() http.ResponseWriter {
+	return w.ResponseWriter
+}
+
+// loggingMiddleware wraps next and emits one structured "http request" record
+// after the handler returns: method, path with CR/LF removed, the status
+// captured by statusWriter (200 if the handler never sets one), the elapsed
+// time rounded to milliseconds, and the client IP resolved by clientIP using
+// the trusted networks.
+func loggingMiddleware(trusted []*net.IPNet, next http.Handler) http.Handler {
+	logged := func(w http.ResponseWriter, r *http.Request) {
+		began := time.Now()
+		rec := &statusWriter{ResponseWriter: w, code: 200}
+		next.ServeHTTP(rec, r)
+
+		// Strip line breaks from the path before it reaches the log.
+		cleanPath := strings.ReplaceAll(r.URL.Path, "\n", "")
+		cleanPath = strings.ReplaceAll(cleanPath, "\r", "")
+		elapsed := time.Since(began).Round(time.Millisecond)
+		slog.Info("http request", "method", r.Method, "path", cleanPath, "status", rec.code, "duration", elapsed, "ip", clientIP(r, trusted)) //nolint:gosec // structured slog, not format string
+	}
+	return http.HandlerFunc(logged)
+}
+
+// securityHeadersMiddleware sets a fixed group of response headers
+// (X-Content-Type-Options, X-Frame-Options, Referrer-Policy and
+// Content-Security-Policy) and then hands the request to next.
+func securityHeadersMiddleware(next http.Handler) http.Handler {
+	harden := func(w http.ResponseWriter, r *http.Request) {
+		hdr := w.Header()
+		hdr.Set("X-Content-Type-Options", "nosniff")
+		hdr.Set("X-Frame-Options", "DENY")
+		hdr.Set("Referrer-Policy", "no-referrer")
+		hdr.Set("Content-Security-Policy", "default-src 'self'; script-src 'unsafe-inline'; style-src 'unsafe-inline'")
+		next.ServeHTTP(w, r)
+	}
+	return http.HandlerFunc(harden)
+}
+
+// hstsMiddleware adds a Strict-Transport-Security header (max-age 63072000
+// seconds, includeSubDomains) to every response before delegating to next.
+func hstsMiddleware(next http.Handler) http.Handler {
+	const policy = "max-age=63072000; includeSubDomains"
+	withHSTS := func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Strict-Transport-Security", policy)
+		next.ServeHTTP(w, r)
+	}
+	return http.HandlerFunc(withHSTS)
+}
+
+// renderStatusPage executes statusTpl into w with the given title and every
+// site reported by the engine. A template execution error is logged only;
+// nothing further is written to w on failure.
+func renderStatusPage(w http.ResponseWriter, title string, eng *monitor.Engine) {
+	sites := eng.GetAllSites()
+
+	// Sites whose status is models.StatusDown sort first; all other ordering
+	// (including ties between down sites) is by name.
+	sort.Slice(sites, func(i, j int) bool {
+		if sites[i].Status != sites[j].Status {
+			if sites[i].Status == models.StatusDown {
+				return true
+			}
+			if sites[j].Status == models.StatusDown {
+				return false
 			}
 		}
-	return redacted
+		return sites[i].Name < sites[j].Name
+	})
+
+	data := struct {
+		Title string
+		Sites []models.Site
+	}{Title: title, Sites: sites}
+	if err := statusTpl.Execute(w, data); err != nil {
+		slog.Error("status page render failed", "err", err)
+	}
 }

 var statusTpl = template.Must(template.New("status").Parse(`
@@ -180,395 +627,3 @@ var statusTpl = template.Must(template.New("status").Parse(`
 	</script>
 </body>
 </html>`))
-
-type ServerConfig struct {
-	Port          int
-	EnableStatus  bool
-	Title         string
-	ClusterKey    string
-	TLSCert       string
-	TLSKey        string
-	ClusterMode   string
-	MetricsPublic bool
-	CORSOrigin    string
-}
-
-func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server {
-	if cfg.ClusterKey == "" {
-		fmt.Println("WARNING: No UPTOP_CLUSTER_SECRET set. Cluster API endpoints are unauthenticated.")
-	}
-
-	pushRL := NewRateLimiter(60)
-	probeRL := NewRateLimiter(30)
-	backupRL := NewRateLimiter(10)
-	statusRL := NewRateLimiter(120)
-
-	mux := http.NewServeMux()
-
-	// 1. Push Heartbeat
-	mux.HandleFunc("/api/push", RateLimit(pushRL, func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != http.MethodGet && r.Method != http.MethodPost {
-			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
-			return
-		}
-		token := extractBearerToken(r)
-		if token == "" {
-			if qt := r.URL.Query().Get("token"); qt != "" {
-				token = qt
-				log.Printf("DEPRECATED: push token in query string — use Authorization: Bearer header instead")
-			}
-		}
-		if token == "" {
-			http.Error(w, "Missing token", http.StatusBadRequest)
-			return
-		}
-		if eng.RecordHeartbeat(token) {
-			w.WriteHeader(http.StatusOK)
-			_, _ = w.Write([]byte("OK"))
-		} else {
-			http.Error(w, "Invalid Token", http.StatusNotFound)
-		}
-	}))
-
-	// 2. Health Check (For Cluster Follower)
-	mux.HandleFunc("/api/health", func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != http.MethodGet {
-			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
-			return
-		}
-		if cfg.ClusterKey != "" && !checkSecret(r.Header.Get("X-Upkeep-Secret"), cfg.ClusterKey) {
-			http.Error(w, "Unauthorized", http.StatusUnauthorized)
-			return
-		}
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("OK"))
-	})
-
-	// 3. Config Export
-	mux.HandleFunc("/api/backup/export", RateLimit(backupRL, func(w http.ResponseWriter, r *http.Request) {
-		if cfg.ClusterKey == "" || !checkSecret(r.Header.Get("X-Upkeep-Secret"), cfg.ClusterKey) {
-			http.Error(w, "Unauthorized: UPTOP_CLUSTER_SECRET required", http.StatusUnauthorized)
-			return
-		}
-		data, err := s.ExportData()
-		if err != nil {
-			log.Printf("Export failed: %v", err)
-			http.Error(w, "Export failed", http.StatusInternalServerError)
-			return
-		}
-		if r.URL.Query().Get("redact_secrets") != "false" {
-			for i := range data.Alerts {
-				data.Alerts[i].Settings = redactSettings(data.Alerts[i].Settings)
-			}
-		}
-		_ = json.NewEncoder(w).Encode(data) //nolint:errcheck
-	}))
-
-	// 4. Config Import
-	mux.HandleFunc("/api/backup/import", RateLimit(backupRL, func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != "POST" {
-			http.Error(w, "POST required", http.StatusMethodNotAllowed)
-			return
-		}
-		if cfg.ClusterKey == "" || !checkSecret(r.Header.Get("X-Upkeep-Secret"), cfg.ClusterKey) {
-			http.Error(w, "Unauthorized", http.StatusUnauthorized)
-			return
-		}
-		r.Body = http.MaxBytesReader(w, r.Body, maxRequestBody)
-		var data models.Backup
-		if err := json.NewDecoder(r.Body).Decode(&data); err != nil {
-			http.Error(w, "Invalid JSON", http.StatusBadRequest)
-			return
-		}
-		if err := s.ImportData(data); err != nil {
-			log.Printf("Import failed: %v", err)
-			http.Error(w, "Import failed", http.StatusInternalServerError)
-			return
-		}
-		_, _ = w.Write([]byte("Import Successful"))
-	}))
-
-	// 5. Kuma Import
-	mux.HandleFunc("/api/import/kuma", RateLimit(backupRL, func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != "POST" {
-			http.Error(w, "POST required", http.StatusMethodNotAllowed)
-			return
-		}
-		if cfg.ClusterKey == "" || !checkSecret(r.Header.Get("X-Upkeep-Secret"), cfg.ClusterKey) {
-			http.Error(w, "Unauthorized", http.StatusUnauthorized)
-			return
-		}
-		r.Body = http.MaxBytesReader(w, r.Body, maxRequestBody)
-		var kb importer.KumaBackup
-		if err := json.NewDecoder(r.Body).Decode(&kb); err != nil {
-			log.Printf("Invalid Kuma JSON: %v", err)
-			http.Error(w, "Invalid Kuma JSON", http.StatusBadRequest)
-			return
-		}
-		backup := importer.ConvertKuma(&kb)
-		if err := s.ImportData(backup); err != nil {
-			log.Printf("Kuma import failed: %v", err)
-			http.Error(w, "Import failed", http.StatusInternalServerError)
-			return
-		}
-		fmt.Fprintf(w, "Imported %d monitors, %d alerts from Kuma v%s", len(backup.Sites), len(backup.Alerts), kb.Version)
-	}))
-
-	// 6. Probe Registration
-	mux.HandleFunc("/api/probe/register", RateLimit(probeRL, func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != "POST" {
-			http.Error(w, "POST required", http.StatusMethodNotAllowed)
-			return
-		}
-		if cfg.ClusterKey == "" || !checkSecret(r.Header.Get("X-Upkeep-Secret"), cfg.ClusterKey) {
-			http.Error(w, "Unauthorized", http.StatusUnauthorized)
-			return
-		}
-		r.Body = http.MaxBytesReader(w, r.Body, maxRequestBody)
-		var req struct {
-			ID      string `json:"id"`
-			Name    string `json:"name"`
-			Region  string `json:"region"`
-			Version string `json:"version"`
-		}
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, "Invalid JSON", http.StatusBadRequest)
-			return
-		}
-		if req.ID == "" {
-			http.Error(w, "id is required", http.StatusBadRequest)
-			return
-		}
-		if err := s.RegisterNode(models.ProbeNode{
-			ID: req.ID, Name: req.Name, Region: req.Region, Version: req.Version,
-		}); err != nil {
-			log.Printf("Probe register failed: %v", err)
-			http.Error(w, "Registration failed", http.StatusInternalServerError)
-			return
-		}
-		_ = json.NewEncoder(w).Encode(map[string]bool{"ok": true}) //nolint:errcheck
-	}))
-
-	// 7. Probe Assignment Fetch
-	mux.HandleFunc("/api/probe/assignments", RateLimit(probeRL, func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != http.MethodGet {
-			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
-			return
-		}
-		if cfg.ClusterKey == "" || !checkSecret(r.Header.Get("X-Upkeep-Secret"), cfg.ClusterKey) {
-			http.Error(w, "Unauthorized", http.StatusUnauthorized)
-			return
-		}
-		nodeID := r.URL.Query().Get("node_id")
-		var nodeRegion string
-		if nodeID != "" {
-			if node, err := s.GetNode(nodeID); err == nil {
-				nodeRegion = node.Region
-			}
-		}
-		sites := eng.GetAllSites()
-		var assigned []models.Site
-		for _, site := range sites {
-			if site.Paused || site.Type == "push" || site.Type == "group" {
-				continue
-			}
-			if site.Regions != "" && nodeRegion != "" {
-				matched := false
-				for _, r := range strings.Split(site.Regions, ",") {
-					if strings.TrimSpace(r) == nodeRegion {
-						matched = true
-						break
-					}
-				}
-				if !matched {
-					continue
-				}
-			}
-			assigned = append(assigned, site)
-		}
-		w.Header().Set("Content-Type", "application/json")
-		_ = json.NewEncoder(w).Encode(map[string][]models.Site{"sites": assigned}) //nolint:errcheck
-	}))
-
-	// 8. Probe Result Submission
-	mux.HandleFunc("/api/probe/results", RateLimit(probeRL, func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != "POST" {
-			http.Error(w, "POST required", http.StatusMethodNotAllowed)
-			return
-		}
-		if cfg.ClusterKey == "" || !checkSecret(r.Header.Get("X-Upkeep-Secret"), cfg.ClusterKey) {
-			http.Error(w, "Unauthorized", http.StatusUnauthorized)
-			return
-		}
-		r.Body = http.MaxBytesReader(w, r.Body, maxRequestBody)
-		var req struct {
-			NodeID  string `json:"node_id"`
-			Results []struct {
-				SiteID      int    `json:"site_id"`
-				LatencyNs   int64  `json:"latency_ns"`
-				IsUp        bool   `json:"is_up"`
-				ErrorReason string `json:"error_reason"`
-			} `json:"results"`
-		}
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, "Invalid JSON", http.StatusBadRequest)
-			return
-		}
-		if req.NodeID == "" {
-			http.Error(w, "node_id is required", http.StatusBadRequest)
-			return
-		}
-		for _, result := range req.Results {
-			if err := s.SaveCheckFromNode(result.SiteID, req.NodeID, result.LatencyNs, result.IsUp); err != nil {
-				log.Printf("Failed to save probe result: %v", err)
-			}
-			eng.IngestProbeResult(req.NodeID, result.SiteID, result.LatencyNs, result.IsUp, result.ErrorReason)
-		}
-		if err := s.UpdateNodeLastSeen(req.NodeID); err != nil {
-			log.Printf("Failed to update node last seen: %v", err)
-		}
-		_ = json.NewEncoder(w).Encode(map[string]bool{"ok": true}) //nolint:errcheck
-	}))
-
-	// 9. Prometheus Metrics
-	mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
-		if r.Method != http.MethodGet {
-			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
-			return
-		}
-		if !cfg.MetricsPublic && cfg.ClusterKey != "" {
-			if !checkSecret(r.Header.Get("X-Upkeep-Secret"), cfg.ClusterKey) {
-				http.Error(w, "Unauthorized", http.StatusUnauthorized)
-				return
-			}
-		}
-		metrics.Handler(eng)(w, r)
-	})
-
-	// 10. Status Page
-	if cfg.EnableStatus {
-		mux.HandleFunc("/status", RateLimit(statusRL, func(w http.ResponseWriter, r *http.Request) { renderStatusPage(w, cfg.Title, eng) }))
-		mux.HandleFunc("/status/json", RateLimit(statusRL, func(w http.ResponseWriter, r *http.Request) {
-			state := eng.GetLiveState()
-			activeWindows, _ := s.GetActiveMaintenanceWindows()
-			maintSet := make(map[int]bool)
-			allInMaint := false
-			for _, mw := range activeWindows {
-				if mw.Type != "maintenance" {
-					continue
-				}
-				if mw.MonitorID == 0 {
-					allInMaint = true
-				} else {
-					maintSet[mw.MonitorID] = true
-				}
-			}
-			for id, site := range state {
-				site.Token = ""
-				if allInMaint || maintSet[site.ID] || (site.ParentID > 0 && maintSet[site.ParentID]) {
-					site.Status = "MAINT"
-				}
-				state[id] = site
-			}
-			if cfg.CORSOrigin != "" {
-				w.Header().Set("Access-Control-Allow-Origin", cfg.CORSOrigin)
-			}
-			w.Header().Set("Content-Type", "application/json")
-			_ = json.NewEncoder(w).Encode(state) //nolint:errcheck
-		}))
-	}
-
-	if cfg.ClusterMode != "" && cfg.ClusterMode != "leader" && cfg.TLSCert == "" {
-		fmt.Println("WARNING: Cluster mode active without TLS. Secrets transmitted in cleartext.")
-	}
-
-	handler := loggingMiddleware(securityHeadersMiddleware(mux))
-	if cfg.TLSCert != "" {
-		handler = hstsMiddleware(handler)
-	}
-
-	addr := fmt.Sprintf(":%d", cfg.Port)
-	srv := &http.Server{
-		Addr:              addr,
-		Handler:           handler,
-		ReadHeaderTimeout: 10 * time.Second,
-		ReadTimeout:       30 * time.Second,
-		WriteTimeout:      60 * time.Second,
-		IdleTimeout:       120 * time.Second,
-	}
-	go func() {
-		if cfg.TLSCert != "" && cfg.TLSKey != "" {
-			fmt.Printf("HTTPS Server listening on %s\n", addr)
-			if err := srv.ListenAndServeTLS(cfg.TLSCert, cfg.TLSKey); err != nil && err != http.ErrServerClosed {
-				log.Printf("HTTPS server error: %v", err)
-			}
-		} else {
-			fmt.Printf("HTTP Server listening on %s\n", addr)
-			if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
-				log.Printf("HTTP server error: %v", err)
-			}
-		}
-	}()
-	return srv
-}
-
-type statusWriter struct {
-	http.ResponseWriter
-	code int
-}
-
-func (w *statusWriter) WriteHeader(code int) {
-	w.code = code
-	w.ResponseWriter.WriteHeader(code)
-}
-
-func loggingMiddleware(next http.Handler) http.Handler {
-	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		start := time.Now()
-		sw := &statusWriter{ResponseWriter: w, code: 200}
-		next.ServeHTTP(sw, r)
-		path := strings.ReplaceAll(strings.ReplaceAll(r.URL.Path, "\n", ""), "\r", "")
-		log.Printf("%s %s %d %s %s", r.Method, path, sw.code, time.Since(start).Round(time.Millisecond), clientIP(r)) //nolint:gosec // path sanitized above
-	})
-}
-
-func securityHeadersMiddleware(next http.Handler) http.Handler {
-	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("X-Content-Type-Options", "nosniff")
-		w.Header().Set("X-Frame-Options", "DENY")
-		w.Header().Set("Referrer-Policy", "no-referrer")
-		w.Header().Set("Content-Security-Policy", "default-src 'self'; script-src 'unsafe-inline'; style-src 'unsafe-inline'")
-		next.ServeHTTP(w, r)
-	})
-}
-
-func hstsMiddleware(next http.Handler) http.Handler {
-	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("Strict-Transport-Security", "max-age=63072000; includeSubDomains")
-		next.ServeHTTP(w, r)
-	})
-}
-
-func renderStatusPage(w http.ResponseWriter, title string, eng *monitor.Engine) {
-	sites := eng.GetAllSites()
-
-	sort.Slice(sites, func(i, j int) bool {
-		if sites[i].Status != sites[j].Status {
-			if sites[i].Status == "DOWN" {
-				return true
-			}
-			if sites[j].Status == "DOWN" {
-				return false
-			}
-		}
-		return sites[i].Name < sites[j].Name
-	})
-
-	data := struct {
-		Title string
-		Sites []models.Site
-	}{Title: title, Sites: sites}
-	if err := statusTpl.Execute(w, data); err != nil {
-		log.Printf("Failed to render status page: %v", err)
-	}
-}
@@ -2,6 +2,7 @@ package server

 import (
 	"bytes"
+	"context"
 	"encoding/json"
 	"fmt"
 	"net"
@@ -12,13 +13,15 @@ import (

 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store/storetest"
 )

 // --- Mock Store ---

 type mockStore struct {
+	storetest.BaseMock
 	mu              sync.Mutex
-	sites           []models.Site
+	sites           []models.SiteConfig
 	alerts          []models.AlertConfig
 	nodes           map[string]models.ProbeNode
 	importedData    *models.Backup
@@ -32,76 +35,26 @@ func newMockStore() *mockStore {
 	}
 }

-func (m *mockStore) Init() error                                              { return nil }
-func (m *mockStore) GetSites() ([]models.Site, error)                         { return m.sites, nil }
-func (m *mockStore) AddSite(models.Site) error                                { return nil }
-func (m *mockStore) UpdateSite(models.Site) error                             { return nil }
-func (m *mockStore) UpdateSitePaused(int, bool) error                         { return nil }
-func (m *mockStore) DeleteSite(int) error                                     { return nil }
-func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error)              { return m.alerts, nil }
-func (m *mockStore) GetAlert(int) (models.AlertConfig, error)                 { return models.AlertConfig{}, nil }
-func (m *mockStore) AddAlert(string, string, map[string]string) error         { return nil }
-func (m *mockStore) UpdateAlert(int, string, string, map[string]string) error { return nil }
-func (m *mockStore) DeleteAlert(int) error                                    { return nil }
-func (m *mockStore) GetAllUsers() ([]models.User, error)                      { return nil, nil }
-func (m *mockStore) AddUser(string, string, string) error                     { return nil }
-func (m *mockStore) UpdateUser(int, string, string, string) error             { return nil }
-func (m *mockStore) DeleteUser(int) error                                     { return nil }
-func (m *mockStore) SaveCheck(int, int64, bool) error                         { return nil }
-func (m *mockStore) SaveCheckFromNode(siteID int, nodeID string, latencyNs int64, isUp bool) error {
-	return nil
+// GetSites returns the sites seeded on the mock; the context is ignored.
+func (m *mockStore) GetSites(_ context.Context) ([]models.SiteConfig, error) { return m.sites, nil }
+// GetAllAlerts returns the alerts seeded on the mock; the context is ignored.
+func (m *mockStore) GetAllAlerts(_ context.Context) ([]models.AlertConfig, error) {
+	return m.alerts, nil
 }
-func (m *mockStore) LoadAllHistory(int) (map[int][]models.CheckRecord, error) {
-	return nil, nil
-}
-func (m *mockStore) GetSiteByName(string) (models.Site, error) { return models.Site{}, nil }
-func (m *mockStore) GetAlertByName(string) (models.AlertConfig, error) {
-	return models.AlertConfig{}, nil
-}
-func (m *mockStore) AddSiteReturningID(models.Site) (int, error) { return 0, nil }
-func (m *mockStore) AddAlertReturningID(string, string, map[string]string) (int, error) {
-	return 0, nil
-}
-func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
-func (m *mockStore) UpdateNodeLastSeen(string) error          { return nil }
-func (m *mockStore) DeleteNode(string) error                  { return nil }
-func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
-	return nil, nil
-}
-func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
-func (m *mockStore) SaveLog(string) error                           { return nil }
-func (m *mockStore) LoadLogs(int) ([]string, error)                 { return nil, nil }
-func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
-	return nil, nil
-}
-func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error    { return nil }
-func (m *mockStore) EndMaintenanceWindow(int) error                         { return nil }
-func (m *mockStore) DeleteMaintenanceWindow(int) error                      { return nil }
-func (m *mockStore) IsMonitorInMaintenance(int) (bool, error)               { return false, nil }
-func (m *mockStore) GetPreference(string) (string, error)                   { return "", nil }
-func (m *mockStore) SetPreference(string, string) error                     { return nil }
-func (m *mockStore) SaveStateChange(int, string, string, string) error      { return nil }
-func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
-func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
-	return nil, nil
-}
-func (m *mockStore) Close() error { return nil }

-func (m *mockStore) ExportData() (models.Backup, error) {
+// ExportData returns a Backup built from the mock's seeded sites and alerts.
+// The context is ignored and the call never fails.
+func (m *mockStore) ExportData(_ context.Context) (models.Backup, error) {
 	return models.Backup{
 		Sites:  m.sites,
 		Alerts: m.alerts,
 	}, nil
 }

-func (m *mockStore) ImportData(data models.Backup) error {
+// ImportData remembers the most recent backup it was given, under the mock's
+// mutex, so tests can inspect it afterwards. The context is ignored.
+func (m *mockStore) ImportData(_ context.Context, data models.Backup) error {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.importedData = &data
 	return nil
 }

-func (m *mockStore) RegisterNode(node models.ProbeNode) error {
+func (m *mockStore) RegisterNode(_ context.Context, node models.ProbeNode) error {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.registeredNodes = append(m.registeredNodes, node)
@@ -109,7 +62,7 @@ func (m *mockStore) RegisterNode(node models.ProbeNode) error {
 	return nil
 }

-func (m *mockStore) GetNode(id string) (models.ProbeNode, error) {
+func (m *mockStore) GetNode(_ context.Context, id string) (models.ProbeNode, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	if n, ok := m.nodes[id]; ok {
@@ -118,7 +71,7 @@ func (m *mockStore) GetNode(id string) (models.ProbeNode, error) {
 	return models.ProbeNode{}, fmt.Errorf("not found")
 }

-func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
+// GetActiveMaintenanceWindows returns the windows seeded on the mock without
+// filtering them; the context is ignored.
+func (m *mockStore) GetActiveMaintenanceWindows(_ context.Context) ([]models.MaintenanceWindow, error) {
 	return m.maintWindows, nil
 }

@@ -188,7 +141,7 @@ func authReq(method, url, secret string, body []byte) (*http.Response, error) {
 		return nil, err
 	}
 	if secret != "" {
-		req.Header.Set("X-Upkeep-Secret", secret)
+		req.Header.Set("X-Uptop-Secret", secret)
 	}
 	return http.DefaultClient.Do(req)
 }
@@ -299,7 +252,7 @@ func TestExport_Unauthorized_WrongKey(t *testing.T) {

 func TestExport_Success(t *testing.T) {
 	ts := newTestServer(t, "secret", false)
-	ts.store.sites = []models.Site{{ID: 1, Name: "example", URL: "http://example.com"}}
+	ts.store.sites = []models.SiteConfig{{ID: 1, Name: "example", URL: "http://example.com"}}

 	resp, err := authReq("GET", ts.baseURL+"/api/backup/export", "secret", nil)
 	if err != nil {
@@ -346,7 +299,7 @@ func TestImport_Unauthorized(t *testing.T) {
 func TestImport_Success(t *testing.T) {
 	ts := newTestServer(t, "secret", false)
 	backup := models.Backup{
-		Sites: []models.Site{{Name: "imported", URL: "http://example.com"}},
+		Sites: []models.SiteConfig{{Name: "imported", URL: "http://example.com"}},
 	}
 	body, _ := json.Marshal(backup)
 	resp, err := authReq("POST", ts.baseURL+"/api/backup/import", "secret", body)
@@ -476,15 +429,32 @@ func TestStatusPage_Enabled(t *testing.T) {
 	}
 }

-func TestStatusJSON_TokensStripped(t *testing.T) {
+func TestStatusJSON_PublicDTOOnly(t *testing.T) {
 	ts := newTestServer(t, "secret", true)

-	// Inject a site with a token into engine state
-	ts.engine.UpdateSiteConfig(models.Site{ID: 1, Name: "test", Type: "push", Token: "secret-token", Status: "UP"})
-	// Need to inject directly since UpdateSiteConfig only updates existing
-	func() {
-		ts.engine.RecordHeartbeat("unused") // just to exercise, won't match
-	}()
+	// Seed a push monitor (no network IO) through the store and start the
+	// engine so its poll loop loads it into live state — the path real sites
+	// take. The old version of this test injected via UpdateSiteConfig, which
+	// no-ops for unknown IDs, so it asserted over zero sites and passed
+	// against a server that leaked tokens.
+	ts.store.sites = []models.SiteConfig{{
+		ID: 1, Name: "test", Type: "push", Token: "secret-token",
+		Hostname: "internal-host", AlertID: 3,
+	}}
+	ctx, cancel := context.WithCancel(context.Background())
+	ts.engine.Start(ctx)
+	t.Cleanup(func() {
+		cancel()
+		ts.engine.Stop()
+	})
+
+	deadline := time.Now().Add(2 * time.Second)
+	for time.Now().Before(deadline) && len(ts.engine.GetLiveState()) == 0 {
+		time.Sleep(10 * time.Millisecond)
+	}
+	if len(ts.engine.GetLiveState()) == 0 {
+		t.Fatal("engine never loaded the seeded site")
+	}

 	resp, err := http.Get(ts.baseURL + "/status/json")
 	if err != nil {
@@ -494,11 +464,23 @@ func TestStatusJSON_TokensStripped(t *testing.T) {
 	if resp.StatusCode != 200 {
 		t.Errorf("expected 200, got %d", resp.StatusCode)
 	}
-	var state map[string]models.Site
-	json.NewDecoder(resp.Body).Decode(&state)
+
+	// Decode raw so absent struct fields can't mask leaked JSON keys.
+	var state map[string]map[string]any
+	if err := json.NewDecoder(resp.Body).Decode(&state); err != nil {
+		t.Fatal(err)
+	}
+	if len(state) != 1 {
+		t.Fatalf("expected 1 site in status JSON, got %d", len(state))
+	}
 	for _, site := range state {
-		if site.Token != "" {
-			t.Error("expected token stripped from status JSON response")
+		if site["Name"] != "test" {
+			t.Errorf("expected Name to be public, got %v", site["Name"])
+		}
+		for _, leaked := range []string{"Token", "LastError", "Hostname", "Port", "DNSServer", "AlertID", "AcceptedCodes", "Interval"} {
+			if _, ok := site[leaked]; ok {
+				t.Errorf("status JSON leaks internal field %q", leaked)
+			}
 		}
 	}
 }
@@ -561,3 +543,108 @@ func TestProbeAssignments_Unauthorized(t *testing.T) {
 		t.Errorf("expected 401, got %d", resp.StatusCode)
 	}
 }
+
+// --- Security: X-Forwarded-For trusted-proxy handling ---
+
+// mustCIDR parses s as a CIDR block and returns its network, failing the
+// calling test immediately when s is malformed.
+func mustCIDR(t *testing.T, s string) *net.IPNet {
+	t.Helper()
+	_, network, parseErr := net.ParseCIDR(s)
+	if parseErr == nil {
+		return network
+	}
+	t.Fatalf("ParseCIDR(%q): %v", s, parseErr)
+	return nil // not reached: Fatalf stops the test
+}
+
+// TestClientIP_TrustedProxyHandling checks that clientIP believes
+// X-Forwarded-For only when the direct peer lies inside a trusted network,
+// and otherwise reports the peer address.
+func TestClientIP_TrustedProxyHandling(t *testing.T) {
+	proxies := []*net.IPNet{mustCIDR(t, "10.0.0.0/8")}
+
+	type scenario struct {
+		name       string
+		remoteAddr string
+		xff        string
+		trusted    []*net.IPNet
+		want       string
+	}
+	scenarios := []scenario{
+		{name: "no trusted proxies ignores XFF", remoteAddr: "203.0.113.9:5000", xff: "1.2.3.4", trusted: nil, want: "203.0.113.9"},
+		{name: "untrusted peer ignores XFF", remoteAddr: "203.0.113.9:5000", xff: "1.2.3.4", trusted: proxies, want: "203.0.113.9"},
+		{name: "trusted peer honors XFF", remoteAddr: "10.0.0.5:5000", xff: "1.2.3.4", trusted: proxies, want: "1.2.3.4"},
+		{name: "trusted peer, rightmost-untrusted hop", remoteAddr: "10.0.0.5:5000", xff: "1.2.3.4, 10.0.0.9", trusted: proxies, want: "1.2.3.4"},
+		{name: "trusted peer, no XFF falls back to peer", remoteAddr: "10.0.0.5:5000", xff: "", trusted: proxies, want: "10.0.0.5"},
+	}
+	for _, sc := range scenarios {
+		t.Run(sc.name, func(t *testing.T) {
+			req, _ := http.NewRequest(http.MethodGet, "/", nil)
+			req.RemoteAddr = sc.remoteAddr
+			if sc.xff != "" {
+				req.Header.Set("X-Forwarded-For", sc.xff)
+			}
+			got := clientIP(req, sc.trusted)
+			if got != sc.want {
+				t.Errorf("clientIP = %q, want %q", got, sc.want)
+			}
+		})
+	}
+}
+
+// Rotating a forged X-Forwarded-For from an untrusted peer must NOT defeat the
+// limiter: with no trusted proxies every request is keyed on the real
+// RemoteAddr, so the single bucket still trips.
+func TestRateLimit_SpoofedXFFCannotBypass(t *testing.T) {
+	const attempts = 200
+	limiter := NewRateLimiter(60, nil) // no trusted proxies
+	var passed int
+	for n := 0; n < attempts; n++ {
+		req, _ := http.NewRequest(http.MethodGet, "/", nil)
+		req.RemoteAddr = "203.0.113.9:5000"
+		req.Header.Set("X-Forwarded-For", fmt.Sprintf("9.9.9.%d", n%256))
+		if limiter.Allow(clientIP(req, limiter.trusted)) {
+			passed++
+		}
+	}
+	if passed > 60 {
+		t.Errorf("spoofed XFF bypassed limiter: %d/200 allowed (burst is 60)", passed)
+	}
+}
+
+// TestRateLimit_VisitorMapBounded feeds the limiter 500 more distinct keys
+// than maxVisitors and checks that the visitor map never grows past that cap.
+func TestRateLimit_VisitorMapBounded(t *testing.T) {
+	limiter := NewRateLimiter(60, nil)
+	total := maxVisitors + 500
+	for idx := 0; idx < total; idx++ {
+		key := fmt.Sprintf("10.1.%d.%d", idx/256, idx%256)
+		limiter.Allow(key)
+	}
+
+	// Read the map size under the limiter's own lock.
+	limiter.mu.Lock()
+	tracked := len(limiter.visitors)
+	limiter.mu.Unlock()
+
+	if tracked > maxVisitors {
+		t.Errorf("visitor map exceeded cap: %d > %d", tracked, maxVisitors)
+	}
+}
+
+// --- Security: export redaction allowlist ---
+
+// TestRedactByProvider runs models.RedactAlertSettings over sample settings
+// for several provider types and checks which keys come back masked and which
+// come back unchanged.
+func TestRedactByProvider(t *testing.T) {
+	const mask = "***REDACTED***"
+	cases := []struct {
+		name     string
+		typ      string
+		in       map[string]string
+		redacted []string // keys expected to be ***REDACTED***
+		kept     []string // keys expected to survive verbatim
+	}{
+		{"discord url is secret", "discord", map[string]string{"url": "https://discord.com/api/webhooks/1/abc"}, []string{"url"}, nil},
+		{"opsgenie api_key redacted, priority kept", "opsgenie", map[string]string{"api_key": "k", "priority": "P1", "eu": "true"}, []string{"api_key"}, []string{"priority", "eu"}},
+		{"email creds redacted, routing kept", "email", map[string]string{"host": "smtp.x.com", "port": "587", "to": "a@x.com", "from": "b@x.com", "user": "u", "pass": "p"}, []string{"user", "pass"}, []string{"host", "port", "to", "from"}},
+		{"telegram token redacted, chat_id kept", "telegram", map[string]string{"token": "123:ABC", "chat_id": "42"}, []string{"token"}, []string{"chat_id"}},
+		{"unknown provider redacts everything", "mystery", map[string]string{"anything": "x"}, []string{"anything"}, nil},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := models.RedactAlertSettings(tc.typ, tc.in)
+			for _, key := range tc.redacted {
+				if got[key] != mask {
+					t.Errorf("key %q: expected redacted, got %q", key, got[key])
+				}
+			}
+			for _, key := range tc.kept {
+				if want := tc.in[key]; got[key] != want {
+					t.Errorf("key %q: expected kept %q, got %q", key, want, got[key])
+				}
+			}
+		})
+	}
+}
@@ -5,13 +5,20 @@ import (
 	"strconv"
 )

+type Migration struct { // one numbered schema change supplied by a Dialect
+	Version int    // ordering key: Init skips entries whose Version is <= the recorded schema version
+	SQL     string // statement Init executes to apply the change
+}
+
 type Dialect interface {
 	DriverName() string
 	CreateTablesSQL() []string
-	MigrationsSQL() []string
+	Migrations() []Migration
+	BaselineVersion() int
 	BoolFalse() string
 	ResetSequenceOnEmpty(db *sql.DB, table string)
 	ImportWipe(tx *sql.Tx)
+	ImportWipeUsers(tx *sql.Tx)
 	ImportResetSequences(tx *sql.Tx)
 	UpsertNodeSQL() string
 	UpsertAlertHealthSQL() string
@@ -2,7 +2,7 @@ package store

 import (
 	"database/sql"
-	"log"
+	"log/slog"

 	_ "github.com/lib/pq"
 )
@@ -15,6 +15,7 @@ func NewPostgresStore(connStr string) (*SQLStore, error) {

 func (d *PostgresDialect) DriverName() string   { return "postgres" }
 func (d *PostgresDialect) BoolFalse() string    { return "FALSE" }
+func (d *PostgresDialect) BaselineVersion() int { return 21 } // equals the highest Version in Migrations; Init records it when schema_version is empty

 func (d *PostgresDialect) CreateTablesSQL() []string {
 	return []string{
@@ -32,7 +33,8 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
 			method TEXT DEFAULT 'GET', description TEXT DEFAULT '',
 			parent_id INTEGER DEFAULT 0, accepted_codes TEXT DEFAULT '200-299',
 			dns_resolve_type TEXT DEFAULT '', dns_server TEXT DEFAULT '',
-			ignore_tls BOOLEAN DEFAULT FALSE, paused BOOLEAN DEFAULT FALSE
+			ignore_tls BOOLEAN DEFAULT FALSE, paused BOOLEAN DEFAULT FALSE,
+			regions TEXT DEFAULT ''
 		)`,
 		`CREATE TABLE IF NOT EXISTS users (
 			id SERIAL PRIMARY KEY,
@@ -42,20 +44,21 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
 		`CREATE TABLE IF NOT EXISTS check_history (
 			id SERIAL PRIMARY KEY,
 			site_id INTEGER NOT NULL, latency_ns BIGINT,
-			is_up BOOLEAN, checked_at TIMESTAMP DEFAULT NOW()
+			is_up BOOLEAN, checked_at TIMESTAMPTZ DEFAULT NOW(),
+			node_id TEXT DEFAULT ''
 		)`,
 		`CREATE INDEX IF NOT EXISTS idx_check_history_site ON check_history(site_id, checked_at DESC)`,
 		`CREATE TABLE IF NOT EXISTS nodes (
 			id TEXT PRIMARY KEY,
 			name TEXT NOT NULL,
 			region TEXT DEFAULT '',
-			last_seen TIMESTAMP DEFAULT NOW(),
+			last_seen TIMESTAMPTZ DEFAULT NOW(),
 			version TEXT DEFAULT ''
 		)`,
 		`CREATE TABLE IF NOT EXISTS logs (
 			id SERIAL PRIMARY KEY,
 			message TEXT NOT NULL,
-			created_at TIMESTAMP DEFAULT NOW()
+			created_at TIMESTAMPTZ DEFAULT NOW()
 		)`,
 		`CREATE TABLE IF NOT EXISTS maintenance_windows (
 			id SERIAL PRIMARY KEY,
@@ -63,10 +66,10 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
 			title TEXT NOT NULL,
 			description TEXT DEFAULT '',
 			type TEXT DEFAULT 'maintenance',
-			start_time TIMESTAMP NOT NULL,
-			end_time TIMESTAMP,
+			start_time TIMESTAMPTZ NOT NULL,
+			end_time TIMESTAMPTZ,
 			created_by TEXT DEFAULT '',
-			created_at TIMESTAMP DEFAULT NOW()
+			created_at TIMESTAMPTZ DEFAULT NOW()
 		)`,
 		`CREATE TABLE IF NOT EXISTS preferences (
 			key TEXT PRIMARY KEY,
@@ -78,12 +81,12 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
 			from_status TEXT NOT NULL,
 			to_status TEXT NOT NULL,
 			error_reason TEXT DEFAULT '',
-			changed_at TIMESTAMP DEFAULT NOW()
+			changed_at TIMESTAMPTZ DEFAULT NOW()
 		)`,
 		`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
 		`CREATE TABLE IF NOT EXISTS alert_health (
 			alert_id INTEGER PRIMARY KEY,
-			last_send_at TIMESTAMP,
+			last_send_at TIMESTAMPTZ,
 			last_send_ok BOOLEAN DEFAULT FALSE,
 			last_error TEXT DEFAULT '',
 			send_count INTEGER DEFAULT 0,
@@ -92,21 +95,29 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
 	}
 }

-func (d *PostgresDialect) MigrationsSQL() []string {
-	return []string{
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS hostname TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS port INTEGER DEFAULT 0",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS timeout INTEGER DEFAULT 0",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS method TEXT DEFAULT 'GET'",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS description TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS parent_id INTEGER DEFAULT 0",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS accepted_codes TEXT DEFAULT '200-299'",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS dns_resolve_type TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS dns_server TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS ignore_tls BOOLEAN DEFAULT FALSE",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS paused BOOLEAN DEFAULT FALSE",
-		"ALTER TABLE check_history ADD COLUMN IF NOT EXISTS node_id TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN IF NOT EXISTS regions TEXT DEFAULT ''",
+func (d *PostgresDialect) Migrations() []Migration { // numbered schema changes; Init applies, in slice order, those above the recorded version
+	return []Migration{
+		{Version: 1, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS hostname TEXT DEFAULT ''"},
+		{Version: 2, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS port INTEGER DEFAULT 0"},
+		{Version: 3, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS timeout INTEGER DEFAULT 0"},
+		{Version: 4, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS method TEXT DEFAULT 'GET'"},
+		{Version: 5, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS description TEXT DEFAULT ''"},
+		{Version: 6, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS parent_id INTEGER DEFAULT 0"},
+		{Version: 7, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS accepted_codes TEXT DEFAULT '200-299'"},
+		{Version: 8, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS dns_resolve_type TEXT DEFAULT ''"},
+		{Version: 9, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS dns_server TEXT DEFAULT ''"},
+		{Version: 10, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS ignore_tls BOOLEAN DEFAULT FALSE"},
+		{Version: 11, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS paused BOOLEAN DEFAULT FALSE"},
+		{Version: 12, SQL: "ALTER TABLE check_history ADD COLUMN IF NOT EXISTS node_id TEXT DEFAULT ''"},
+		{Version: 13, SQL: "ALTER TABLE sites ADD COLUMN IF NOT EXISTS regions TEXT DEFAULT ''"},
+		{Version: 14, SQL: "ALTER TABLE check_history ALTER COLUMN checked_at TYPE TIMESTAMPTZ USING checked_at AT TIME ZONE 'UTC'"},
+		{Version: 15, SQL: "ALTER TABLE nodes ALTER COLUMN last_seen TYPE TIMESTAMPTZ USING last_seen AT TIME ZONE 'UTC'"},
+		{Version: 16, SQL: "ALTER TABLE logs ALTER COLUMN created_at TYPE TIMESTAMPTZ USING created_at AT TIME ZONE 'UTC'"},
+		{Version: 17, SQL: "ALTER TABLE maintenance_windows ALTER COLUMN start_time TYPE TIMESTAMPTZ USING start_time AT TIME ZONE 'UTC'"},
+		{Version: 18, SQL: "ALTER TABLE maintenance_windows ALTER COLUMN end_time TYPE TIMESTAMPTZ USING end_time AT TIME ZONE 'UTC'"},
+		{Version: 19, SQL: "ALTER TABLE maintenance_windows ALTER COLUMN created_at TYPE TIMESTAMPTZ USING created_at AT TIME ZONE 'UTC'"},
+		{Version: 20, SQL: "ALTER TABLE state_changes ALTER COLUMN changed_at TYPE TIMESTAMPTZ USING changed_at AT TIME ZONE 'UTC'"},
+		{Version: 21, SQL: "ALTER TABLE alert_health ALTER COLUMN last_send_at TYPE TIMESTAMPTZ USING last_send_at AT TIME ZONE 'UTC'"},
 	}
 }

@@ -122,30 +133,42 @@ func (d *PostgresDialect) ResetSequenceOnEmpty(db *sql.DB, table string) {}

 func (d *PostgresDialect) ImportWipe(tx *sql.Tx) {
 	if _, err := tx.Exec("TRUNCATE TABLE sites RESTART IDENTITY CASCADE"); err != nil {
-		log.Printf("import wipe error: %v", err)
+		slog.Debug("import wipe failed", "table", "sites", "err", err) // NOTE(review): debug-level only and nothing is returned, so the caller cannot observe a failed wipe
 	}
 	if _, err := tx.Exec("TRUNCATE TABLE alerts RESTART IDENTITY CASCADE"); err != nil {
-		log.Printf("import wipe error: %v", err)
-	}
-	if _, err := tx.Exec("TRUNCATE TABLE users RESTART IDENTITY CASCADE"); err != nil {
-		log.Printf("import wipe error: %v", err)
+		slog.Debug("import wipe failed", "table", "alerts", "err", err)
 	}
 	if _, err := tx.Exec("TRUNCATE TABLE maintenance_windows RESTART IDENTITY CASCADE"); err != nil {
-		log.Printf("import wipe error: %v", err)
+		slog.Debug("import wipe failed", "table", "maintenance_windows", "err", err)
+	}
+	if _, err := tx.Exec("TRUNCATE TABLE check_history RESTART IDENTITY CASCADE"); err != nil { // history, state-change and alert-health rows are wiped together with the configuration tables
+		slog.Debug("import wipe failed", "table", "check_history", "err", err)
+	}
+	if _, err := tx.Exec("TRUNCATE TABLE state_changes RESTART IDENTITY CASCADE"); err != nil {
+		slog.Debug("import wipe failed", "table", "state_changes", "err", err)
+	}
+	if _, err := tx.Exec("TRUNCATE TABLE alert_health RESTART IDENTITY CASCADE"); err != nil {
+		slog.Debug("import wipe failed", "table", "alert_health", "err", err)
+	}
+}
+
+func (d *PostgresDialect) ImportWipeUsers(tx *sql.Tx) { // truncates only the users table; ImportWipe leaves that table alone
+	if _, err := tx.Exec("TRUNCATE TABLE users RESTART IDENTITY CASCADE"); err != nil {
+		slog.Debug("import wipe failed", "table", "users", "err", err)
 	}
 }

 func (d *PostgresDialect) ImportResetSequences(tx *sql.Tx) {
 	if _, err := tx.Exec("SELECT setval('sites_id_seq', (SELECT COALESCE(MAX(id), 1) FROM sites))"); err != nil {
-		log.Printf("sequence reset error: %v", err)
+		slog.Debug("sequence reset failed", "table", "sites", "err", err) // logged at debug level only; the remaining sequences are still attempted
 	}
 	if _, err := tx.Exec("SELECT setval('alerts_id_seq', (SELECT COALESCE(MAX(id), 1) FROM alerts))"); err != nil {
-		log.Printf("sequence reset error: %v", err)
+		slog.Debug("sequence reset failed", "table", "alerts", "err", err)
 	}
 	if _, err := tx.Exec("SELECT setval('users_id_seq', (SELECT COALESCE(MAX(id), 1) FROM users))"); err != nil {
-		log.Printf("sequence reset error: %v", err)
+		slog.Debug("sequence reset failed", "table", "users", "err", err)
 	}
 	if _, err := tx.Exec("SELECT setval('maintenance_windows_id_seq', (SELECT COALESCE(MAX(id), 1) FROM maintenance_windows))"); err != nil {
-		log.Printf("sequence reset error: %v", err)
+		slog.Debug("sequence reset failed", "table", "maintenance_windows", "err", err)
 	}
 }
@@ -2,26 +2,43 @@ package store

 import (
 	"database/sql"
-	"log"
+	"fmt"
+	"log/slog"
+	"os"

-	_ "github.com/mattn/go-sqlite3"
+	_ "modernc.org/sqlite"
 )

 type SQLiteDialect struct{}

 func NewSQLiteStore(path string) (*SQLStore, error) {
-	s, err := NewSQLStore("sqlite3", path, &SQLiteDialect{})
+	// Apply pragmas via the DSN so every pooled connection gets them — a post-open
+	// PRAGMA Exec only affects a single connection. WAL allows concurrent readers
+	// alongside the single writer goroutine; busy_timeout rides out brief lock
+	// contention; synchronous=NORMAL keeps a WAL database consistent (the latest
+	// commits can be lost on power failure) and is far faster than FULL. :memory: is
+	// left untouched — these pragmas are no-ops or harmful for the in-memory test DB.
+	dsn := path
+	if path != ":memory:" { // NOTE(review): path is interpolated unescaped — a '?' or '#' in it would be read as URI syntax; confirm callers never pass one
+		dsn = fmt.Sprintf("file:%s?_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)&_pragma=synchronous(normal)", path)
+	}
+	s, err := NewSQLStore("sqlite", dsn, &SQLiteDialect{})
 	if err != nil {
 		return nil, err
 	}
-	if _, err := s.db.Exec("PRAGMA journal_mode=WAL"); err != nil {
-		log.Printf("WAL mode failed: %v", err)
+	if path != ":memory:" {
+		for _, suffix := range []string{"", "-wal", "-shm"} { // owner-only access for the database file and its WAL/SHM sidecars
+			if err := os.Chmod(path+suffix, 0600); err != nil && !os.IsNotExist(err) { // a missing file is skipped silently
+				slog.Warn("failed to chmod database file", "path", path+suffix, "err", err)
+			}
+		}
 	}
 	return s, nil
 }

-func (d *SQLiteDialect) DriverName() string { return "sqlite3" }
+func (d *SQLiteDialect) DriverName() string   { return "sqlite" }
 func (d *SQLiteDialect) BoolFalse() string    { return "0" }
+func (d *SQLiteDialect) BaselineVersion() int { return 13 } // equals the highest Version in Migrations; Init records it when schema_version is empty

 func (d *SQLiteDialect) CreateTablesSQL() []string {
 	return []string{
@@ -39,7 +56,8 @@ func (d *SQLiteDialect) CreateTablesSQL() []string {
 			method TEXT DEFAULT 'GET', description TEXT DEFAULT '',
 			parent_id INTEGER DEFAULT 0, accepted_codes TEXT DEFAULT '200-299',
 			dns_resolve_type TEXT DEFAULT '', dns_server TEXT DEFAULT '',
-			ignore_tls BOOLEAN DEFAULT 0, paused BOOLEAN DEFAULT 0
+			ignore_tls BOOLEAN DEFAULT 0, paused BOOLEAN DEFAULT 0,
+			regions TEXT DEFAULT ''
 		)`,
 		`CREATE TABLE IF NOT EXISTS users (
 			id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -49,7 +67,8 @@ func (d *SQLiteDialect) CreateTablesSQL() []string {
 		`CREATE TABLE IF NOT EXISTS check_history (
 			id INTEGER PRIMARY KEY AUTOINCREMENT,
 			site_id INTEGER NOT NULL, latency_ns INTEGER,
-			is_up BOOLEAN, checked_at DATETIME DEFAULT CURRENT_TIMESTAMP
+			is_up BOOLEAN, checked_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+			node_id TEXT DEFAULT ''
 		)`,
 		`CREATE INDEX IF NOT EXISTS idx_check_history_site ON check_history(site_id, checked_at DESC)`,
 		`CREATE TABLE IF NOT EXISTS nodes (
@@ -99,21 +118,21 @@ func (d *SQLiteDialect) CreateTablesSQL() []string {
 	}
 }

-func (d *SQLiteDialect) MigrationsSQL() []string {
-	return []string{
-		"ALTER TABLE sites ADD COLUMN hostname TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN port INTEGER DEFAULT 0",
-		"ALTER TABLE sites ADD COLUMN timeout INTEGER DEFAULT 0",
-		"ALTER TABLE sites ADD COLUMN method TEXT DEFAULT 'GET'",
-		"ALTER TABLE sites ADD COLUMN description TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN parent_id INTEGER DEFAULT 0",
-		"ALTER TABLE sites ADD COLUMN accepted_codes TEXT DEFAULT '200-299'",
-		"ALTER TABLE sites ADD COLUMN dns_resolve_type TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN dns_server TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN ignore_tls BOOLEAN DEFAULT 0",
-		"ALTER TABLE sites ADD COLUMN paused BOOLEAN DEFAULT 0",
-		"ALTER TABLE check_history ADD COLUMN node_id TEXT DEFAULT ''",
-		"ALTER TABLE sites ADD COLUMN regions TEXT DEFAULT ''",
+func (d *SQLiteDialect) Migrations() []Migration { // numbered schema changes; Init applies, in slice order, those above the recorded version
+	return []Migration{
+		{Version: 1, SQL: "ALTER TABLE sites ADD COLUMN hostname TEXT DEFAULT ''"},
+		{Version: 2, SQL: "ALTER TABLE sites ADD COLUMN port INTEGER DEFAULT 0"},
+		{Version: 3, SQL: "ALTER TABLE sites ADD COLUMN timeout INTEGER DEFAULT 0"},
+		{Version: 4, SQL: "ALTER TABLE sites ADD COLUMN method TEXT DEFAULT 'GET'"},
+		{Version: 5, SQL: "ALTER TABLE sites ADD COLUMN description TEXT DEFAULT ''"},
+		{Version: 6, SQL: "ALTER TABLE sites ADD COLUMN parent_id INTEGER DEFAULT 0"},
+		{Version: 7, SQL: "ALTER TABLE sites ADD COLUMN accepted_codes TEXT DEFAULT '200-299'"},
+		{Version: 8, SQL: "ALTER TABLE sites ADD COLUMN dns_resolve_type TEXT DEFAULT ''"},
+		{Version: 9, SQL: "ALTER TABLE sites ADD COLUMN dns_server TEXT DEFAULT ''"},
+		{Version: 10, SQL: "ALTER TABLE sites ADD COLUMN ignore_tls BOOLEAN DEFAULT 0"},
+		{Version: 11, SQL: "ALTER TABLE sites ADD COLUMN paused BOOLEAN DEFAULT 0"},
+		{Version: 12, SQL: "ALTER TABLE check_history ADD COLUMN node_id TEXT DEFAULT ''"},
+		{Version: 13, SQL: "ALTER TABLE sites ADD COLUMN regions TEXT DEFAULT ''"},
 	}
 }

@@ -130,35 +149,47 @@ func (d *SQLiteDialect) ResetSequenceOnEmpty(db *sql.DB, table string) {
 	_ = db.QueryRow("SELECT COUNT(*) FROM " + table).Scan(&count) //nolint:errcheck
 	if count == 0 {
 		if _, err := db.Exec("DELETE FROM sqlite_sequence WHERE name=?", table); err != nil {
-			log.Printf("sequence cleanup error: %v", err)
+			slog.Debug("sequence cleanup failed", "table", table, "err", err)
 		}
 	}
 }

 func (d *SQLiteDialect) ImportWipe(tx *sql.Tx) {
 	if _, err := tx.Exec("DELETE FROM sites"); err != nil {
-		log.Printf("import wipe error: %v", err)
+		slog.Debug("import wipe failed", "table", "sites", "err", err) // NOTE(review): debug-level only and nothing is returned, so the caller cannot observe a failed wipe
 	}
 	if _, err := tx.Exec("DELETE FROM sqlite_sequence WHERE name='sites'"); err != nil {
-		log.Printf("import wipe error: %v", err)
+		slog.Debug("import wipe failed", "table", "sqlite_sequence(sites)", "err", err)
 	}
 	if _, err := tx.Exec("DELETE FROM alerts"); err != nil {
-		log.Printf("import wipe error: %v", err)
+		slog.Debug("import wipe failed", "table", "alerts", "err", err)
 	}
 	if _, err := tx.Exec("DELETE FROM sqlite_sequence WHERE name='alerts'"); err != nil {
-		log.Printf("import wipe error: %v", err)
-	}
-	if _, err := tx.Exec("DELETE FROM users"); err != nil {
-		log.Printf("import wipe error: %v", err)
-	}
-	if _, err := tx.Exec("DELETE FROM sqlite_sequence WHERE name='users'"); err != nil {
-		log.Printf("import wipe error: %v", err)
+		slog.Debug("import wipe failed", "table", "sqlite_sequence(alerts)", "err", err)
 	}
 	if _, err := tx.Exec("DELETE FROM maintenance_windows"); err != nil {
-		log.Printf("import wipe error: %v", err)
+		slog.Debug("import wipe failed", "table", "maintenance_windows", "err", err)
 	}
 	if _, err := tx.Exec("DELETE FROM sqlite_sequence WHERE name='maintenance_windows'"); err != nil {
-		log.Printf("import wipe error: %v", err)
+		slog.Debug("import wipe failed", "table", "sqlite_sequence(maintenance_windows)", "err", err)
+	}
+	if _, err := tx.Exec("DELETE FROM check_history"); err != nil { // NOTE(review): unlike the tables above, the sqlite_sequence row is not cleared, so check_history ids keep counting from the old maximum
+		slog.Debug("import wipe failed", "table", "check_history", "err", err)
+	}
+	if _, err := tx.Exec("DELETE FROM state_changes"); err != nil {
+		slog.Debug("import wipe failed", "table", "state_changes", "err", err)
+	}
+	if _, err := tx.Exec("DELETE FROM alert_health"); err != nil {
+		slog.Debug("import wipe failed", "table", "alert_health", "err", err)
+	}
+}
+
+func (d *SQLiteDialect) ImportWipeUsers(tx *sql.Tx) { // empties users and clears its AUTOINCREMENT counter; failures are only logged
+	steps := [][2]string{{"users", "DELETE FROM users"}, {"sqlite_sequence(users)", "DELETE FROM sqlite_sequence WHERE name='users'"}}
+	for _, step := range steps { // step[0] is the log label, step[1] the statement
+		if _, err := tx.Exec(step[1]); err != nil {
+			slog.Debug("import wipe failed", "table", step[0], "err", err)
+		}
+	}
+}

@@ -1,12 +1,12 @@
 package store

 import (
+	"context"
 	"crypto/rand"
 	"database/sql"
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
-	"strings"
 	"time"

 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
@@ -14,9 +14,9 @@ import (

 const (
 	maxCheckHistory        = 1000
-	checkHistoryPruneAt  = 1100
+	maxLogRows             = 200  // presumably the row cap for the logs table — usage is outside this hunk, confirm
+	maxStateChangesPerSite = 5000 // presumably the per-site row cap for state_changes — usage is outside this hunk, confirm
 	maxMaintenanceExport   = 1000
-	maxRequestBody       = 1 << 20
 )

 type SQLStore struct {
@@ -72,38 +72,59 @@ func (s *SQLStore) Close() error {
 	return s.db.Close()
 }

-func (s *SQLStore) Init() error {
+func (s *SQLStore) Init(ctx context.Context) error { // creates the tables, then applies migrations newer than the recorded schema version
 	for _, stmt := range s.dialect.CreateTablesSQL() {
-		if _, err := s.db.Exec(stmt); err != nil {
+		if _, err := s.db.ExecContext(ctx, stmt); err != nil {
 			return err
 		}
 	}
-	for _, m := range s.dialect.MigrationsSQL() {
-		if _, err := s.db.Exec(m); err != nil {
-			errMsg := err.Error()
-			if strings.Contains(errMsg, "already exists") || strings.Contains(errMsg, "duplicate column") {
+	// schema_version holds one row per recorded migration number.
+	if _, err := s.db.ExecContext(ctx, `CREATE TABLE IF NOT EXISTS schema_version (
+		version INTEGER PRIMARY KEY,
+		applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+	)`); err != nil { // TIMESTAMP, not DATETIME: PostgreSQL has no DATETIME type and this statement runs on every dialect
+		return fmt.Errorf("create schema_version: %w", err)
+	}
+
+	var current int
+	_ = s.db.QueryRowContext(ctx, "SELECT COALESCE(MAX(version), 0) FROM schema_version").Scan(&current) //nolint:errcheck
+	// current stays 0 when nothing is recorded (or the read failed): seed the dialect baseline so migrations at or below it are skipped.
+	if current == 0 { // NOTE(review): a pre-versioning database is seeded the same way, so migrations <= baseline never run on it — confirm intended
+		baseline := s.dialect.BaselineVersion()
+		if _, err := s.db.ExecContext(ctx, s.q("INSERT INTO schema_version (version) VALUES (?)"), baseline); err != nil {
+			return fmt.Errorf("seed baseline version: %w", err)
+		}
+		current = baseline
+	}
+
+	for _, m := range s.dialect.Migrations() { // applied in slice order; each success is recorded before the next one runs
+		if m.Version <= current {
 			continue
 		}
-			return fmt.Errorf("migration failed: %w", err)
+		if _, err := s.db.ExecContext(ctx, m.SQL); err != nil {
+			return fmt.Errorf("migration %d failed: %w", m.Version, err)
+		}
+		if _, err := s.db.ExecContext(ctx, s.q("INSERT INTO schema_version (version) VALUES (?)"), m.Version); err != nil {
+			return fmt.Errorf("record migration %d: %w", m.Version, err)
 		}
 	}
 	return nil
 }

-func (s *SQLStore) GetSites() ([]models.Site, error) {
+func (s *SQLStore) GetSites(ctx context.Context) ([]models.SiteConfig, error) {
 	bf := s.dialect.BoolFalse()
 	query := fmt.Sprintf( //nolint:gosec // bf is a dialect boolean literal, not user input
 		"SELECT id, COALESCE(name, url), url, COALESCE(type, 'http'), COALESCE(token, ''), interval, alert_id, check_ssl, threshold, max_retries, COALESCE(hostname, ''), COALESCE(port, 0), COALESCE(timeout, 0), COALESCE(method, 'GET'), COALESCE(description, ''), COALESCE(parent_id, 0), COALESCE(accepted_codes, '200-299'), COALESCE(dns_resolve_type, ''), COALESCE(dns_server, ''), COALESCE(ignore_tls, %s), COALESCE(paused, %s), COALESCE(regions, '') FROM sites",
 		bf, bf,
 	)
-	rows, err := s.db.Query(query)
+	rows, err := s.db.QueryContext(ctx, query)
 	if err != nil {
 		return nil, err
 	}
 	defer rows.Close()
-	var sites []models.Site
+	var sites []models.SiteConfig
 	for rows.Next() {
-		var st models.Site
+		var st models.SiteConfig
 		if err := rows.Scan(&st.ID, &st.Name, &st.URL, &st.Type, &st.Token, &st.Interval, &st.AlertID,
 			&st.CheckSSL, &st.ExpiryThreshold, &st.MaxRetries, &st.Hostname, &st.Port, &st.Timeout,
 			&st.Method, &st.Description, &st.ParentID, &st.AcceptedCodes, &st.DNSResolveType,
@@ -115,7 +136,7 @@ func (s *SQLStore) GetSites() ([]models.Site, error) {
 	return sites, rows.Err()
 }

-func (s *SQLStore) AddSite(site models.Site) error {
+func (s *SQLStore) AddSite(ctx context.Context, site models.SiteConfig) error {
 	token := ""
 	if site.Type == "push" {
 		var err error
@@ -124,15 +145,17 @@ func (s *SQLStore) AddSite(site models.Site) error {
 			return fmt.Errorf("generate push token: %w", err)
 		}
 	}
-	_, err := s.db.Exec(s.q("INSERT INTO sites (name, url, type, token, interval, alert_id, check_ssl, threshold, max_retries, hostname, port, timeout, method, description, parent_id, accepted_codes, dns_resolve_type, dns_server, ignore_tls, paused, regions) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"),
+	_, err := s.db.ExecContext(ctx, s.q("INSERT INTO sites (name, url, type, token, interval, alert_id, check_ssl, threshold, max_retries, hostname, port, timeout, method, description, parent_id, accepted_codes, dns_resolve_type, dns_server, ignore_tls, paused, regions) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"),
 		site.Name, site.URL, site.Type, token, site.Interval, site.AlertID, site.CheckSSL, site.ExpiryThreshold, site.MaxRetries,
 		site.Hostname, site.Port, site.Timeout, site.Method, site.Description, site.ParentID, site.AcceptedCodes, site.DNSResolveType, site.DNSServer, site.IgnoreTLS, site.Paused, site.Regions)
 	return err
 }

-func (s *SQLStore) UpdateSite(site models.Site) error {
+func (s *SQLStore) UpdateSite(ctx context.Context, site models.SiteConfig) error {
 	var existingToken string
-	_ = s.db.QueryRow(s.q("SELECT token FROM sites WHERE id=?"), site.ID).Scan(&existingToken) //nolint:errcheck
+	if err := s.db.QueryRowContext(ctx, s.q("SELECT token FROM sites WHERE id=?"), site.ID).Scan(&existingToken); err != nil && err != sql.ErrNoRows {
+		return fmt.Errorf("read existing token: %w", err)
+	}
 	if site.Type == "push" && existingToken == "" {
 		var err error
 		existingToken, err = generateToken()
@@ -140,34 +163,50 @@ func (s *SQLStore) UpdateSite(site models.Site) error {
 			return fmt.Errorf("generate push token: %w", err)
 		}
 	}
-	_, err := s.db.Exec(s.q("UPDATE sites SET name=?, url=?, type=?, token=?, interval=?, alert_id=?, check_ssl=?, threshold=?, max_retries=?, hostname=?, port=?, timeout=?, method=?, description=?, parent_id=?, accepted_codes=?, dns_resolve_type=?, dns_server=?, ignore_tls=?, paused=?, regions=? WHERE id=?"),
+	_, err := s.db.ExecContext(ctx, s.q("UPDATE sites SET name=?, url=?, type=?, token=?, interval=?, alert_id=?, check_ssl=?, threshold=?, max_retries=?, hostname=?, port=?, timeout=?, method=?, description=?, parent_id=?, accepted_codes=?, dns_resolve_type=?, dns_server=?, ignore_tls=?, paused=?, regions=? WHERE id=?"),
 		site.Name, site.URL, site.Type, existingToken, site.Interval, site.AlertID, site.CheckSSL, site.ExpiryThreshold, site.MaxRetries,
 		site.Hostname, site.Port, site.Timeout, site.Method, site.Description, site.ParentID, site.AcceptedCodes, site.DNSResolveType, site.DNSServer, site.IgnoreTLS, site.Paused, site.Regions, site.ID)
 	return err
 }

-func (s *SQLStore) UpdateSitePaused(id int, paused bool) error {
-	_, err := s.db.Exec(s.q("UPDATE sites SET paused=? WHERE id=?"), paused, id)
+func (s *SQLStore) UpdateSitePaused(ctx context.Context, id int, paused bool) error {
+	_, err := s.db.ExecContext(ctx, s.q("UPDATE sites SET paused=? WHERE id=?"), paused, id) // writes only the paused column; the affected-row count is not checked
 	return err
 }

-func (s *SQLStore) DeleteSite(id int) error {
-	_, err := s.db.Exec(s.q("DELETE FROM sites WHERE id=?"), id)
+func (s *SQLStore) DeleteSite(ctx context.Context, id int) error {
+	tx, err := s.db.BeginTx(ctx, nil)
 	if err != nil {
 		return err
 	}
+	defer func() { _ = tx.Rollback() }() // undoes partial deletes on an early return; its result is discarded
+	// Delete the rows that reference the site first, then the site row itself, all in one transaction.
+	for _, q := range []string{
+		"DELETE FROM maintenance_windows WHERE monitor_id = ?",
+		"DELETE FROM check_history WHERE site_id = ?",
+		"DELETE FROM state_changes WHERE site_id = ?",
+		"DELETE FROM sites WHERE id = ?",
+	} {
+		if _, err := tx.ExecContext(ctx, s.q(q), id); err != nil {
+			return err
+		}
+	}
+
+	if err := tx.Commit(); err != nil {
+		return err
+	}
 	s.dialect.ResetSequenceOnEmpty(s.db, "sites")
 	return nil
 }

-func (s *SQLStore) GetSiteByName(name string) (models.Site, error) {
+func (s *SQLStore) GetSiteByName(ctx context.Context, name string) (models.SiteConfig, error) {
 	bf := s.dialect.BoolFalse()
 	query := fmt.Sprintf( //nolint:gosec // bf is a dialect boolean literal, not user input
 		"SELECT id, COALESCE(name, url), url, COALESCE(type, 'http'), COALESCE(token, ''), interval, alert_id, check_ssl, threshold, max_retries, COALESCE(hostname, ''), COALESCE(port, 0), COALESCE(timeout, 0), COALESCE(method, 'GET'), COALESCE(description, ''), COALESCE(parent_id, 0), COALESCE(accepted_codes, '200-299'), COALESCE(dns_resolve_type, ''), COALESCE(dns_server, ''), COALESCE(ignore_tls, %s), COALESCE(paused, %s), COALESCE(regions, '') FROM sites WHERE name = %s",
 		bf, bf, s.q("?"),
 	)
-	var st models.Site
-	err := s.db.QueryRow(query, name).Scan(&st.ID, &st.Name, &st.URL, &st.Type, &st.Token, &st.Interval, &st.AlertID,
+	var st models.SiteConfig
+	err := s.db.QueryRowContext(ctx, query, name).Scan(&st.ID, &st.Name, &st.URL, &st.Type, &st.Token, &st.Interval, &st.AlertID,
 		&st.CheckSSL, &st.ExpiryThreshold, &st.MaxRetries, &st.Hostname, &st.Port, &st.Timeout,
 		&st.Method, &st.Description, &st.ParentID, &st.AcceptedCodes, &st.DNSResolveType,
 		&st.DNSServer, &st.IgnoreTLS, &st.Paused, &st.Regions)
@@ -194,10 +233,10 @@ func (s *SQLStore) marshalSettings(settings map[string]string) (string, error) {
 	return s.encryptSettings(string(jsonBytes))
 }

-func (s *SQLStore) GetAlertByName(name string) (models.AlertConfig, error) {
+func (s *SQLStore) GetAlertByName(ctx context.Context, name string) (models.AlertConfig, error) {
 	var a models.AlertConfig
 	var settingsRaw string
-	err := s.db.QueryRow(s.q("SELECT id, name, type, settings FROM alerts WHERE name = ?"), name).Scan(&a.ID, &a.Name, &a.Type, &settingsRaw)
+	err := s.db.QueryRowContext(ctx, s.q("SELECT id, name, type, settings FROM alerts WHERE name = ?"), name).Scan(&a.ID, &a.Name, &a.Type, &settingsRaw)
 	if err != nil {
 		return a, err
 	}
@@ -208,7 +247,7 @@ func (s *SQLStore) GetAlertByName(name string) (models.AlertConfig, error) {
 	return a, nil
 }

-func (s *SQLStore) AddSiteReturningID(site models.Site) (int, error) {
+func (s *SQLStore) AddSiteReturningID(ctx context.Context, site models.SiteConfig) (int, error) {
 	token := ""
 	if site.Type == "push" {
 		var err error
@@ -219,12 +258,12 @@ func (s *SQLStore) AddSiteReturningID(site models.Site) (int, error) {
 	}
 	if s.dollar {
 		var id int
-		err := s.db.QueryRow(s.q("INSERT INTO sites (name, url, type, token, interval, alert_id, check_ssl, threshold, max_retries, hostname, port, timeout, method, description, parent_id, accepted_codes, dns_resolve_type, dns_server, ignore_tls, paused, regions) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) RETURNING id"),
+		err := s.db.QueryRowContext(ctx, s.q("INSERT INTO sites (name, url, type, token, interval, alert_id, check_ssl, threshold, max_retries, hostname, port, timeout, method, description, parent_id, accepted_codes, dns_resolve_type, dns_server, ignore_tls, paused, regions) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) RETURNING id"),
 			site.Name, site.URL, site.Type, token, site.Interval, site.AlertID, site.CheckSSL, site.ExpiryThreshold, site.MaxRetries,
 			site.Hostname, site.Port, site.Timeout, site.Method, site.Description, site.ParentID, site.AcceptedCodes, site.DNSResolveType, site.DNSServer, site.IgnoreTLS, site.Paused, site.Regions).Scan(&id)
 		return id, err
 	}
-	result, err := s.db.Exec(s.q("INSERT INTO sites (name, url, type, token, interval, alert_id, check_ssl, threshold, max_retries, hostname, port, timeout, method, description, parent_id, accepted_codes, dns_resolve_type, dns_server, ignore_tls, paused, regions) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"),
+	result, err := s.db.ExecContext(ctx, s.q("INSERT INTO sites (name, url, type, token, interval, alert_id, check_ssl, threshold, max_retries, hostname, port, timeout, method, description, parent_id, accepted_codes, dns_resolve_type, dns_server, ignore_tls, paused, regions) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"),
 		site.Name, site.URL, site.Type, token, site.Interval, site.AlertID, site.CheckSSL, site.ExpiryThreshold, site.MaxRetries,
 		site.Hostname, site.Port, site.Timeout, site.Method, site.Description, site.ParentID, site.AcceptedCodes, site.DNSResolveType, site.DNSServer, site.IgnoreTLS, site.Paused, site.Regions)
 	if err != nil {
@@ -234,17 +273,17 @@ func (s *SQLStore) AddSiteReturningID(site models.Site) (int, error) {
 	return int(id), err
 }

-func (s *SQLStore) AddAlertReturningID(name, aType string, settings map[string]string) (int, error) {
+func (s *SQLStore) AddAlertReturningID(ctx context.Context, name, aType string, settings map[string]string) (int, error) {
 	stored, err := s.marshalSettings(settings)
 	if err != nil {
 		return 0, err
 	}
 	if s.dollar {
 		var id int
-		err := s.db.QueryRow(s.q("INSERT INTO alerts (name, type, settings) VALUES (?, ?, ?) RETURNING id"), name, aType, stored).Scan(&id)
+		err := s.db.QueryRowContext(ctx, s.q("INSERT INTO alerts (name, type, settings) VALUES (?, ?, ?) RETURNING id"), name, aType, stored).Scan(&id)
 		return id, err
 	}
-	result, err := s.db.Exec(s.q("INSERT INTO alerts (name, type, settings) VALUES (?, ?, ?)"), name, aType, stored)
+	result, err := s.db.ExecContext(ctx, s.q("INSERT INTO alerts (name, type, settings) VALUES (?, ?, ?)"), name, aType, stored)
 	if err != nil {
 		return 0, err
 	}
@@ -252,8 +291,8 @@ func (s *SQLStore) AddAlertReturningID(name, aType string, settings map[string]s
 	return int(id), err
 }

-func (s *SQLStore) GetAllAlerts() ([]models.AlertConfig, error) {
-	rows, err := s.db.Query("SELECT id, name, type, settings FROM alerts")
+func (s *SQLStore) GetAllAlerts(ctx context.Context) ([]models.AlertConfig, error) {
+	rows, err := s.db.QueryContext(ctx, "SELECT id, name, type, settings FROM alerts")
 	if err != nil {
 		return nil, err
 	}
@@ -274,10 +313,10 @@ func (s *SQLStore) GetAllAlerts() ([]models.AlertConfig, error) {
 	return alerts, rows.Err()
 }

-func (s *SQLStore) GetAlert(id int) (models.AlertConfig, error) {
+func (s *SQLStore) GetAlert(ctx context.Context, id int) (models.AlertConfig, error) {
 	var a models.AlertConfig
 	var settingsRaw string
-	err := s.db.QueryRow(s.q("SELECT id, name, type, settings FROM alerts WHERE id = ?"), id).Scan(&a.ID, &a.Name, &a.Type, &settingsRaw)
+	err := s.db.QueryRowContext(ctx, s.q("SELECT id, name, type, settings FROM alerts WHERE id = ?"), id).Scan(&a.ID, &a.Name, &a.Type, &settingsRaw)
 	if err != nil {
 		return a, err
 	}
@@ -288,35 +327,37 @@ func (s *SQLStore) GetAlert(id int) (models.AlertConfig, error) {
 	return a, nil
 }

-func (s *SQLStore) AddAlert(name, aType string, settings map[string]string) error {
+// AddAlert inserts a new row into alerts with the given name and type.
+// settings is passed through marshalSettings first and the resulting string
+// is what gets stored. The generated id is not returned; use
+// AddAlertReturningID when the caller needs it.
+func (s *SQLStore) AddAlert(ctx context.Context, name, aType string, settings map[string]string) error {
 	stored, err := s.marshalSettings(settings)
 	if err != nil {
 		return err
 	}
-	_, err = s.db.Exec(s.q("INSERT INTO alerts (name, type, settings) VALUES (?, ?, ?)"), name, aType, stored)
+	_, err = s.db.ExecContext(ctx, s.q("INSERT INTO alerts (name, type, settings) VALUES (?, ?, ?)"), name, aType, stored)
 	return err
 }

-func (s *SQLStore) UpdateAlert(id int, name, aType string, settings map[string]string) error {
+// UpdateAlert overwrites the name, type and settings of the alert with the
+// given id. settings is passed through marshalSettings before being stored.
+// The number of affected rows is not inspected, so an id that matches no row
+// still returns nil.
+func (s *SQLStore) UpdateAlert(ctx context.Context, id int, name, aType string, settings map[string]string) error {
 	stored, err := s.marshalSettings(settings)
 	if err != nil {
 		return err
 	}
-	_, err = s.db.Exec(s.q("UPDATE alerts SET name=?, type=?, settings=? WHERE id=?"), name, aType, stored, id)
+	_, err = s.db.ExecContext(ctx, s.q("UPDATE alerts SET name=?, type=?, settings=? WHERE id=?"), name, aType, stored, id)
 	return err
 }

-func (s *SQLStore) DeleteAlert(id int) error {
-	_, err := s.db.Exec(s.q("DELETE FROM alerts WHERE id=?"), id)
-	if err != nil {
+// DeleteAlert removes the alert with the given id. Sites that referenced it
+// are detached first (alert_id reset to 0). Both statements run in a single
+// transaction so a failed DELETE cannot leave sites stripped of an alert
+// that still exists.
+func (s *SQLStore) DeleteAlert(ctx context.Context, id int) error {
+	tx, err := s.db.BeginTx(ctx, nil)
+	if err != nil {
+		return err
+	}
+	// Rollback after a successful Commit is a no-op (sql.ErrTxDone), so it is
+	// safe to defer unconditionally.
+	defer func() { _ = tx.Rollback() }()
+
+	if _, err := tx.ExecContext(ctx, s.q("UPDATE sites SET alert_id = 0 WHERE alert_id = ?"), id); err != nil {
+		return err
+	}
+	if _, err := tx.ExecContext(ctx, s.q("DELETE FROM alerts WHERE id=?"), id); err != nil {
 		return err
 	}
+	if err := tx.Commit(); err != nil {
+		return err
+	}
 	s.dialect.ResetSequenceOnEmpty(s.db, "alerts")
 	return nil
 }

-func (s *SQLStore) GetAllUsers() ([]models.User, error) {
-	rows, err := s.db.Query("SELECT id, username, public_key, role FROM users")
+func (s *SQLStore) GetAllUsers(ctx context.Context) ([]models.User, error) {
+	rows, err := s.db.QueryContext(ctx, "SELECT id, username, public_key, role FROM users")
 	if err != nil {
 		return nil, err
 	}
@@ -332,29 +373,29 @@ func (s *SQLStore) GetAllUsers() ([]models.User, error) {
 	return users, rows.Err()
 }

-func (s *SQLStore) AddUser(username, publicKey, role string) error {
-	_, err := s.db.Exec(s.q("INSERT INTO users (username, public_key, role) VALUES (?, ?, ?)"), username, publicKey, role)
+// AddUser inserts a new row into users with the given username, public key
+// and role. No validation is performed here; any constraint violation is
+// returned as the database error.
+func (s *SQLStore) AddUser(ctx context.Context, username, publicKey, role string) error {
+	_, err := s.db.ExecContext(ctx, s.q("INSERT INTO users (username, public_key, role) VALUES (?, ?, ?)"), username, publicKey, role)
 	return err
 }

-func (s *SQLStore) UpdateUser(id int, username, publicKey, role string) error {
-	_, err := s.db.Exec(s.q("UPDATE users SET username=?, public_key=?, role=? WHERE id=?"), username, publicKey, role, id)
+// UpdateUser overwrites the username, public key and role of the user with
+// the given id. The number of affected rows is not inspected, so an id that
+// matches no row still returns nil.
+func (s *SQLStore) UpdateUser(ctx context.Context, id int, username, publicKey, role string) error {
+	_, err := s.db.ExecContext(ctx, s.q("UPDATE users SET username=?, public_key=?, role=? WHERE id=?"), username, publicKey, role, id)
 	return err
 }

-func (s *SQLStore) DeleteUser(id int) error {
-	_, err := s.db.Exec(s.q("DELETE FROM users WHERE id=?"), id)
+// DeleteUser deletes the user with the given id. Deleting an id that does
+// not exist is not reported as an error.
+func (s *SQLStore) DeleteUser(ctx context.Context, id int) error {
+	_, err := s.db.ExecContext(ctx, s.q("DELETE FROM users WHERE id=?"), id)
 	return err
 }

-func (s *SQLStore) SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error {
-	_, err := s.db.Exec(s.q("INSERT INTO state_changes (site_id, from_status, to_status, error_reason) VALUES (?, ?, ?, ?)"),
+// SaveStateChange appends one status transition for siteID to state_changes.
+// changed_at is not supplied by this insert; presumably the column has a
+// database-side default (confirm against the schema).
+func (s *SQLStore) SaveStateChange(ctx context.Context, siteID int, fromStatus, toStatus, errorReason string) error {
+	_, err := s.db.ExecContext(ctx, s.q("INSERT INTO state_changes (site_id, from_status, to_status, error_reason) VALUES (?, ?, ?, ?)"),
 		siteID, fromStatus, toStatus, errorReason)
 	return err
 }

-func (s *SQLStore) GetStateChanges(siteID int, limit int) ([]models.StateChange, error) {
-	rows, err := s.db.Query(s.q("SELECT id, site_id, from_status, to_status, error_reason, changed_at FROM state_changes WHERE site_id = ? ORDER BY changed_at DESC LIMIT ?"), siteID, limit)
+func (s *SQLStore) GetStateChanges(ctx context.Context, siteID int, limit int) ([]models.StateChange, error) {
+	rows, err := s.db.QueryContext(ctx, s.q("SELECT id, site_id, from_status, to_status, error_reason, changed_at FROM state_changes WHERE site_id = ? ORDER BY changed_at DESC LIMIT ?"), siteID, limit)
 	if err != nil {
 		return nil, err
 	}
@@ -370,8 +411,8 @@ func (s *SQLStore) GetStateChanges(siteID int, limit int) ([]models.StateChange,
 	return changes, rows.Err()
 }

-func (s *SQLStore) GetStateChangesSince(siteID int, since time.Time) ([]models.StateChange, error) {
-	rows, err := s.db.Query(s.q("SELECT id, site_id, from_status, to_status, error_reason, changed_at FROM state_changes WHERE site_id = ? AND changed_at >= ? ORDER BY changed_at DESC"), siteID, since)
+func (s *SQLStore) GetStateChangesSince(ctx context.Context, siteID int, since time.Time) ([]models.StateChange, error) {
+	rows, err := s.db.QueryContext(ctx, s.q("SELECT id, site_id, from_status, to_status, error_reason, changed_at FROM state_changes WHERE site_id = ? AND changed_at >= ? ORDER BY changed_at DESC"), siteID, since)
 	if err != nil {
 		return nil, err
 	}
@@ -387,41 +428,59 @@ func (s *SQLStore) GetStateChangesSince(siteID int, since time.Time) ([]models.S
 	return changes, rows.Err()
 }

-func (s *SQLStore) SaveCheck(siteID int, latencyNs int64, isUp bool) error {
-	return s.SaveCheckFromNode(siteID, "", latencyNs, isUp)
+// SaveCheck records one check result for siteID with an empty node ID. It is
+// a convenience wrapper around SaveCheckFromNode.
+func (s *SQLStore) SaveCheck(ctx context.Context, siteID int, latencyNs int64, isUp bool) error {
+	return s.SaveCheckFromNode(ctx, siteID, "", latencyNs, isUp)
 }

-func (s *SQLStore) SaveCheckFromNode(siteID int, nodeID string, latencyNs int64, isUp bool) error {
-	_, err := s.db.Exec(s.q("INSERT INTO check_history (site_id, node_id, latency_ns, is_up) VALUES (?, ?, ?, ?)"), siteID, nodeID, latencyNs, isUp)
-	if err != nil {
+// SaveCheckFromNode inserts a single check row. Retention is handled out of
+// band by PruneCheckHistory on a timer, not per-insert, to keep the write hot
+// path a plain INSERT.
+func (s *SQLStore) SaveCheckFromNode(ctx context.Context, siteID int, nodeID string, latencyNs int64, isUp bool) error {
+	_, err := s.db.ExecContext(ctx, s.q("INSERT INTO check_history (site_id, node_id, latency_ns, is_up) VALUES (?, ?, ?, ?)"), siteID, nodeID, latencyNs, isUp)
 	return err
-	}
-	var count int
-	_ = s.db.QueryRow(s.q("SELECT COUNT(*) FROM check_history WHERE site_id = ?"), siteID).Scan(&count)
-	if count > checkHistoryPruneAt {
-		pruneQuery := fmt.Sprintf(`DELETE FROM check_history WHERE site_id = ? AND id NOT IN (
-			SELECT id FROM check_history WHERE site_id = ? ORDER BY checked_at DESC LIMIT %d
+}
+
+// PruneCheckHistory trims check_history to the newest maxCheckHistory rows per
+// site, across all sites, in one pass. Intended to run periodically.
+func (s *SQLStore) PruneCheckHistory(ctx context.Context) error {
+	// Rows are ranked per site from newest to oldest; id DESC breaks ties
+	// between rows that share a checked_at value so the set of survivors is
+	// deterministic. Everything ranked past maxCheckHistory is deleted.
+	// The limit is formatted into the SQL text with %d rather than bound as
+	// a parameter; the statement takes no caller-supplied values.
+	q := fmt.Sprintf(`DELETE FROM check_history WHERE id IN (
+		SELECT id FROM (
+			SELECT id, ROW_NUMBER() OVER (PARTITION BY site_id ORDER BY checked_at DESC, id DESC) AS rn
+			FROM check_history
+		) ranked WHERE rn > %d
 	)`, maxCheckHistory)
-		_, err = s.db.Exec(s.q(pruneQuery), siteID, siteID)
-		return err
-	}
-	return nil
-}
-
-func (s *SQLStore) RegisterNode(node models.ProbeNode) error {
-	_, err := s.db.Exec(s.dialect.UpsertNodeSQL(), node.ID, node.Name, node.Region, node.Version)
+	_, err := s.db.ExecContext(ctx, s.q(q))
 	return err
 }

-func (s *SQLStore) GetNode(id string) (models.ProbeNode, error) {
+// PruneStateChanges trims state_changes to the newest maxStateChangesPerSite
+// rows per site. Generous so realistic SLA windows are unaffected; bounds the
+// otherwise unbounded growth of a flapping monitor's history.
+func (s *SQLStore) PruneStateChanges(ctx context.Context) error {
+	// Same ranking scheme as PruneCheckHistory: newest first per site, with
+	// id DESC as the tiebreak for rows sharing a changed_at value.
+	q := fmt.Sprintf(`DELETE FROM state_changes WHERE id IN (
+		SELECT id FROM (
+			SELECT id, ROW_NUMBER() OVER (PARTITION BY site_id ORDER BY changed_at DESC, id DESC) AS rn
+			FROM state_changes
+		) ranked WHERE rn > %d
+	)`, maxStateChangesPerSite)
+	_, err := s.db.ExecContext(ctx, s.q(q))
+	return err
+}
+
+// RegisterNode executes the dialect's UpsertNodeSQL statement with the node's
+// ID, name, region and version. node.LastSeen is not passed; how last_seen is
+// set is up to that statement.
+func (s *SQLStore) RegisterNode(ctx context.Context, node models.ProbeNode) error {
+	_, err := s.db.ExecContext(ctx, s.dialect.UpsertNodeSQL(), node.ID, node.Name, node.Region, node.Version)
+	return err
+}
+
+// GetNode loads the probe node with the given id. When no row matches, the
+// error from Scan (sql.ErrNoRows) is returned together with a zero-valued
+// ProbeNode.
+func (s *SQLStore) GetNode(ctx context.Context, id string) (models.ProbeNode, error) {
 	var n models.ProbeNode
-	err := s.db.QueryRow(s.q("SELECT id, name, region, last_seen, version FROM nodes WHERE id = ?"), id).
+	err := s.db.QueryRowContext(ctx, s.q("SELECT id, name, region, last_seen, version FROM nodes WHERE id = ?"), id).
 		Scan(&n.ID, &n.Name, &n.Region, &n.LastSeen, &n.Version)
 	return n, err
 }

-func (s *SQLStore) GetAllNodes() ([]models.ProbeNode, error) {
-	rows, err := s.db.Query("SELECT id, name, region, last_seen, version FROM nodes ORDER BY region, name")
+func (s *SQLStore) GetAllNodes(ctx context.Context) ([]models.ProbeNode, error) {
+	rows, err := s.db.QueryContext(ctx, "SELECT id, name, region, last_seen, version FROM nodes ORDER BY region, name")
 	if err != nil {
 		return nil, err
 	}
@@ -437,18 +496,18 @@ func (s *SQLStore) GetAllNodes() ([]models.ProbeNode, error) {
 	return nodes, rows.Err()
 }

-func (s *SQLStore) UpdateNodeLastSeen(id string) error {
-	_, err := s.db.Exec(s.q("UPDATE nodes SET last_seen = CURRENT_TIMESTAMP WHERE id = ?"), id)
+// UpdateNodeLastSeen sets last_seen for the node with the given id to the
+// database's CURRENT_TIMESTAMP. An id that matches no row is not reported as
+// an error.
+func (s *SQLStore) UpdateNodeLastSeen(ctx context.Context, id string) error {
+	_, err := s.db.ExecContext(ctx, s.q("UPDATE nodes SET last_seen = CURRENT_TIMESTAMP WHERE id = ?"), id)
 	return err
 }

-func (s *SQLStore) DeleteNode(id string) error {
-	_, err := s.db.Exec(s.q("DELETE FROM nodes WHERE id = ?"), id)
+// DeleteNode deletes the node with the given id from the nodes table. Rows in
+// other tables are not touched by this statement.
+func (s *SQLStore) DeleteNode(ctx context.Context, id string) error {
+	_, err := s.db.ExecContext(ctx, s.q("DELETE FROM nodes WHERE id = ?"), id)
 	return err
 }

-func (s *SQLStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
-	rows, err := s.db.Query("SELECT alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count FROM alert_health")
+func (s *SQLStore) LoadAlertHealth(ctx context.Context) (map[int]models.AlertHealthRecord, error) {
+	rows, err := s.db.QueryContext(ctx, "SELECT alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count FROM alert_health")
 	if err != nil {
 		return nil, err
 	}
@@ -468,29 +527,35 @@ func (s *SQLStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
 	return out, rows.Err()
 }

-func (s *SQLStore) SaveAlertHealth(h models.AlertHealthRecord) error {
+// SaveAlertHealth writes the health record for h.AlertID using the dialect's
+// UpsertAlertHealthSQL statement. A zero h.LastSendAt is stored as NULL
+// rather than as the zero time.
+func (s *SQLStore) SaveAlertHealth(ctx context.Context, h models.AlertHealthRecord) error {
 	var lastSend interface{}
 	if !h.LastSendAt.IsZero() {
 		lastSend = h.LastSendAt
 	}
-	_, err := s.db.Exec(s.dialect.UpsertAlertHealthSQL(),
+	_, err := s.db.ExecContext(ctx, s.dialect.UpsertAlertHealthSQL(),
 		h.AlertID, lastSend, h.LastSendOK, h.LastError, h.SendCount, h.FailCount)
 	return err
 }

-func (s *SQLStore) SaveLog(message string) error {
-	_, err := s.db.Exec(s.q("INSERT INTO logs (message) VALUES (?)"), message)
-	if err != nil {
-		return err
-	}
-	_, err = s.db.Exec(s.q(`DELETE FROM logs WHERE id NOT IN (
-		SELECT id FROM logs ORDER BY created_at DESC LIMIT 200
-	)`))
+// SaveLog inserts a single log row. Retention is handled by PruneLogs on a
+// timer, not per-insert.
+// Only the message column is supplied; created_at is presumably filled by a
+// column default (confirm against the schema).
+func (s *SQLStore) SaveLog(ctx context.Context, message string) error {
+	_, err := s.db.ExecContext(ctx, s.q("INSERT INTO logs (message) VALUES (?)"), message)
 	return err
 }

-func (s *SQLStore) LoadLogs(limit int) ([]string, error) {
-	rows, err := s.db.Query(s.q("SELECT message FROM logs ORDER BY created_at DESC LIMIT ?"), limit)
+// PruneLogs trims the logs table to the newest maxLogRows rows. The id DESC
+// tiebreak keeps ordering deterministic when rows share a created_at second.
+func (s *SQLStore) PruneLogs(ctx context.Context) error {
+	q := fmt.Sprintf(`DELETE FROM logs WHERE id NOT IN (
+		SELECT id FROM logs ORDER BY created_at DESC, id DESC LIMIT %d
+	)`, maxLogRows)
+	_, err := s.db.ExecContext(ctx, s.q(q))
+	return err
+}
+
+// LoadLogs queries at most limit log messages, newest first. It uses the same
+// created_at DESC, id DESC ordering as PruneLogs so that rows sharing a
+// created_at value come back in a stable order and the rows selected by the
+// LIMIT match the rows retention would keep.
+func (s *SQLStore) LoadLogs(ctx context.Context, limit int) ([]string, error) {
+	rows, err := s.db.QueryContext(ctx, s.q("SELECT message FROM logs ORDER BY created_at DESC, id DESC LIMIT ?"), limit)
 	if err != nil {
 		return nil, err
 	}
@@ -506,9 +571,9 @@ func (s *SQLStore) LoadLogs(limit int) ([]string, error) {
 	return logs, rows.Err()
 }

-func (s *SQLStore) LoadAllHistory(limit int) (map[int][]models.CheckRecord, error) {
+func (s *SQLStore) LoadAllHistory(ctx context.Context, limit int) (map[int][]models.CheckRecord, error) {
 	result := make(map[int][]models.CheckRecord)
-	rows, err := s.db.Query(s.q(`
+	rows, err := s.db.QueryContext(ctx, s.q(`
 		SELECT site_id, latency_ns, is_up FROM (
 			SELECT site_id, latency_ns, is_up,
 				ROW_NUMBER() OVER (PARTITION BY site_id ORDER BY checked_at DESC) AS rn
@@ -546,8 +611,8 @@ func (s *SQLStore) scanMaintenanceWindow(rows *sql.Rows) (models.MaintenanceWind
 	return mw, nil
 }

-func (s *SQLStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
-	rows, err := s.db.Query(s.q("SELECT id, monitor_id, title, description, type, start_time, end_time, created_by, created_at FROM maintenance_windows WHERE start_time <= CURRENT_TIMESTAMP AND (end_time IS NULL OR end_time > CURRENT_TIMESTAMP) ORDER BY start_time DESC"))
+func (s *SQLStore) GetActiveMaintenanceWindows(ctx context.Context) ([]models.MaintenanceWindow, error) {
+	rows, err := s.db.QueryContext(ctx, s.q("SELECT id, monitor_id, title, description, type, start_time, end_time, created_by, created_at FROM maintenance_windows WHERE start_time <= CURRENT_TIMESTAMP AND (end_time IS NULL OR end_time > CURRENT_TIMESTAMP) ORDER BY start_time DESC"))
 	if err != nil {
 		return nil, err
 	}
@@ -563,8 +628,8 @@ func (s *SQLStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, er
 	return windows, rows.Err()
 }

-func (s *SQLStore) GetAllMaintenanceWindows(limit int) ([]models.MaintenanceWindow, error) {
-	rows, err := s.db.Query(s.q("SELECT id, monitor_id, title, description, type, start_time, end_time, created_by, created_at FROM maintenance_windows ORDER BY created_at DESC LIMIT ?"), limit)
+func (s *SQLStore) GetAllMaintenanceWindows(ctx context.Context, limit int) ([]models.MaintenanceWindow, error) {
+	rows, err := s.db.QueryContext(ctx, s.q("SELECT id, monitor_id, title, description, type, start_time, end_time, created_by, created_at FROM maintenance_windows ORDER BY created_at DESC LIMIT ?"), limit)
 	if err != nil {
 		return nil, err
 	}
@@ -580,22 +645,22 @@ func (s *SQLStore) GetAllMaintenanceWindows(limit int) ([]models.MaintenanceWind
 	return windows, rows.Err()
 }

-func (s *SQLStore) AddMaintenanceWindow(mw models.MaintenanceWindow) error {
+// AddMaintenanceWindow inserts a maintenance window. A zero StartTime is
+// replaced with time.Now(); because mw is passed by value the caller's copy
+// is not modified. A zero EndTime is stored as NULL, which the active-window
+// queries in this file treat as open-ended.
+func (s *SQLStore) AddMaintenanceWindow(ctx context.Context, mw models.MaintenanceWindow) error {
 	if mw.StartTime.IsZero() {
 		mw.StartTime = time.Now()
 	}
-	_, err := s.db.Exec(s.q("INSERT INTO maintenance_windows (monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?)"),
+	_, err := s.db.ExecContext(ctx, s.q("INSERT INTO maintenance_windows (monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?)"),
 		mw.MonitorID, mw.Title, mw.Description, mw.Type, mw.StartTime, sql.NullTime{Time: mw.EndTime, Valid: !mw.EndTime.IsZero()}, mw.CreatedBy)
 	return err
 }

-func (s *SQLStore) EndMaintenanceWindow(id int) error {
-	_, err := s.db.Exec(s.q("UPDATE maintenance_windows SET end_time = CURRENT_TIMESTAMP WHERE id = ?"), id)
+// EndMaintenanceWindow sets end_time of the window with the given id to the
+// database's CURRENT_TIMESTAMP. Any existing end_time is overwritten, and an
+// id that matches no row is not reported as an error.
+func (s *SQLStore) EndMaintenanceWindow(ctx context.Context, id int) error {
+	_, err := s.db.ExecContext(ctx, s.q("UPDATE maintenance_windows SET end_time = CURRENT_TIMESTAMP WHERE id = ?"), id)
 	return err
 }

-func (s *SQLStore) DeleteMaintenanceWindow(id int) error {
-	_, err := s.db.Exec(s.q("DELETE FROM maintenance_windows WHERE id = ?"), id)
+func (s *SQLStore) DeleteMaintenanceWindow(ctx context.Context, id int) error {
+	_, err := s.db.ExecContext(ctx, s.q("DELETE FROM maintenance_windows WHERE id = ?"), id)
 	if err != nil {
 		return err
 	}
@@ -603,9 +668,21 @@ func (s *SQLStore) DeleteMaintenanceWindow(id int) error {
 	return nil
 }

-func (s *SQLStore) IsMonitorInMaintenance(monitorID int) (bool, error) {
+// PruneExpiredMaintenanceWindows deletes maintenance windows whose end_time
+// is set and lies more than retention in the past, and returns the number of
+// rows removed. Windows with a NULL end_time are never pruned.
+//
+// NOTE(review): the cutoff is a Go time.Time built from time.Now(), while
+// other queries in this file compare against CURRENT_TIMESTAMP; confirm the
+// driver stores both in a comparable form.
+func (s *SQLStore) PruneExpiredMaintenanceWindows(ctx context.Context, retention time.Duration) (int64, error) {
+	cutoff := time.Now().Add(-retention)
+	result, err := s.db.ExecContext(ctx,
+		s.q("DELETE FROM maintenance_windows WHERE end_time IS NOT NULL AND end_time < ?"),
+		cutoff,
+	)
+	if err != nil {
+		return 0, err
+	}
+	return result.RowsAffected()
+}
+
+func (s *SQLStore) IsMonitorInMaintenance(ctx context.Context, monitorID int) (bool, error) {
 	var count int
-	err := s.db.QueryRow(s.q(`SELECT COUNT(*) FROM maintenance_windows
+	err := s.db.QueryRowContext(ctx, s.q(`SELECT COUNT(*) FROM maintenance_windows
 		WHERE type = 'maintenance'
 		AND start_time <= CURRENT_TIMESTAMP
 		AND (end_time IS NULL OR end_time > CURRENT_TIMESTAMP)
@@ -618,46 +695,46 @@ func (s *SQLStore) IsMonitorInMaintenance(monitorID int) (bool, error) {
 	return count > 0, nil
 }

-func (s *SQLStore) GetPreference(key string) (string, error) {
+// GetPreference returns the stored value for key. When the key is absent the
+// error from Scan (sql.ErrNoRows) is returned with an empty string.
+func (s *SQLStore) GetPreference(ctx context.Context, key string) (string, error) {
 	var value string
-	err := s.db.QueryRow(s.q("SELECT value FROM preferences WHERE key = ?"), key).Scan(&value)
+	err := s.db.QueryRowContext(ctx, s.q("SELECT value FROM preferences WHERE key = ?"), key).Scan(&value)
 	if err != nil {
 		return "", err
 	}
 	return value, nil
 }

-func (s *SQLStore) SetPreference(key, value string) error {
+// SetPreference creates or overwrites the preference stored under key.
+// When s.dollar is set the upsert is written as INSERT ... ON CONFLICT (key)
+// DO UPDATE and routed through s.q; otherwise INSERT OR REPLACE is sent to
+// the database as-is.
+func (s *SQLStore) SetPreference(ctx context.Context, key, value string) error {
 	if s.dollar {
-		_, err := s.db.Exec(s.q("INSERT INTO preferences (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value = ?"), key, value, value)
+		_, err := s.db.ExecContext(ctx, s.q("INSERT INTO preferences (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value = ?"), key, value, value)
 		return err
 	}
-	_, err := s.db.Exec("INSERT OR REPLACE INTO preferences (key, value) VALUES (?, ?)", key, value)
+	_, err := s.db.ExecContext(ctx, "INSERT OR REPLACE INTO preferences (key, value) VALUES (?, ?)", key, value)
 	return err
 }

-func (s *SQLStore) ExportData() (models.Backup, error) {
-	sites, err := s.GetSites()
+// ExportData assembles a Backup containing all sites, alerts and users plus
+// up to maxMaintenanceExport maintenance windows. Only those four fields of
+// the Backup are populated here. The four reads are separate queries, not a
+// single transaction. On any error an empty Backup is returned.
+func (s *SQLStore) ExportData(ctx context.Context) (models.Backup, error) {
+	sites, err := s.GetSites(ctx)
 	if err != nil {
 		return models.Backup{}, err
 	}
-	alerts, err := s.GetAllAlerts()
+	alerts, err := s.GetAllAlerts(ctx)
 	if err != nil {
 		return models.Backup{}, err
 	}
-	users, err := s.GetAllUsers()
+	users, err := s.GetAllUsers(ctx)
 	if err != nil {
 		return models.Backup{}, err
 	}
-	windows, err := s.GetAllMaintenanceWindows(maxMaintenanceExport)
+	windows, err := s.GetAllMaintenanceWindows(ctx, maxMaintenanceExport)
 	if err != nil {
 		return models.Backup{}, err
 	}
 	return models.Backup{Sites: sites, Alerts: alerts, Users: users, MaintenanceWindows: windows}, nil
 }

-func (s *SQLStore) ImportData(data models.Backup) error {
-	tx, err := s.db.Begin()
+func (s *SQLStore) ImportData(ctx context.Context, data models.Backup) error {
+	tx, err := s.db.BeginTx(ctx, nil)
 	if err != nil {
 		return err
 	}
@@ -665,22 +742,29 @@ func (s *SQLStore) ImportData(data models.Backup) error {

 	s.dialect.ImportWipe(tx)

+	// Only wipe+replace users when callers explicitly provide them (CLI
+	// full restore). API/Kuma imports pass nil — existing users preserved.
+	if data.Users != nil {
+		s.dialect.ImportWipeUsers(tx)
 		for _, u := range data.Users {
-		if _, err := tx.Exec(s.q("INSERT INTO users (username, public_key, role) VALUES (?, ?, ?)"), u.Username, u.PublicKey, u.Role); err != nil {
+			if _, err := tx.ExecContext(ctx, s.q("INSERT INTO users (username, public_key, role) VALUES (?, ?, ?)"), u.Username, u.PublicKey, u.Role); err != nil {
 				return err
 			}
 		}
+	}
 	for _, a := range data.Alerts {
-		jsonBytes, err := json.Marshal(a.Settings)
+		// Encrypt on import exactly as AddAlert/UpdateAlert do, so a restore
+		// honors UPTOP_ENCRYPTION_KEY instead of writing secrets in plaintext.
+		settingsStr, err := s.marshalSettings(a.Settings)
 		if err != nil {
 			return err
 		}
-		if _, err := tx.Exec(s.q("INSERT INTO alerts (id, name, type, settings) VALUES (?, ?, ?, ?)"), a.ID, a.Name, a.Type, string(jsonBytes)); err != nil {
+		if _, err := tx.ExecContext(ctx, s.q("INSERT INTO alerts (id, name, type, settings) VALUES (?, ?, ?, ?)"), a.ID, a.Name, a.Type, settingsStr); err != nil {
 			return err
 		}
 	}
 	for _, st := range data.Sites {
-		if _, err := tx.Exec(s.q("INSERT INTO sites (id, name, url, type, token, interval, alert_id, check_ssl, threshold, max_retries, hostname, port, timeout, method, description, parent_id, accepted_codes, dns_resolve_type, dns_server, ignore_tls, paused, regions) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"),
+		if _, err := tx.ExecContext(ctx, s.q("INSERT INTO sites (id, name, url, type, token, interval, alert_id, check_ssl, threshold, max_retries, hostname, port, timeout, method, description, parent_id, accepted_codes, dns_resolve_type, dns_server, ignore_tls, paused, regions) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"),
 			st.ID, st.Name, st.URL, st.Type, st.Token, st.Interval, st.AlertID, st.CheckSSL, st.ExpiryThreshold, st.MaxRetries,
 			st.Hostname, st.Port, st.Timeout, st.Method, st.Description, st.ParentID, st.AcceptedCodes, st.DNSResolveType, st.DNSServer, st.IgnoreTLS, st.Paused, st.Regions); err != nil {
 			return err
@@ -688,7 +772,7 @@ func (s *SQLStore) ImportData(data models.Backup) error {
 	}

 	for _, mw := range data.MaintenanceWindows {
-		if _, err := tx.Exec(s.q("INSERT INTO maintenance_windows (id, monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"),
+		if _, err := tx.ExecContext(ctx, s.q("INSERT INTO maintenance_windows (id, monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"),
 			mw.ID, mw.MonitorID, mw.Title, mw.Description, mw.Type, mw.StartTime, sql.NullTime{Time: mw.EndTime, Valid: !mw.EndTime.IsZero()}, mw.CreatedBy); err != nil {
 			return err
 		}
@@ -1,8 +1,13 @@
 package store

 import (
-	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"context"
+	"fmt"
+	"strings"
 	"testing"
+	"time"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 )

 func newTestStore(t *testing.T) *SQLStore {
@@ -11,7 +16,7 @@ func newTestStore(t *testing.T) *SQLStore {
 	if err != nil {
 		t.Fatalf("NewSQLiteStore: %v", err)
 	}
-	if err := s.Init(); err != nil {
+	if err := s.Init(context.Background()); err != nil {
 		t.Fatalf("Init: %v", err)
 	}
 	return s
@@ -20,7 +25,7 @@ func newTestStore(t *testing.T) *SQLStore {
 func TestSiteCRUD(t *testing.T) {
 	s := newTestStore(t)

-	sites, err := s.GetSites()
+	sites, err := s.GetSites(context.Background())
 	if err != nil {
 		t.Fatalf("GetSites: %v", err)
 	}
@@ -28,11 +33,11 @@ func TestSiteCRUD(t *testing.T) {
 		t.Fatalf("expected 0 sites, got %d", len(sites))
 	}

-	if err := s.AddSite(models.Site{Name: "Test", URL: "https://example.com", Type: "http", Interval: 30}); err != nil {
+	if err := s.AddSite(context.Background(), models.SiteConfig{Name: "Test", URL: "https://example.com", Type: "http", Interval: 30}); err != nil {
 		t.Fatalf("AddSite: %v", err)
 	}

-	sites, err = s.GetSites()
+	sites, err = s.GetSites(context.Background())
 	if err != nil {
 		t.Fatalf("GetSites: %v", err)
 	}
@@ -44,20 +49,26 @@ func TestSiteCRUD(t *testing.T) {
 	}

 	sites[0].Name = "Updated"
-	if err := s.UpdateSite(sites[0]); err != nil {
+	if err := s.UpdateSite(context.Background(), sites[0]); err != nil {
 		t.Fatalf("UpdateSite: %v", err)
 	}

-	sites, _ = s.GetSites()
+	sites, err = s.GetSites(context.Background())
+	if err != nil {
+		t.Fatalf("GetSites: %v", err)
+	}
 	if sites[0].Name != "Updated" {
 		t.Errorf("expected name 'Updated', got '%s'", sites[0].Name)
 	}

-	if err := s.DeleteSite(sites[0].ID); err != nil {
+	if err := s.DeleteSite(context.Background(), sites[0].ID); err != nil {
 		t.Fatalf("DeleteSite: %v", err)
 	}

-	sites, _ = s.GetSites()
+	sites, err = s.GetSites(context.Background())
+	if err != nil {
+		t.Fatalf("GetSites: %v", err)
+	}
 	if len(sites) != 0 {
 		t.Fatalf("expected 0 sites after delete, got %d", len(sites))
 	}
@@ -66,11 +77,11 @@ func TestSiteCRUD(t *testing.T) {
 func TestAlertCRUD(t *testing.T) {
 	s := newTestStore(t)

-	if err := s.AddAlert("Discord", "discord", map[string]string{"url": "https://example.com/hook"}); err != nil {
+	if err := s.AddAlert(context.Background(), "Discord", "discord", map[string]string{"url": "https://example.com/hook"}); err != nil {
 		t.Fatalf("AddAlert: %v", err)
 	}

-	alerts, err := s.GetAllAlerts()
+	alerts, err := s.GetAllAlerts(context.Background())
 	if err != nil {
 		t.Fatalf("GetAllAlerts: %v", err)
 	}
@@ -84,7 +95,7 @@ func TestAlertCRUD(t *testing.T) {
 		t.Errorf("settings url mismatch")
 	}

-	a, err := s.GetAlert(alerts[0].ID)
+	a, err := s.GetAlert(context.Background(), alerts[0].ID)
 	if err != nil {
 		t.Fatalf("GetAlert: %v", err)
 	}
@@ -92,20 +103,26 @@ func TestAlertCRUD(t *testing.T) {
 		t.Errorf("expected name 'Discord', got '%s'", a.Name)
 	}

-	if err := s.UpdateAlert(a.ID, "Slack", "slack", map[string]string{"url": "https://slack.com/hook"}); err != nil {
+	if err := s.UpdateAlert(context.Background(), a.ID, "Slack", "slack", map[string]string{"url": "https://slack.com/hook"}); err != nil {
 		t.Fatalf("UpdateAlert: %v", err)
 	}

-	a, _ = s.GetAlert(a.ID)
+	a, err = s.GetAlert(context.Background(), a.ID)
+	if err != nil {
+		t.Fatalf("GetAlert: %v", err)
+	}
 	if a.Type != "slack" {
 		t.Errorf("expected type 'slack', got '%s'", a.Type)
 	}

-	if err := s.DeleteAlert(a.ID); err != nil {
+	if err := s.DeleteAlert(context.Background(), a.ID); err != nil {
 		t.Fatalf("DeleteAlert: %v", err)
 	}

-	alerts, _ = s.GetAllAlerts()
+	alerts, err = s.GetAllAlerts(context.Background())
+	if err != nil {
+		t.Fatalf("GetAllAlerts: %v", err)
+	}
 	if len(alerts) != 0 {
 		t.Fatalf("expected 0 alerts after delete, got %d", len(alerts))
 	}
@@ -114,11 +131,11 @@ func TestAlertCRUD(t *testing.T) {
 func TestUserCRUD(t *testing.T) {
 	s := newTestStore(t)

-	if err := s.AddUser("admin", "ssh-ed25519 AAAA...", "admin"); err != nil {
+	if err := s.AddUser(context.Background(), "admin", "ssh-ed25519 AAAA...", "admin"); err != nil {
 		t.Fatalf("AddUser: %v", err)
 	}

-	users, err := s.GetAllUsers()
+	users, err := s.GetAllUsers(context.Background())
 	if err != nil {
 		t.Fatalf("GetAllUsers: %v", err)
 	}
@@ -129,20 +146,26 @@ func TestUserCRUD(t *testing.T) {
 		t.Errorf("expected username 'admin', got '%s'", users[0].Username)
 	}

-	if err := s.UpdateUser(users[0].ID, "root", "ssh-ed25519 BBBB...", "admin"); err != nil {
+	if err := s.UpdateUser(context.Background(), users[0].ID, "root", "ssh-ed25519 BBBB...", "admin"); err != nil {
 		t.Fatalf("UpdateUser: %v", err)
 	}

-	users, _ = s.GetAllUsers()
+	users, err = s.GetAllUsers(context.Background())
+	if err != nil {
+		t.Fatalf("GetAllUsers: %v", err)
+	}
 	if users[0].Username != "root" {
 		t.Errorf("expected username 'root', got '%s'", users[0].Username)
 	}

-	if err := s.DeleteUser(users[0].ID); err != nil {
+	if err := s.DeleteUser(context.Background(), users[0].ID); err != nil {
 		t.Fatalf("DeleteUser: %v", err)
 	}

-	users, _ = s.GetAllUsers()
+	users, err = s.GetAllUsers(context.Background())
+	if err != nil {
+		t.Fatalf("GetAllUsers: %v", err)
+	}
 	if len(users) != 0 {
 		t.Fatalf("expected 0 users after delete, got %d", len(users))
 	}
@@ -151,11 +174,14 @@ func TestUserCRUD(t *testing.T) {
 func TestPushTokenGeneration(t *testing.T) {
 	s := newTestStore(t)

-	if err := s.AddSite(models.Site{Name: "Push Monitor", Type: "push", Interval: 60}); err != nil {
+	if err := s.AddSite(context.Background(), models.SiteConfig{Name: "Push Monitor", Type: "push", Interval: 60}); err != nil {
 		t.Fatalf("AddSite: %v", err)
 	}

-	sites, _ := s.GetSites()
+	sites, err := s.GetSites(context.Background())
+	if err != nil {
+		t.Fatalf("GetSites: %v", err)
+	}
 	if len(sites) != 1 {
 		t.Fatalf("expected 1 site, got %d", len(sites))
 	}
@@ -170,11 +196,17 @@ func TestPushTokenGeneration(t *testing.T) {
 func TestImportExport(t *testing.T) {
 	s := newTestStore(t)

-	s.AddAlert("Test Alert", "webhook", map[string]string{"url": "https://example.com"})
-	s.AddSite(models.Site{Name: "Site1", URL: "https://example.com", Type: "http", Interval: 30})
-	s.AddUser("user1", "ssh-ed25519 KEY", "user")
+	if err := s.AddAlert(context.Background(), "Test Alert", "webhook", map[string]string{"url": "https://example.com"}); err != nil {
+		t.Fatalf("AddAlert: %v", err)
+	}
+	if err := s.AddSite(context.Background(), models.SiteConfig{Name: "Site1", URL: "https://example.com", Type: "http", Interval: 30}); err != nil {
+		t.Fatalf("AddSite: %v", err)
+	}
+	if err := s.AddUser(context.Background(), "user1", "ssh-ed25519 KEY", "user"); err != nil {
+		t.Fatalf("AddUser: %v", err)
+	}

-	backup, err := s.ExportData()
+	backup, err := s.ExportData(context.Background())
 	if err != nil {
 		t.Fatalf("ExportData: %v", err)
 	}
@@ -183,32 +215,106 @@ func TestImportExport(t *testing.T) {
 	}

 	s2 := newTestStore(t)
-	if err := s2.ImportData(backup); err != nil {
+	if err := s2.ImportData(context.Background(), backup); err != nil {
 		t.Fatalf("ImportData: %v", err)
 	}

-	sites, _ := s2.GetSites()
-	alerts, _ := s2.GetAllAlerts()
-	users, _ := s2.GetAllUsers()
+	sites, err := s2.GetSites(context.Background())
+	if err != nil {
+		t.Fatalf("GetSites: %v", err)
+	}
+	alerts, err := s2.GetAllAlerts(context.Background())
+	if err != nil {
+		t.Fatalf("GetAllAlerts: %v", err)
+	}
+	users, err := s2.GetAllUsers(context.Background())
+	if err != nil {
+		t.Fatalf("GetAllUsers: %v", err)
+	}
 	if len(sites) != 1 || len(alerts) != 1 || len(users) != 1 {
 		t.Fatalf("import mismatch: %d sites, %d alerts, %d users", len(sites), len(alerts), len(users))
 	}
 }

+// TestImportData_WipesHistory seeds a site with check history, a state change
+// and an alert-health record, imports a backup that reuses site ID 1, and
+// verifies that the pre-import check history and state changes are gone.
+func TestImportData_WipesHistory(t *testing.T) {
+	s := newTestStore(t)
+
+	if err := s.AddSite(context.Background(), models.SiteConfig{Name: "OldSite", URL: "https://old.com", Type: "http", Interval: 30}); err != nil {
+		t.Fatalf("AddSite: %v", err)
+	}
+	// The hard-coded site ID 1 assumes the first site added to a fresh store
+	// is assigned id 1.
+	if err := s.SaveCheck(context.Background(), 1, 5000, true); err != nil {
+		t.Fatalf("SaveCheck: %v", err)
+	}
+	if err := s.SaveStateChange(context.Background(), 1, "UP", "DOWN", "timeout"); err != nil {
+		t.Fatalf("SaveStateChange: %v", err)
+	}
+	// NOTE(review): this alert-health row is seeded but never asserted on
+	// below; confirm whether alert_health is expected to be wiped as well.
+	if err := s.SaveAlertHealth(context.Background(), models.AlertHealthRecord{AlertID: 1, LastSendOK: true, SendCount: 1}); err != nil {
+		t.Fatalf("SaveAlertHealth: %v", err)
+	}
+
+	backup := models.Backup{
+		Sites: []models.SiteConfig{{ID: 1, Name: "NewSite", URL: "https://new.com", Type: "http", Interval: 60}},
+	}
+	if err := s.ImportData(context.Background(), backup); err != nil {
+		t.Fatalf("ImportData: %v", err)
+	}
+
+	history, err := s.LoadAllHistory(context.Background(), 100)
+	if err != nil {
+		t.Fatalf("LoadAllHistory: %v", err)
+	}
+	if len(history) != 0 {
+		t.Errorf("expected empty check_history after import, got %d sites with history", len(history))
+	}
+
+	changes, err := s.GetStateChanges(context.Background(), 1, 100)
+	if err != nil {
+		t.Fatalf("GetStateChanges: %v", err)
+	}
+	if len(changes) != 0 {
+		t.Errorf("expected empty state_changes after import, got %d", len(changes))
+	}
+}
+
+// TestImportData_NilUsersPreservesExisting verifies that importing a backup
+// whose Users field is nil leaves the existing users table untouched, while
+// sites and alerts from the backup are still imported.
+func TestImportData_NilUsersPreservesExisting(t *testing.T) {
+	s := newTestStore(t)
+
+	if err := s.AddUser(context.Background(), "admin", "ssh-ed25519 ADMINKEY", "admin"); err != nil {
+		t.Fatalf("AddUser: %v", err)
+	}
+
+	// Users is spelled out as nil to make the case under test explicit.
+	backup := models.Backup{
+		Sites:  []models.SiteConfig{{ID: 1, Name: "New", URL: "https://new.com", Type: "http", Interval: 30}},
+		Alerts: []models.AlertConfig{{ID: 1, Name: "a", Type: "webhook", Settings: map[string]string{"url": "https://h.com"}}},
+		Users:  nil,
+	}
+	if err := s.ImportData(context.Background(), backup); err != nil {
+		t.Fatalf("ImportData: %v", err)
+	}
+
+	users, err := s.GetAllUsers(context.Background())
+	if err != nil {
+		t.Fatalf("GetAllUsers: %v", err)
+	}
+	if len(users) != 1 || users[0].Username != "admin" {
+		t.Errorf("expected existing admin user preserved, got %d users", len(users))
+	}
+}
+
 func TestCheckHistory(t *testing.T) {
 	s := newTestStore(t)

-	if err := s.SaveCheck(1, 5000000, true); err != nil {
+	if err := s.SaveCheck(context.Background(), 1, 5000000, true); err != nil {
 		t.Fatalf("SaveCheck: %v", err)
 	}
-	if err := s.SaveCheck(1, 10000000, false); err != nil {
+	if err := s.SaveCheck(context.Background(), 1, 10000000, false); err != nil {
 		t.Fatalf("SaveCheck: %v", err)
 	}
-	if err := s.SaveCheck(2, 3000000, true); err != nil {
+	if err := s.SaveCheck(context.Background(), 2, 3000000, true); err != nil {
 		t.Fatalf("SaveCheck site 2: %v", err)
 	}

-	history, err := s.LoadAllHistory(10)
+	history, err := s.LoadAllHistory(context.Background(), 10)
 	if err != nil {
 		t.Fatalf("LoadAllHistory: %v", err)
 	}
@@ -229,3 +335,215 @@ func TestCheckHistory(t *testing.T) {
 		t.Errorf("expected 1 up record for site 1, got %d", upCount)
 	}
 }
+
+// TestDeleteSiteCascade verifies that deleting a site leaves behind no check
+// history, state changes, or maintenance windows that reference it.
+//
+// Every store call is error-checked: a failed query returns nil results,
+// which would otherwise make the "nothing left" assertions pass vacuously.
+func TestDeleteSiteCascade(t *testing.T) {
+	s := newTestStore(t)
+	ctx := context.Background()
+
+	site := models.SiteConfig{Name: "Cascade Test", URL: "https://example.com", Interval: 30}
+	if err := s.AddSite(ctx, site); err != nil {
+		t.Fatalf("AddSite: %v", err)
+	}
+	sites, err := s.GetSites(ctx)
+	if err != nil {
+		t.Fatalf("GetSites: %v", err)
+	}
+	if len(sites) == 0 {
+		t.Fatal("GetSites returned no sites after AddSite")
+	}
+	siteID := sites[0].ID
+
+	// Seed one dependent row of each kind for the site.
+	if err := s.SaveCheck(ctx, siteID, 1000, true); err != nil {
+		t.Fatalf("SaveCheck: %v", err)
+	}
+	if err := s.SaveStateChange(ctx, siteID, "UP", "DOWN", "timeout"); err != nil {
+		t.Fatalf("SaveStateChange: %v", err)
+	}
+	mw := models.MaintenanceWindow{
+		MonitorID: siteID,
+		Title:     "Test MW",
+		Type:      "maintenance",
+		StartTime: time.Now(),
+	}
+	if err := s.AddMaintenanceWindow(ctx, mw); err != nil {
+		t.Fatalf("AddMaintenanceWindow: %v", err)
+	}
+
+	if err := s.DeleteSite(ctx, siteID); err != nil {
+		t.Fatalf("DeleteSite: %v", err)
+	}
+
+	history, err := s.LoadAllHistory(ctx, 100)
+	if err != nil {
+		t.Fatalf("LoadAllHistory: %v", err)
+	}
+	if len(history[siteID]) != 0 {
+		t.Errorf("expected 0 check_history rows, got %d", len(history[siteID]))
+	}
+
+	changes, err := s.GetStateChanges(ctx, siteID, 100)
+	if err != nil {
+		t.Fatalf("GetStateChanges: %v", err)
+	}
+	if len(changes) != 0 {
+		t.Errorf("expected 0 state_changes rows, got %d", len(changes))
+	}
+
+	windows, err := s.GetActiveMaintenanceWindows(ctx)
+	if err != nil {
+		t.Fatalf("GetActiveMaintenanceWindows: %v", err)
+	}
+	for _, w := range windows {
+		if w.MonitorID == siteID {
+			t.Errorf("orphaned maintenance window found: id=%d", w.ID)
+		}
+	}
+}
+
+// TestPruneLogs writes maxLogRows+50 log entries, prunes, and expects exactly
+// maxLogRows to remain, with the newest entry kept and the oldest removed.
+func TestPruneLogs(t *testing.T) {
+	s := newTestStore(t)
+	ctx := context.Background()
+	total := maxLogRows + 50
+
+	for i := 0; i < total; i++ {
+		msg := fmt.Sprintf("log %d", i)
+		if err := s.SaveLog(ctx, msg); err != nil {
+			t.Fatalf("SaveLog: %v", err)
+		}
+	}
+	if err := s.PruneLogs(ctx); err != nil {
+		t.Fatalf("PruneLogs: %v", err)
+	}
+
+	logs, err := s.LoadLogs(ctx, maxLogRows*2)
+	if err != nil {
+		t.Fatalf("LoadLogs: %v", err)
+	}
+	if got := len(logs); got != maxLogRows {
+		t.Errorf("expected %d logs after prune, got %d", maxLogRows, got)
+	}
+
+	// Check by membership rather than position: LoadLogs ordering ties when
+	// rows share a created_at second.
+	seen := make(map[string]bool, len(logs))
+	for _, entry := range logs {
+		seen[entry] = true
+	}
+	newest := fmt.Sprintf("log %d", total-1)
+	if !seen[newest] {
+		t.Error("newest log was pruned")
+	}
+	if seen["log 0"] {
+		t.Error("oldest log survived prune")
+	}
+}
+
+// TestPruneCheckHistory saves maxCheckHistory+5 checks for site 1 and three
+// for site 2, prunes, and expects site 1 trimmed to maxCheckHistory rows while
+// site 2 keeps all three.
+func TestPruneCheckHistory(t *testing.T) {
+	s := newTestStore(t)
+	ctx := context.Background()
+
+	overCap := maxCheckHistory + 5
+	for n := 0; n < overCap; n++ {
+		if err := s.SaveCheck(ctx, 1, int64(n), true); err != nil {
+			t.Fatalf("SaveCheck site 1: %v", err)
+		}
+	}
+	for n := 0; n < 3; n++ {
+		if err := s.SaveCheck(ctx, 2, int64(n), true); err != nil {
+			t.Fatalf("SaveCheck site 2: %v", err)
+		}
+	}
+
+	if err := s.PruneCheckHistory(ctx); err != nil {
+		t.Fatalf("PruneCheckHistory: %v", err)
+	}
+
+	history, err := s.LoadAllHistory(ctx, maxCheckHistory*2)
+	if err != nil {
+		t.Fatalf("LoadAllHistory: %v", err)
+	}
+	if got := len(history[1]); got != maxCheckHistory {
+		t.Errorf("site 1: expected %d rows after prune, got %d", maxCheckHistory, got)
+	}
+	if got := len(history[2]); got != 3 {
+		t.Errorf("site 2: expected 3 rows untouched, got %d", got)
+	}
+}
+
+// TestPruneExpiredMaintenanceWindows adds three windows — one that ended ten
+// days ago, one that ended a day ago, and one with no end time — then prunes
+// with a seven-day retention and expects only the first to be removed.
+func TestPruneExpiredMaintenanceWindows(t *testing.T) {
+	const day = 24 * time.Hour
+
+	s := newTestStore(t)
+	ctx := context.Background()
+	now := time.Now()
+
+	// Ended 10 days ago: outside the 7-day retention, so it must be pruned.
+	old := models.MaintenanceWindow{
+		MonitorID: 0,
+		Title:     "Old Window",
+		Type:      "maintenance",
+		StartTime: now.Add(-11 * day),
+		EndTime:   now.Add(-10 * day),
+	}
+	if err := s.AddMaintenanceWindow(ctx, old); err != nil {
+		t.Fatalf("AddMaintenanceWindow (old): %v", err)
+	}
+
+	// Ended 1 day ago: still inside the retention, so it must survive.
+	recent := models.MaintenanceWindow{
+		MonitorID: 0,
+		Title:     "Recent Window",
+		Type:      "maintenance",
+		StartTime: now.Add(-2 * day),
+		EndTime:   now.Add(-day),
+	}
+	if err := s.AddMaintenanceWindow(ctx, recent); err != nil {
+		t.Fatalf("AddMaintenanceWindow (recent): %v", err)
+	}
+
+	// No end time set: still ongoing, so it must survive.
+	ongoing := models.MaintenanceWindow{
+		MonitorID: 0,
+		Title:     "Ongoing Window",
+		Type:      "maintenance",
+		StartTime: now.Add(-time.Hour),
+	}
+	if err := s.AddMaintenanceWindow(ctx, ongoing); err != nil {
+		t.Fatalf("AddMaintenanceWindow (ongoing): %v", err)
+	}
+
+	pruned, err := s.PruneExpiredMaintenanceWindows(ctx, 7*day)
+	if err != nil {
+		t.Fatalf("PruneExpiredMaintenanceWindows: %v", err)
+	}
+	if pruned != 1 {
+		t.Errorf("expected 1 pruned, got %d", pruned)
+	}
+
+	remaining, err := s.GetAllMaintenanceWindows(ctx, 100)
+	if err != nil {
+		t.Fatalf("GetAllMaintenanceWindows: %v", err)
+	}
+	if len(remaining) != 2 {
+		t.Fatalf("expected 2 remaining windows, got %d", len(remaining))
+	}
+	for _, win := range remaining {
+		if win.Title == "Old Window" {
+			t.Error("old window should have been pruned")
+		}
+	}
+}
+
+// TestImportData_EncryptsAlertSettings guards the restore path: with an
+// encryptor installed, ImportData has to store alert settings encrypted (as
+// AddAlert/UpdateAlert do) so a restore with UPTOP_ENCRYPTION_KEY set never
+// lands secrets in plaintext. It inspects the raw column and then confirms
+// the settings still read back decrypted through GetAllAlerts.
+func TestImportData_EncryptsAlertSettings(t *testing.T) {
+	s := newTestStore(t)
+	ctx := context.Background()
+
+	hexKey := strings.Repeat("ab", 32) // 64 hex chars = 32 bytes
+	enc, err := NewEncryptor(hexKey)
+	if err != nil {
+		t.Fatalf("NewEncryptor: %v", err)
+	}
+	s.SetEncryptor(enc)
+
+	telegram := models.AlertConfig{
+		ID:       1,
+		Name:     "tg",
+		Type:     "telegram",
+		Settings: map[string]string{"token": "123:SECRET", "chat_id": "42"},
+	}
+	backup := models.Backup{Alerts: []models.AlertConfig{telegram}}
+	if err := s.ImportData(ctx, backup); err != nil {
+		t.Fatalf("ImportData: %v", err)
+	}
+
+	// Read the column directly, bypassing the store's decrypting accessors.
+	var stored string
+	row := s.db.QueryRow("SELECT settings FROM alerts WHERE id = 1")
+	if err := row.Scan(&stored); err != nil {
+		t.Fatalf("query settings: %v", err)
+	}
+	if !strings.HasPrefix(stored, encryptedPrefix) {
+		t.Errorf("imported settings not encrypted: %q", stored)
+	}
+	if strings.Contains(stored, "SECRET") {
+		t.Errorf("plaintext secret found in stored column: %q", stored)
+	}
+
+	alerts, err := s.GetAllAlerts(ctx)
+	if err != nil {
+		t.Fatalf("GetAllAlerts: %v", err)
+	}
+	if len(alerts) != 1 || alerts[0].Settings["token"] != "123:SECRET" {
+		t.Errorf("decrypt round-trip failed: %+v", alerts)
+	}
+}
@@ -1,80 +1,85 @@
 package store

 import (
+	"context"
 	"time"

 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 )

 type Store interface {
-	Init() error
+	Init(ctx context.Context) error

 	// Sites
-	GetSites() ([]models.Site, error)
-	AddSite(site models.Site) error
-	UpdateSite(site models.Site) error
-	UpdateSitePaused(id int, paused bool) error
-	DeleteSite(id int) error
+	GetSites(ctx context.Context) ([]models.SiteConfig, error)
+	AddSite(ctx context.Context, site models.SiteConfig) error
+	UpdateSite(ctx context.Context, site models.SiteConfig) error
+	UpdateSitePaused(ctx context.Context, id int, paused bool) error
+	DeleteSite(ctx context.Context, id int) error

 	// Alerts
-	GetAllAlerts() ([]models.AlertConfig, error)
-	GetAlert(id int) (models.AlertConfig, error)
-	AddAlert(name, aType string, settings map[string]string) error
-	UpdateAlert(id int, name, aType string, settings map[string]string) error
-	DeleteAlert(id int) error
+	GetAllAlerts(ctx context.Context) ([]models.AlertConfig, error)
+	GetAlert(ctx context.Context, id int) (models.AlertConfig, error)
+	AddAlert(ctx context.Context, name, aType string, settings map[string]string) error
+	UpdateAlert(ctx context.Context, id int, name, aType string, settings map[string]string) error
+	DeleteAlert(ctx context.Context, id int) error

 	// Declarative config support
-	GetSiteByName(name string) (models.Site, error)
-	GetAlertByName(name string) (models.AlertConfig, error)
-	AddSiteReturningID(site models.Site) (int, error)
-	AddAlertReturningID(name, aType string, settings map[string]string) (int, error)
+	GetSiteByName(ctx context.Context, name string) (models.SiteConfig, error)
+	GetAlertByName(ctx context.Context, name string) (models.AlertConfig, error)
+	AddSiteReturningID(ctx context.Context, site models.SiteConfig) (int, error)
+	AddAlertReturningID(ctx context.Context, name, aType string, settings map[string]string) (int, error)

 	// Users
-	GetAllUsers() ([]models.User, error)
-	AddUser(username, publicKey, role string) error
-	UpdateUser(id int, username, publicKey, role string) error
-	DeleteUser(id int) error
+	GetAllUsers(ctx context.Context) ([]models.User, error)
+	AddUser(ctx context.Context, username, publicKey, role string) error
+	UpdateUser(ctx context.Context, id int, username, publicKey, role string) error
+	DeleteUser(ctx context.Context, id int) error

 	// History
-	SaveCheck(siteID int, latencyNs int64, isUp bool) error
-	SaveCheckFromNode(siteID int, nodeID string, latencyNs int64, isUp bool) error
-	LoadAllHistory(limit int) (map[int][]models.CheckRecord, error)
+	SaveCheck(ctx context.Context, siteID int, latencyNs int64, isUp bool) error
+	SaveCheckFromNode(ctx context.Context, siteID int, nodeID string, latencyNs int64, isUp bool) error
+	LoadAllHistory(ctx context.Context, limit int) (map[int][]models.CheckRecord, error)
+	PruneCheckHistory(ctx context.Context) error

 	// State Changes
-	SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error
-	GetStateChanges(siteID int, limit int) ([]models.StateChange, error)
-	GetStateChangesSince(siteID int, since time.Time) ([]models.StateChange, error)
+	SaveStateChange(ctx context.Context, siteID int, fromStatus, toStatus, errorReason string) error
+	GetStateChanges(ctx context.Context, siteID int, limit int) ([]models.StateChange, error)
+	GetStateChangesSince(ctx context.Context, siteID int, since time.Time) ([]models.StateChange, error)
+	PruneStateChanges(ctx context.Context) error

 	// Nodes
-	RegisterNode(node models.ProbeNode) error
-	GetNode(id string) (models.ProbeNode, error)
-	GetAllNodes() ([]models.ProbeNode, error)
-	UpdateNodeLastSeen(id string) error
-	DeleteNode(id string) error
+	RegisterNode(ctx context.Context, node models.ProbeNode) error
+	GetNode(ctx context.Context, id string) (models.ProbeNode, error)
+	GetAllNodes(ctx context.Context) ([]models.ProbeNode, error)
+	UpdateNodeLastSeen(ctx context.Context, id string) error
+	DeleteNode(ctx context.Context, id string) error

 	// Alert Health
-	LoadAlertHealth() (map[int]models.AlertHealthRecord, error)
-	SaveAlertHealth(h models.AlertHealthRecord) error
+	LoadAlertHealth(ctx context.Context) (map[int]models.AlertHealthRecord, error)
+	SaveAlertHealth(ctx context.Context, h models.AlertHealthRecord) error

 	// Logs
-	SaveLog(message string) error
-	LoadLogs(limit int) ([]string, error)
+	SaveLog(ctx context.Context, message string) error
+	LoadLogs(ctx context.Context, limit int) ([]string, error)
+	PruneLogs(ctx context.Context) error

 	// Maintenance Windows
-	GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error)
-	GetAllMaintenanceWindows(limit int) ([]models.MaintenanceWindow, error)
-	AddMaintenanceWindow(mw models.MaintenanceWindow) error
-	EndMaintenanceWindow(id int) error
-	DeleteMaintenanceWindow(id int) error
-	IsMonitorInMaintenance(monitorID int) (bool, error)
+	GetActiveMaintenanceWindows(ctx context.Context) ([]models.MaintenanceWindow, error)
+	GetAllMaintenanceWindows(ctx context.Context, limit int) ([]models.MaintenanceWindow, error)
+	AddMaintenanceWindow(ctx context.Context, mw models.MaintenanceWindow) error
+	EndMaintenanceWindow(ctx context.Context, id int) error
+	DeleteMaintenanceWindow(ctx context.Context, id int) error
+	PruneExpiredMaintenanceWindows(ctx context.Context, retention time.Duration) (int64, error)
+	IsMonitorInMaintenance(ctx context.Context, monitorID int) (bool, error)

 	// Preferences
-	GetPreference(key string) (string, error)
-	SetPreference(key, value string) error
+	GetPreference(ctx context.Context, key string) (string, error)
+	SetPreference(ctx context.Context, key, value string) error

 	// Backup & Restore
-	ExportData() (models.Backup, error)
-	ImportData(data models.Backup) error
+	ExportData(ctx context.Context) (models.Backup, error)
+	ImportData(ctx context.Context, data models.Backup) error

 	// Lifecycle
 	Close() error
@@ -0,0 +1,276 @@
+package storetest
+
+import (
+	"context"
+	"time"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+)
+
+// BaseMock implements store.Store with no-op defaults. Embed it in test-specific
+// mocks and override only the methods you need via the exported Func fields or
+// by shadowing the method on the embedding struct.
+//
+// Each XxxFunc field is an optional hook for the method Xxx: when the field is
+// non-nil the method forwards its arguments to it and returns its result;
+// when it is nil the method returns zero values and a nil error. Methods that
+// have no corresponding Func field always return zero values and a nil error.
+type BaseMock struct {
+	GetSitesFunc                    func(ctx context.Context) ([]models.SiteConfig, error)
+	AddSiteFunc                     func(ctx context.Context, site models.SiteConfig) error
+	UpdateSiteFunc                  func(ctx context.Context, site models.SiteConfig) error
+	GetAllAlertsFunc                func(ctx context.Context) ([]models.AlertConfig, error)
+	GetAlertFunc                    func(ctx context.Context, id int) (models.AlertConfig, error)
+	GetAllUsersFunc                 func(ctx context.Context) ([]models.User, error)
+	GetAllNodesFunc                 func(ctx context.Context) ([]models.ProbeNode, error)
+	GetActiveMaintenanceWindowsFunc func(ctx context.Context) ([]models.MaintenanceWindow, error)
+	GetAllMaintenanceWindowsFunc    func(ctx context.Context, limit int) ([]models.MaintenanceWindow, error)
+	IsMonitorInMaintenanceFunc      func(ctx context.Context, id int) (bool, error)
+	LoadAlertHealthFunc             func(ctx context.Context) (map[int]models.AlertHealthRecord, error)
+	LoadAllHistoryFunc              func(ctx context.Context, limit int) (map[int][]models.CheckRecord, error)
+	SaveCheckFunc                   func(ctx context.Context, siteID int, latencyNs int64, isUp bool) error
+	SaveCheckFromNodeFunc           func(ctx context.Context, siteID int, nodeID string, latencyNs int64, isUp bool) error
+	SaveLogFunc                     func(ctx context.Context, message string) error
+	SaveStateChangeFunc             func(ctx context.Context, siteID int, from, to, reason string) error
+	SaveAlertHealthFunc             func(ctx context.Context, h models.AlertHealthRecord) error
+	GetStateChangesFunc             func(ctx context.Context, siteID, limit int) ([]models.StateChange, error)
+	GetStateChangesSinceFunc        func(ctx context.Context, siteID int, since time.Time) ([]models.StateChange, error)
+	ExportDataFunc                  func(ctx context.Context) (models.Backup, error)
+	ImportDataFunc                  func(ctx context.Context, data models.Backup) error
+	RegisterNodeFunc                func(ctx context.Context, node models.ProbeNode) error
+	GetNodeFunc                     func(ctx context.Context, id string) (models.ProbeNode, error)
+	GetPreferenceFunc               func(ctx context.Context, key string) (string, error)
+	SetPreferenceFunc               func(ctx context.Context, key, value string) error
+}
+
+// Init and Close are lifecycle no-ops: both always return nil.
+func (m *BaseMock) Init(_ context.Context) error { return nil }
+func (m *BaseMock) Close() error                 { return nil }
+
+// GetSites calls GetSitesFunc when it is set; otherwise it returns nil, nil.
+func (m *BaseMock) GetSites(ctx context.Context) ([]models.SiteConfig, error) {
+	if m.GetSitesFunc != nil {
+		return m.GetSitesFunc(ctx)
+	}
+	return nil, nil
+}
+
+// AddSite calls AddSiteFunc when it is set; otherwise it returns nil.
+func (m *BaseMock) AddSite(ctx context.Context, site models.SiteConfig) error {
+	if m.AddSiteFunc != nil {
+		return m.AddSiteFunc(ctx, site)
+	}
+	return nil
+}
+
+// UpdateSite calls UpdateSiteFunc when it is set; otherwise it returns nil.
+func (m *BaseMock) UpdateSite(ctx context.Context, site models.SiteConfig) error {
+	if m.UpdateSiteFunc != nil {
+		return m.UpdateSiteFunc(ctx, site)
+	}
+	return nil
+}
+
+// UpdateSitePaused has no hook; it ignores its arguments and returns nil.
+func (m *BaseMock) UpdateSitePaused(_ context.Context, _ int, _ bool) error { return nil }
+
+// DeleteSite has no hook; it ignores its arguments and returns nil.
+func (m *BaseMock) DeleteSite(_ context.Context, _ int) error { return nil }
+
+// GetAllAlerts calls GetAllAlertsFunc when it is set; otherwise it returns
+// nil, nil.
+func (m *BaseMock) GetAllAlerts(ctx context.Context) ([]models.AlertConfig, error) {
+	if m.GetAllAlertsFunc != nil {
+		return m.GetAllAlertsFunc(ctx)
+	}
+	return nil, nil
+}
+
+// GetAlert calls GetAlertFunc when it is set; otherwise it returns a zero
+// AlertConfig and a nil error.
+func (m *BaseMock) GetAlert(ctx context.Context, id int) (models.AlertConfig, error) {
+	if m.GetAlertFunc != nil {
+		return m.GetAlertFunc(ctx, id)
+	}
+	return models.AlertConfig{}, nil
+}
+
+// AddAlert has no hook; it ignores its arguments and returns nil.
+func (m *BaseMock) AddAlert(_ context.Context, _ string, _ string, _ map[string]string) error {
+	return nil
+}
+
+// UpdateAlert has no hook; it ignores its arguments and returns nil.
+func (m *BaseMock) UpdateAlert(_ context.Context, _ int, _ string, _ string, _ map[string]string) error {
+	return nil
+}
+
+// DeleteAlert has no hook; it ignores its arguments and returns nil.
+func (m *BaseMock) DeleteAlert(_ context.Context, _ int) error { return nil }
+
+// GetSiteByName has no hook; it returns a zero SiteConfig and a nil error
+// for every name.
+func (m *BaseMock) GetSiteByName(_ context.Context, _ string) (models.SiteConfig, error) {
+	return models.SiteConfig{}, nil
+}
+
+// GetAlertByName has no hook; it returns a zero AlertConfig and a nil error
+// for every name.
+func (m *BaseMock) GetAlertByName(_ context.Context, _ string) (models.AlertConfig, error) {
+	return models.AlertConfig{}, nil
+}
+
+// AddSiteReturningID has no hook; it always returns ID 0 and a nil error.
+func (m *BaseMock) AddSiteReturningID(_ context.Context, _ models.SiteConfig) (int, error) {
+	return 0, nil
+}
+
+// AddAlertReturningID has no hook; it always returns ID 0 and a nil error.
+func (m *BaseMock) AddAlertReturningID(_ context.Context, _ string, _ string, _ map[string]string) (int, error) {
+	return 0, nil
+}
+
+// GetAllUsers calls GetAllUsersFunc when it is set; otherwise it returns
+// nil, nil.
+func (m *BaseMock) GetAllUsers(ctx context.Context) ([]models.User, error) {
+	if m.GetAllUsersFunc != nil {
+		return m.GetAllUsersFunc(ctx)
+	}
+	return nil, nil
+}
+
+// AddUser has no hook; it ignores its arguments and returns nil.
+func (m *BaseMock) AddUser(_ context.Context, _ string, _ string, _ string) error { return nil }
+
+// UpdateUser has no hook; it ignores its arguments and returns nil.
+func (m *BaseMock) UpdateUser(_ context.Context, _ int, _ string, _ string, _ string) error {
+	return nil
+}
+
+// DeleteUser has no hook; it ignores its arguments and returns nil.
+func (m *BaseMock) DeleteUser(_ context.Context, _ int) error { return nil }
+
+// SaveCheck calls SaveCheckFunc when it is set; otherwise it returns nil.
+func (m *BaseMock) SaveCheck(ctx context.Context, siteID int, latencyNs int64, isUp bool) error {
+	if m.SaveCheckFunc != nil {
+		return m.SaveCheckFunc(ctx, siteID, latencyNs, isUp)
+	}
+	return nil
+}
+
+// SaveCheckFromNode calls SaveCheckFromNodeFunc when it is set; otherwise it
+// returns nil.
+func (m *BaseMock) SaveCheckFromNode(ctx context.Context, siteID int, nodeID string, latencyNs int64, isUp bool) error {
+	if m.SaveCheckFromNodeFunc != nil {
+		return m.SaveCheckFromNodeFunc(ctx, siteID, nodeID, latencyNs, isUp)
+	}
+	return nil
+}
+
+// LoadAllHistory calls LoadAllHistoryFunc when it is set; otherwise it
+// returns a nil map and a nil error.
+func (m *BaseMock) LoadAllHistory(ctx context.Context, limit int) (map[int][]models.CheckRecord, error) {
+	if m.LoadAllHistoryFunc != nil {
+		return m.LoadAllHistoryFunc(ctx, limit)
+	}
+	return nil, nil
+}
+
+// PruneCheckHistory has no hook; it always returns nil.
+func (m *BaseMock) PruneCheckHistory(_ context.Context) error { return nil }
+
+// SaveStateChange calls SaveStateChangeFunc when it is set; otherwise it
+// returns nil.
+func (m *BaseMock) SaveStateChange(ctx context.Context, siteID int, from, to, reason string) error {
+	if m.SaveStateChangeFunc != nil {
+		return m.SaveStateChangeFunc(ctx, siteID, from, to, reason)
+	}
+	return nil
+}
+
+// GetStateChanges calls GetStateChangesFunc when it is set; otherwise it
+// returns nil, nil.
+func (m *BaseMock) GetStateChanges(ctx context.Context, siteID, limit int) ([]models.StateChange, error) {
+	if m.GetStateChangesFunc != nil {
+		return m.GetStateChangesFunc(ctx, siteID, limit)
+	}
+	return nil, nil
+}
+
+// GetStateChangesSince calls GetStateChangesSinceFunc when it is set;
+// otherwise it returns nil, nil.
+func (m *BaseMock) GetStateChangesSince(ctx context.Context, siteID int, since time.Time) ([]models.StateChange, error) {
+	if m.GetStateChangesSinceFunc != nil {
+		return m.GetStateChangesSinceFunc(ctx, siteID, since)
+	}
+	return nil, nil
+}
+
+// PruneStateChanges has no hook; it always returns nil.
+func (m *BaseMock) PruneStateChanges(_ context.Context) error { return nil }
+
+// RegisterNode calls RegisterNodeFunc when it is set; otherwise it returns
+// nil.
+func (m *BaseMock) RegisterNode(ctx context.Context, node models.ProbeNode) error {
+	if m.RegisterNodeFunc != nil {
+		return m.RegisterNodeFunc(ctx, node)
+	}
+	return nil
+}
+
+// GetNode calls GetNodeFunc when it is set; otherwise it returns a zero
+// ProbeNode and a nil error.
+func (m *BaseMock) GetNode(ctx context.Context, id string) (models.ProbeNode, error) {
+	if m.GetNodeFunc != nil {
+		return m.GetNodeFunc(ctx, id)
+	}
+	return models.ProbeNode{}, nil
+}
+
+// GetAllNodes calls GetAllNodesFunc when it is set; otherwise it returns
+// nil, nil.
+func (m *BaseMock) GetAllNodes(ctx context.Context) ([]models.ProbeNode, error) {
+	if m.GetAllNodesFunc != nil {
+		return m.GetAllNodesFunc(ctx)
+	}
+	return nil, nil
+}
+
+// UpdateNodeLastSeen and DeleteNode have no hooks; both always return nil.
+func (m *BaseMock) UpdateNodeLastSeen(_ context.Context, _ string) error { return nil }
+func (m *BaseMock) DeleteNode(_ context.Context, _ string) error         { return nil }
+
+// LoadAlertHealth calls LoadAlertHealthFunc when it is set; otherwise it
+// returns a nil map and a nil error.
+func (m *BaseMock) LoadAlertHealth(ctx context.Context) (map[int]models.AlertHealthRecord, error) {
+	if m.LoadAlertHealthFunc != nil {
+		return m.LoadAlertHealthFunc(ctx)
+	}
+	return nil, nil
+}
+
+// SaveAlertHealth calls SaveAlertHealthFunc when it is set; otherwise it
+// returns nil.
+func (m *BaseMock) SaveAlertHealth(ctx context.Context, h models.AlertHealthRecord) error {
+	if m.SaveAlertHealthFunc != nil {
+		return m.SaveAlertHealthFunc(ctx, h)
+	}
+	return nil
+}
+
+// SaveLog calls SaveLogFunc when it is set; otherwise it returns nil.
+func (m *BaseMock) SaveLog(ctx context.Context, message string) error {
+	if m.SaveLogFunc != nil {
+		return m.SaveLogFunc(ctx, message)
+	}
+	return nil
+}
+
+// LoadLogs and PruneLogs have no hooks: LoadLogs always returns nil, nil and
+// PruneLogs always returns nil.
+func (m *BaseMock) LoadLogs(_ context.Context, _ int) ([]string, error) { return nil, nil }
+func (m *BaseMock) PruneLogs(_ context.Context) error                   { return nil }
+
+// GetActiveMaintenanceWindows calls GetActiveMaintenanceWindowsFunc when it
+// is set; otherwise it returns nil, nil.
+func (m *BaseMock) GetActiveMaintenanceWindows(ctx context.Context) ([]models.MaintenanceWindow, error) {
+	if m.GetActiveMaintenanceWindowsFunc != nil {
+		return m.GetActiveMaintenanceWindowsFunc(ctx)
+	}
+	return nil, nil
+}
+
+// GetAllMaintenanceWindows calls GetAllMaintenanceWindowsFunc when it is
+// set; otherwise it returns nil, nil.
+func (m *BaseMock) GetAllMaintenanceWindows(ctx context.Context, limit int) ([]models.MaintenanceWindow, error) {
+	if m.GetAllMaintenanceWindowsFunc != nil {
+		return m.GetAllMaintenanceWindowsFunc(ctx, limit)
+	}
+	return nil, nil
+}
+
+// AddMaintenanceWindow has no hook; it ignores its arguments and returns nil.
+func (m *BaseMock) AddMaintenanceWindow(_ context.Context, _ models.MaintenanceWindow) error {
+	return nil
+}
+
+// EndMaintenanceWindow and DeleteMaintenanceWindow have no hooks; both
+// always return nil.
+func (m *BaseMock) EndMaintenanceWindow(_ context.Context, _ int) error    { return nil }
+func (m *BaseMock) DeleteMaintenanceWindow(_ context.Context, _ int) error { return nil }
+
+// PruneExpiredMaintenanceWindows has no hook; it always reports 0 pruned
+// rows and a nil error.
+func (m *BaseMock) PruneExpiredMaintenanceWindows(_ context.Context, _ time.Duration) (int64, error) {
+	return 0, nil
+}
+
+// IsMonitorInMaintenance calls IsMonitorInMaintenanceFunc when it is set;
+// otherwise it returns false, nil.
+func (m *BaseMock) IsMonitorInMaintenance(ctx context.Context, id int) (bool, error) {
+	if m.IsMonitorInMaintenanceFunc != nil {
+		return m.IsMonitorInMaintenanceFunc(ctx, id)
+	}
+	return false, nil
+}
+
+// GetPreference calls GetPreferenceFunc when it is set; otherwise it returns
+// an empty string and a nil error.
+func (m *BaseMock) GetPreference(ctx context.Context, key string) (string, error) {
+	if m.GetPreferenceFunc != nil {
+		return m.GetPreferenceFunc(ctx, key)
+	}
+	return "", nil
+}
+
+// SetPreference calls SetPreferenceFunc when it is set; otherwise it returns
+// nil.
+func (m *BaseMock) SetPreference(ctx context.Context, key, value string) error {
+	if m.SetPreferenceFunc != nil {
+		return m.SetPreferenceFunc(ctx, key, value)
+	}
+	return nil
+}
+
+// ExportData calls ExportDataFunc when it is set; otherwise it returns a
+// zero Backup and a nil error.
+func (m *BaseMock) ExportData(ctx context.Context) (models.Backup, error) {
+	if m.ExportDataFunc != nil {
+		return m.ExportDataFunc(ctx)
+	}
+	return models.Backup{}, nil
+}
+
+// ImportData calls ImportDataFunc when it is set; otherwise it returns nil.
+func (m *BaseMock) ImportData(ctx context.Context, data models.Backup) error {
+	if m.ImportDataFunc != nil {
+		return m.ImportDataFunc(ctx, data)
+	}
+	return nil
+}
@@ -1,17 +1,20 @@
 package tui

 import (
+	"context"
 	"encoding/json"
 	"sort"
 	"strings"
+	"time"

 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
+	tea "github.com/charmbracelet/bubbletea"
 )

 func loadCollapsed(s store.Store) map[int]bool {
 	m := make(map[int]bool)
-	raw, err := s.GetPreference("collapsed_groups")
+	raw, err := s.GetPreference(context.Background(), "collapsed_groups")
 	if err != nil || raw == "" {
 		return m
 	}
@@ -25,7 +28,9 @@ func loadCollapsed(s store.Store) map[int]bool {
 	return m
 }

-func saveCollapsed(s store.Store, collapsed map[int]bool) {
+// collapsedJSON snapshots the collapsed-group set for persistence. Marshaling
+// happens on the UI goroutine so the write Cmd never reads the live map.
+func collapsedJSON(collapsed map[int]bool) string {
 	var ids []int
 	for id, v := range collapsed {
 		if v {
@@ -33,7 +38,15 @@ func saveCollapsed(s store.Store, collapsed map[int]bool) {
 		}
 	}
 	data, _ := json.Marshal(ids)
-	_ = s.SetPreference("collapsed_groups", string(data))
+	return string(data)
+}
+
+// writeCmd runs a store mutation off the UI goroutine. The closure must only
+// capture values snapshotted in Update — never the model itself.
+//
+// The returned Cmd invokes fn and wraps its result in a writeDoneMsg whose op
+// field carries the given label and whose err field carries fn's return
+// value (nil on success).
+func writeCmd(op string, fn func() error) tea.Cmd {
+	return func() tea.Msg {
+		return writeDoneMsg{op: op, err: fn()}
+	}
 }

 func sortSitesForDisplay(allSites []models.Site, collapsed map[int]bool) []models.Site {
@@ -80,41 +93,39 @@ func filterSites(sites []models.Site, needle string) []models.Site {
 	return filtered
 }

-func (m *Model) refreshData() {
+// refreshLive updates everything sourced from in-memory engine copies — the
+// live site list (sorted + filtered) and the log viewport. It does no database
+// IO, so it is safe to call on every tick. DB-backed tab data is loaded
+// separately via loadTabDataCmd.
+func (m *Model) refreshLive() {
 	allSites := m.engine.GetAllSites()
 	ordered := sortSitesForDisplay(allSites, m.collapsed)
 	if m.filterText != "" {
 		ordered = filterSites(ordered, m.filterText)
 	}
 	m.sites = ordered
+	m.refreshLogContent()

-	if alerts, err := m.store.GetAllAlerts(); err == nil {
-		m.alerts = alerts
-	}
-	if m.isAdmin {
-		if users, err := m.store.GetAllUsers(); err == nil {
-			m.users = users
+	if m.currentTab == 0 && m.selectedID != 0 {
+		for i, s := range m.sites {
+			if s.ID == m.selectedID {
+				m.cursor = i
+				break
 			}
 		}
-	if nodes, err := m.store.GetAllNodes(); err == nil {
-		m.nodes = nodes
 	}
-	if windows, err := m.store.GetAllMaintenanceWindows(100); err == nil {
-		m.maintenanceWindows = windows
-	}
-	m.logViewport.SetContent(strings.Join(m.engine.GetLogs(), "\n"))
+	m.clampCursor()
+}

-	listLen := len(m.sites)
-	switch m.currentTab {
-	case 1:
-		listLen = len(m.alerts)
-	case 3:
-		listLen = len(m.nodes)
-	case 4:
-		listLen = len(m.maintenanceWindows)
-	case 5:
-		listLen = len(m.users)
+// syncSelectedID records the ID of the site under the cursor in selectedID.
+// It only acts when tab 0 is current and the cursor is below len(m.sites);
+// in every other case selectedID is left unchanged.
+// NOTE(review): only the upper bound is checked — assumes cursor is never
+// negative; confirm against callers.
+func (m *Model) syncSelectedID() {
+	if m.currentTab == 0 && m.cursor < len(m.sites) {
+		m.selectedID = m.sites[m.cursor].ID
 	}
+}
+
+// clampCursor keeps the cursor and scroll offset within the current tab's list.
+func (m *Model) clampCursor() {
+	listLen := m.currentListLen()
 	if listLen > 0 && m.cursor >= listLen {
 		m.cursor = listLen - 1
 	}
@@ -122,3 +133,71 @@ func (m *Model) refreshData() {
 		m.tableOffset = m.cursor
 	}
 }
+
+// loadTabDataCmd builds a tea.Cmd that fetches the DB-backed tab tables
+// (alerts, users, nodes, maintenance windows) off the UI goroutine and
+// delivers them as a tabDataMsg.
+//
+// tabSeq is incremented on every call and the reply is stamped with the new
+// value, so handleTabData can drop out-of-order results from slower earlier
+// loads. The closure works on copies of the stable fields (store, isAdmin)
+// taken here and never touches the model. Users are fetched only for admins.
+// The first store error aborts the load and yields an error-only msg, so the
+// model keeps its previous data.
+func (m *Model) loadTabDataCmd() tea.Cmd {
+	m.tabSeq++
+	var (
+		seq   = m.tabSeq
+		db    = m.store
+		admin = m.isAdmin
+	)
+	return func() tea.Msg {
+		ctx := context.Background()
+		// failed builds the error-only reply shared by every early exit.
+		failed := func(err error) tea.Msg {
+			return tabDataMsg{seq: seq, err: err}
+		}
+
+		alerts, err := db.GetAllAlerts(ctx)
+		if err != nil {
+			return failed(err)
+		}
+
+		var users []models.User
+		if admin {
+			users, err = db.GetAllUsers(ctx)
+			if err != nil {
+				return failed(err)
+			}
+		}
+
+		nodes, err := db.GetAllNodes(ctx)
+		if err != nil {
+			return failed(err)
+		}
+
+		windows, err := db.GetAllMaintenanceWindows(ctx, 100)
+		if err != nil {
+			return failed(err)
+		}
+
+		return tabDataMsg{seq: seq, alerts: alerts, users: users, nodes: nodes, maint: windows}
+	}
+}
+
+// loadDetailCmd builds a tea.Cmd that fetches up to five state changes for
+// siteID from the engine off the UI goroutine and returns them as a
+// detailDataMsg. View renders the cached result rather than querying the DB.
+func (m *Model) loadDetailCmd(siteID int) tea.Cmd {
+	engine := m.engine
+	return func() tea.Msg {
+		recent := engine.GetStateChanges(siteID, 5)
+		return detailDataMsg{siteID: siteID, changes: recent}
+	}
+}
+
+// loadHistoryCmd builds a tea.Cmd that fetches up to 100 state changes for
+// siteID from the engine off the UI goroutine and returns them as a
+// historyDataMsg for the history view.
+func (m *Model) loadHistoryCmd(siteID int) tea.Cmd {
+	engine := m.engine
+	return func() tea.Msg {
+		all := engine.GetStateChanges(siteID, 100)
+		return historyDataMsg{siteID: siteID, changes: all}
+	}
+}
+
+// loadSLACmd builds a tea.Cmd that fetches the state changes backing the SLA
+// view off the UI goroutine. The reply echoes the request's site and period
+// so a stale reply can be recognized and dropped.
+//
+// The window start is fixed when the Cmd is built (now minus the duration of
+// slaPeriods[periodIdx]), not when the Cmd later runs. periodIdx must be a
+// valid index into slaPeriods.
+func (m *Model) loadSLACmd(siteID, periodIdx int) tea.Cmd {
+	engine := m.engine
+	window := slaPeriods[periodIdx].duration
+	cutoff := time.Now().Add(-window)
+	return func() tea.Msg {
+		changes := engine.GetStateChangesSince(siteID, cutoff)
+		return slaDataMsg{siteID: siteID, periodIdx: periodIdx, changes: changes}
+	}
+}
@@ -8,9 +8,9 @@ import (

 func TestSortSitesForDisplay_GroupsFirst(t *testing.T) {
 	sites := []models.Site{
-		{ID: 3, Name: "ungrouped", Type: "http", Status: "UP"},
-		{ID: 1, Name: "group-a", Type: "group", Status: "UP"},
-		{ID: 2, Name: "child", Type: "http", Status: "UP", ParentID: 1},
+		{SiteConfig: models.SiteConfig{ID: 3, Name: "ungrouped", Type: "http"}, SiteState: models.SiteState{Status: "UP"}},
+		{SiteConfig: models.SiteConfig{ID: 1, Name: "group-a", Type: "group"}, SiteState: models.SiteState{Status: "UP"}},
+		{SiteConfig: models.SiteConfig{ID: 2, Name: "child", Type: "http", ParentID: 1}, SiteState: models.SiteState{Status: "UP"}},
 	}
 	result := sortSitesForDisplay(sites, nil)
 	if len(result) != 3 {
@@ -29,9 +29,9 @@ func TestSortSitesForDisplay_GroupsFirst(t *testing.T) {

 func TestSortSitesForDisplay_CollapsedHidesChildren(t *testing.T) {
 	sites := []models.Site{
-		{ID: 1, Name: "group-a", Type: "group", Status: "UP"},
-		{ID: 2, Name: "child-1", Type: "http", Status: "UP", ParentID: 1},
-		{ID: 3, Name: "child-2", Type: "http", Status: "UP", ParentID: 1},
+		{SiteConfig: models.SiteConfig{ID: 1, Name: "group-a", Type: "group"}, SiteState: models.SiteState{Status: "UP"}},
+		{SiteConfig: models.SiteConfig{ID: 2, Name: "child-1", Type: "http", ParentID: 1}, SiteState: models.SiteState{Status: "UP"}},
+		{SiteConfig: models.SiteConfig{ID: 3, Name: "child-2", Type: "http", ParentID: 1}, SiteState: models.SiteState{Status: "UP"}},
 	}
 	collapsed := map[int]bool{1: true}
 	result := sortSitesForDisplay(sites, collapsed)
@@ -45,9 +45,9 @@ func TestSortSitesForDisplay_CollapsedHidesChildren(t *testing.T) {

 func TestSortSitesForDisplay_StatusOrdering(t *testing.T) {
 	sites := []models.Site{
-		{ID: 1, Name: "up-site", Type: "http", Status: "UP"},
-		{ID: 2, Name: "down-site", Type: "http", Status: "DOWN"},
-		{ID: 3, Name: "late-site", Type: "http", Status: "LATE"},
+		{SiteConfig: models.SiteConfig{ID: 1, Name: "up-site", Type: "http"}, SiteState: models.SiteState{Status: "UP"}},
+		{SiteConfig: models.SiteConfig{ID: 2, Name: "down-site", Type: "http"}, SiteState: models.SiteState{Status: "DOWN"}},
+		{SiteConfig: models.SiteConfig{ID: 3, Name: "late-site", Type: "http"}, SiteState: models.SiteState{Status: "LATE"}},
 	}
 	result := sortSitesForDisplay(sites, nil)
 	if result[0].Status != "DOWN" {
@@ -63,9 +63,9 @@ func TestSortSitesForDisplay_StatusOrdering(t *testing.T) {

 func TestFilterSites(t *testing.T) {
 	sites := []models.Site{
-		{Name: "Production API"},
-		{Name: "Staging API"},
-		{Name: "Database"},
+		{SiteConfig: models.SiteConfig{Name: "Production API"}},
+		{SiteConfig: models.SiteConfig{Name: "Staging API"}},
+		{SiteConfig: models.SiteConfig{Name: "Database"}},
 	}

 	tests := []struct {
@@ -87,7 +87,7 @@ func TestFilterSites(t *testing.T) {
 }

 func TestFilterSites_EmptyNeedle(t *testing.T) {
-	sites := []models.Site{{Name: "a"}, {Name: "b"}}
+	sites := []models.Site{{SiteConfig: models.SiteConfig{Name: "a"}}, {SiteConfig: models.SiteConfig{Name: "b"}}}
 	got := filterSites(sites, "")
 	if len(got) != 2 {
 		t.Errorf("empty needle should return all, got %d", len(got))
@@ -2,12 +2,41 @@ package tui

 import (
 	"fmt"
+	"strings"
 	"time"

 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"github.com/charmbracelet/lipgloss"
 )

+// dividerWidth returns the divider rule length: the terminal width minus the
+// horizontal chrome padding and a further 4 columns, never less than 40.
+func (m Model) dividerWidth() int {
+	const minWidth = 40
+	if avail := m.termWidth - chromePadH - 4; avail >= minWidth {
+		return avail
+	}
+	return minWidth
+}
+
+// divider renders a horizontal rule of dividerWidth "─" characters in the
+// subtle style, indented by two spaces.
+func (m Model) divider() string {
+	rule := strings.Repeat("─", m.dividerWidth())
+	return "  " + m.st.subtleStyle.Render(rule)
+}
+
+// emptyState renders a placeholder box for an empty list: message inside a
+// rounded, accent-colored border with padding (1, 3), preceded by a newline.
+// When hint is non-empty it is appended below the message, separated by a
+// blank line and rendered in the subtle style.
+func (m Model) emptyState(message, hint string) string {
+	box := lipgloss.NewStyle().
+		Border(lipgloss.RoundedBorder()).
+		BorderForeground(m.theme.Accent).
+		Padding(1, 3)
+
+	body := message
+	if hint != "" {
+		body = message + "\n\n" + m.st.subtleStyle.Render(hint)
+	}
+	return "\n" + box.Render(body)
+}
+
 func limitStr(text string, max int) string {
+	if max < 3 {
+		return text
+	}
 	runes := []rune(text)
 	if len(runes) > max {
 		return string(runes[:max-3]) + "..."
@@ -55,10 +84,10 @@ func typeIcon(siteType string, collapsed bool) string {
 	}
 }

-func fmtLatency(d time.Duration) string {
+func (m Model) fmtLatency(d time.Duration) string {
 	ms := d.Milliseconds()
 	if ms == 0 {
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	}
 	var s string
 	if ms < 1000 {
@@ -67,17 +96,17 @@ func fmtLatency(d time.Duration) string {
 		s = fmt.Sprintf("%.1fs", float64(ms)/1000)
 	}
 	if ms < 200 {
-		return specialStyle.Render(s)
+		return m.st.specialStyle.Render(s)
 	}
 	if ms < 500 {
-		return warnStyle.Render(s)
+		return m.st.warnStyle.Render(s)
 	}
-	return dangerStyle.Render(s)
+	return m.st.dangerStyle.Render(s)
 }

-func fmtUptime(statuses []bool) string {
+func (m Model) fmtUptime(statuses []bool) string {
 	if len(statuses) == 0 {
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	}
 	up := 0
 	for _, s := range statuses {
@@ -88,71 +117,84 @@ func fmtUptime(statuses []bool) string {
 	pct := float64(up) / float64(len(statuses)) * 100
 	s := fmt.Sprintf("%.1f%%", pct)
 	if pct >= 99 {
-		return specialStyle.Render(s)
+		return m.st.specialStyle.Render(s)
 	}
 	if pct >= 95 {
-		return warnStyle.Render(s)
+		return m.st.warnStyle.Render(s)
 	}
-	return dangerStyle.Render(s)
+	return m.st.dangerStyle.Render(s)
 }

-func fmtSSL(site models.Site) string {
+func (m Model) fmtSSL(site models.Site) string {
 	if site.Type != "http" || !site.CheckSSL || !site.HasSSL {
-		return subtleStyle.Render("-")
+		return m.st.subtleStyle.Render("-")
 	}
 	days := int(time.Until(site.CertExpiry).Hours() / 24)
 	s := fmt.Sprintf("%dd", days)
 	if days <= 0 {
-		return dangerStyle.Render("EXPIRED")
+		return m.st.dangerStyle.Render("EXPIRED")
 	}
 	if days <= site.ExpiryThreshold {
-		return warnStyle.Render(s)
+		return m.st.warnStyle.Render(s)
 	}
-	return specialStyle.Render(s)
+	return m.st.specialStyle.Render(s)
 }

-func fmtRetries(site models.Site) string {
+func (m Model) fmtRetries(site models.Site) string {
 	dispCount := site.FailureCount
 	if dispCount > site.MaxRetries {
 		dispCount = site.MaxRetries
 	}
 	s := fmt.Sprintf("%d/%d", dispCount, site.MaxRetries)
-	if site.Status == "DOWN" {
-		return dangerStyle.Render(s)
+	if site.Status == models.StatusDown {
+		return m.st.dangerStyle.Render(s)
 	}
-	if site.Status == "UP" && site.FailureCount > 0 {
-		return warnStyle.Render(s)
+	if site.Status == models.StatusUp && site.FailureCount > 0 {
+		return m.st.warnStyle.Render(s)
 	}
 	return s
 }

-func fmtStatus(status string, paused bool, inMaint bool, errCategory ErrorCategory) string {
+func (m Model) fmtStatus(status models.Status, paused bool, inMaint bool) string {
 	if paused {
-		return warnStyle.Render("PAUSED")
+		return m.st.warnStyle.Render("◇ PAUSED")
 	}
 	if inMaint {
-		return maintStyle.Render("MAINT")
+		return m.st.maintStyle.Render("◼ MAINT")
 	}
 	switch status {
-	case "DOWN":
-		label := "DOWN"
-		if errCategory != ErrCatUnknown {
-			label = "DOWN:" + string(errCategory)
-		}
-		return dangerStyle.Render(label)
-	case "SSL EXP":
-		return dangerStyle.Render(status)
-	case "LATE":
-		return warnStyle.Render(status)
-	case "STALE":
-		return staleStyle.Render(status)
-	case "PENDING":
-		return subtleStyle.Render(status)
+	case models.StatusDown:
+		return m.st.dangerStyle.Render("▼ DOWN")
+	case models.StatusSSLExp:
+		return m.st.dangerStyle.Render("▼ SSL EXP")
+	case models.StatusLate:
+		return m.st.warnStyle.Render("◆ LATE")
+	case models.StatusStale:
+		return m.st.staleStyle.Render("◆ STALE")
+	case models.StatusPending:
+		return m.st.subtleStyle.Render("○ PENDING")
 	default:
-		return specialStyle.Render(status)
+		return m.st.specialStyle.Render("▲ " + string(status))
 	}
 }

+// fmtTimeAgo formats how long ago t occurred as a compact relative age:
+// "Ns ago", "Nm ago", "Nh ago" or "Nd ago", picking the largest unit that
+// fits. The zero time renders as a subtle-styled "never".
+//
+// A t that lies in the future (clock adjustment, skewed timestamp) is clamped
+// to "0s ago" rather than printing a negative age such as "-3s ago".
+func (m Model) fmtTimeAgo(t time.Time) string {
+	if t.IsZero() {
+		return m.st.subtleStyle.Render("never")
+	}
+	d := time.Since(t)
+	if d < 0 {
+		d = 0
+	}
+	switch {
+	case d < time.Minute:
+		return fmt.Sprintf("%ds ago", int(d.Seconds()))
+	case d < time.Hour:
+		return fmt.Sprintf("%dm ago", int(d.Minutes()))
+	case d < 24*time.Hour:
+		return fmt.Sprintf("%dh ago", int(d.Hours()))
+	default:
+		return fmt.Sprintf("%dd ago", int(d.Hours())/24)
+	}
+}
+
 func fmtDuration(d time.Duration) string {
 	if d < time.Minute {
 		return fmt.Sprintf("%ds", int(d.Seconds()))
@@ -7,9 +7,8 @@ import (
 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 )

-func init() {
-	applyTheme(themeFlexokiDark)
-}
+// styledModel carries a default-theme styles instance for render-helper tests.
+var styledModel = Model{st: newStyles(themeFlexokiDark)}

 func TestLimitStr(t *testing.T) {
 	tests := []struct {
@@ -39,13 +38,13 @@ func TestSiteOrder(t *testing.T) {
 		site models.Site
 		want int
 	}{
-		{"down", models.Site{Status: "DOWN"}, 0},
-		{"ssl exp", models.Site{Status: "SSL EXP"}, 0},
-		{"late", models.Site{Status: "LATE"}, 1},
-		{"up", models.Site{Status: "UP"}, 2},
-		{"pending", models.Site{Status: "PENDING"}, 3},
-		{"paused up", models.Site{Status: "UP", Paused: true}, 3},
-		{"paused down", models.Site{Status: "DOWN", Paused: true}, 3},
+		{"down", models.Site{SiteState: models.SiteState{Status: "DOWN"}}, 0},
+		{"ssl exp", models.Site{SiteState: models.SiteState{Status: "SSL EXP"}}, 0},
+		{"late", models.Site{SiteState: models.SiteState{Status: "LATE"}}, 1},
+		{"up", models.Site{SiteState: models.SiteState{Status: "UP"}}, 2},
+		{"pending", models.Site{SiteState: models.SiteState{Status: "PENDING"}}, 3},
+		{"paused up", models.Site{SiteConfig: models.SiteConfig{Paused: true}, SiteState: models.SiteState{Status: "UP"}}, 3},
+		{"paused down", models.Site{SiteConfig: models.SiteConfig{Paused: true}, SiteState: models.SiteState{Status: "DOWN"}}, 3},
 	}
 	for _, tt := range tests {
 		got := siteOrder(tt.site)
@@ -55,32 +54,27 @@ func TestSiteOrder(t *testing.T) {
 	}
 }

-func TestFmtStatus_ErrorCategory(t *testing.T) {
+func TestFmtStatus(t *testing.T) {
 	tests := []struct {
-		status  string
+		status  models.Status
 		paused  bool
 		inMaint bool
-		cat     ErrorCategory
 		wantSub string
 	}{
-		{"DOWN", false, false, ErrCatDNS, "DOWN:DNS"},
-		{"DOWN", false, false, ErrCatTLS, "DOWN:TLS"},
-		{"DOWN", false, false, ErrCatHTTP, "DOWN:HTTP"},
-		{"DOWN", false, false, ErrCatTCP, "DOWN:TCP"},
-		{"DOWN", false, false, ErrCatTimeout, "DOWN:TMO"},
-		{"DOWN", false, false, ErrCatICMP, "DOWN:ICMP"},
-		{"DOWN", false, false, ErrCatPrivate, "DOWN:PRIV"},
-		{"DOWN", false, false, ErrCatUnknown, "DOWN"},
-		{"UP", false, false, ErrCatUnknown, "UP"},
-		{"SSL EXP", false, false, ErrCatUnknown, "SSL EXP"},
-		{"DOWN", true, false, ErrCatDNS, "PAUSED"},
-		{"DOWN", false, true, ErrCatDNS, "MAINT"},
+		{models.StatusDown, false, false, "▼ DOWN"},
+		{models.StatusUp, false, false, "▲ UP"},
+		{models.StatusSSLExp, false, false, "▼ SSL EXP"},
+		{models.StatusLate, false, false, "◆ LATE"},
+		{models.StatusStale, false, false, "◆ STALE"},
+		{models.StatusPending, false, false, "○ PENDING"},
+		{models.StatusDown, true, false, "◇ PAUSED"},
+		{models.StatusDown, false, true, "◼ MAINT"},
 	}
 	for _, tt := range tests {
-		got := fmtStatus(tt.status, tt.paused, tt.inMaint, tt.cat)
+		got := styledModel.fmtStatus(tt.status, tt.paused, tt.inMaint)
 		if !containsPlain(got, tt.wantSub) {
-			t.Errorf("fmtStatus(%q, paused=%v, maint=%v, %q): %q missing %q",
-				tt.status, tt.paused, tt.inMaint, tt.cat, got, tt.wantSub)
+			t.Errorf("fmtStatus(%q, paused=%v, maint=%v): %q missing %q",
+				tt.status, tt.paused, tt.inMaint, got, tt.wantSub)
 		}
 	}
 }
@@ -141,7 +135,7 @@ func TestFmtUptime(t *testing.T) {
 		{"all down", []bool{false, false}, "0.0%"},
 	}
 	for _, tt := range tests {
-		got := fmtUptime(tt.statuses)
+		got := styledModel.fmtUptime(tt.statuses)
 		if !containsPlain(got, tt.wantSub) {
 			t.Errorf("fmtUptime(%s): %q missing %q", tt.name, got, tt.wantSub)
 		}
@@ -159,7 +153,7 @@ func TestFmtLatency(t *testing.T) {
 		{1500 * time.Millisecond, "1.5s"},
 	}
 	for _, tt := range tests {
-		got := fmtLatency(tt.d)
+		got := styledModel.fmtLatency(tt.d)
 		if !containsPlain(got, tt.wantSub) {
 			t.Errorf("fmtLatency(%v): %q missing %q", tt.d, got, tt.wantSub)
 		}
@@ -0,0 +1,64 @@
+package tui
+
+import (
+	"time"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+)
+
+// tabRefreshTTL bounds how often the DB-backed tab data (alerts, users, nodes,
+// maintenance windows) is reloaded. Live sites + logs come from in-memory
+// engine copies and refresh every tick; the DB tables change rarely, so a 5s
+// floor keeps tab-bar counts fresh without a per-second query storm.
+const tabRefreshTTL = 5 * time.Second
+
+// tickMsg is the once-per-second heartbeat. A named type (vs a bare time.Time)
+// keeps it from colliding with any other time-valued message.
+type tickMsg time.Time
+
+// tabDataMsg carries the result of an async load of the DB-backed tab tables.
+// On err, the model keeps its previous data and logs — never wiping the view on
+// a transient store error. seq orders in-flight loads: replies whose seq is
+// older than the model's current tabSeq are dropped, so a slow load can never
+// overwrite the result of a newer one.
+type tabDataMsg struct {
+	seq    int
+	alerts []models.AlertConfig
+	users  []models.User
+	nodes  []models.ProbeNode
+	maint  []models.MaintenanceWindow
+	err    error
+}
+
+// detailDataMsg carries the state-change history for the detail panel, loaded
+// on entry and refreshed on the tab-data cadence so View never touches the
+// database.
+type detailDataMsg struct {
+	siteID  int
+	changes []models.StateChange
+}
+
+// historyDataMsg carries the full state-change history for the history view.
+// siteID guards against a slow reply landing after the user opened a
+// different site's history.
+type historyDataMsg struct {
+	siteID  int
+	changes []models.StateChange
+}
+
+// slaDataMsg carries the state changes backing the SLA view for one
+// site+period request. siteID and periodIdx guard stale replies the same way
+// historyDataMsg does.
+type slaDataMsg struct {
+	siteID    int
+	periodIdx int
+	changes   []models.StateChange
+}
+
+// writeDoneMsg reports a store mutation that ran off the UI goroutine. op
+// names the action for the error log; the handler reloads tab data so the UI
+// converges on what was actually written.
+type writeDoneMsg struct {
+	op  string
+	err error
+}
@@ -1,15 +1,63 @@
 package tui

 import (
+	"fmt"
 	"strings"
 	"time"
+
+	"github.com/charmbracelet/lipgloss"
 )

 var sparkChars = []rune{'▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'}

-func latencySparkline(latencies []time.Duration, statuses []bool, width int) string {
+// parseHex decodes a "#rrggbb" colour string into its red, green and blue
+// components.
+//
+// The parse is all-or-nothing: if the input is not seven bytes long, does not
+// start with '#', or Sscanf cannot decode all three channels, the result is
+// 0, 0, 0. A string such as "#ffzzzz" therefore does not leak a half-decoded
+// red channel into the result.
+func parseHex(hex string) (r, g, b uint8) {
+	if len(hex) != 7 || hex[0] != '#' {
+		return 0, 0, 0
+	}
+	if n, err := fmt.Sscanf(hex[1:], "%02x%02x%02x", &r, &g, &b); err != nil || n != 3 {
+		return 0, 0, 0
+	}
+	return r, g, b
+}
+
+// dimColor scales the "#rrggbb" colour hex towards black and returns the
+// result as a lipgloss colour. brightness selects the scale factor linearly:
+// 0 keeps 30% of each channel, 1 keeps the channel unchanged.
+//
+// brightness is clamped to [0, 1] first (NaN counts as 0). Without the clamp
+// a factor outside that range can push a channel outside 0..255, and
+// converting such a float to uint8 gives an implementation-dependent value.
+func dimColor(hex string, brightness float64) lipgloss.Color {
+	switch {
+	case !(brightness >= 0): // negative or NaN
+		brightness = 0
+	case brightness > 1:
+		brightness = 1
+	}
+	r, g, b := parseHex(hex)
+	f := 0.3 + brightness*0.7
+	scale := func(c uint8) uint8 { return uint8(float64(c) * f) }
+	return lipgloss.Color(fmt.Sprintf("#%02x%02x%02x", scale(r), scale(g), scale(b)))
+}
+
+// withBg returns s with bg set as its background colour. An empty bg means
+// "no background": s is returned untouched.
+func withBg(s lipgloss.Style, bg lipgloss.Color) lipgloss.Style {
+	if bg == "" {
+		return s
+	}
+	return s.Background(bg)
+}
+
+// latencyStyle builds the style for one latency sample of ms milliseconds.
+// The band picks the base colour (under 200 ms: sparkSuccess, under 500 ms:
+// sparkWarning, otherwise sparkDanger) and the position inside the band picks
+// how bright dimColor renders it. The danger band saturates at 2000 ms. bg is
+// applied through withBg.
+func (m Model) latencyStyle(ms int64, bg lipgloss.Color) lipgloss.Style {
+	// Start from the danger band and override for the two faster bands.
+	base, pos := m.st.sparkDanger, float64(ms-500)/1500
+	if ms < 200 {
+		base, pos = m.st.sparkSuccess, float64(ms)/200
+	} else if ms < 500 {
+		base, pos = m.st.sparkWarning, float64(ms-200)/300
+	} else if pos > 1 {
+		pos = 1
+	}
+	fg := lipgloss.NewStyle().Foreground(dimColor(base, pos))
+	return withBg(fg, bg)
+}
+
+func (m Model) latencySparkline(latencies []time.Duration, statuses []bool, width int, bg lipgloss.Color) string {
 	if len(latencies) == 0 {
-		return subtleStyle.Render(strings.Repeat("·", width))
+		return withBg(m.st.subtleStyle, bg).Render(strings.Repeat("·", width))
 	}

 	samples := latencies
@@ -30,12 +78,12 @@ func latencySparkline(latencies []time.Duration, statuses []bool, width int) str
 			maxL = l
 		}
 	}
+	spread := maxL - minL

 	var sb strings.Builder
 	if remaining := width - len(samples); remaining > 0 {
-		sb.WriteString(subtleStyle.Render(strings.Repeat("·", remaining)))
+		sb.WriteString(withBg(m.st.subtleStyle, bg).Render(strings.Repeat("·", remaining)))
 	}
-	spread := maxL - minL
 	for i, l := range samples {
 		idx := 0
 		if spread > 0 {
@@ -47,24 +95,17 @@ func latencySparkline(latencies []time.Duration, statuses []bool, width int) str
 		ch := string(sparkChars[idx])
 		isDown := i < len(sampledStatuses) && !sampledStatuses[i]
 		if isDown {
-			sb.WriteString(dangerStyle.Render(ch))
+			sb.WriteString(withBg(m.st.dangerStyle, bg).Render(ch))
 		} else {
-			ms := l.Milliseconds()
-			if ms < 200 {
-				sb.WriteString(specialStyle.Render(ch))
-			} else if ms < 500 {
-				sb.WriteString(warnStyle.Render(ch))
-			} else {
-				sb.WriteString(dangerStyle.Render(ch))
-			}
+			sb.WriteString(m.latencyStyle(l.Milliseconds(), bg).Render(ch))
 		}
 	}
 	return sb.String()
 }

-func heartbeatSparkline(statuses []bool, width int) string {
+func (m Model) heartbeatSparkline(statuses []bool, width int, bg lipgloss.Color) string {
 	if len(statuses) == 0 {
-		return subtleStyle.Render(strings.Repeat("·", width))
+		return withBg(m.st.subtleStyle, bg).Render(strings.Repeat("·", width))
 	}

 	samples := statuses
@@ -74,19 +115,35 @@ func heartbeatSparkline(statuses []bool, width int) string {

 	var sb strings.Builder
 	if remaining := width - len(samples); remaining > 0 {
-		sb.WriteString(subtleStyle.Render(strings.Repeat("·", remaining)))
+		sb.WriteString(withBg(m.st.subtleStyle, bg).Render(strings.Repeat("·", remaining)))
 	}
 	for _, up := range samples {
 		if up {
-			sb.WriteString(specialStyle.Render("▁"))
+			sb.WriteString(withBg(m.st.specialStyle, bg).Render("▁"))
 		} else {
-			sb.WriteString(dangerStyle.Render("█"))
+			sb.WriteString(withBg(m.st.dangerStyle, bg).Render("█"))
 		}
 	}
 	return sb.String()
 }

-func (m Model) groupSparkline(groupID int, width int) string {
+// resolveSparklineIndex maps column x of a sparkline that is sparkWidth
+// columns wide to the index of the sample drawn in that column, given dataLen
+// samples.
+//
+// With fewer samples than columns the sparkline is left-padded, so the first
+// sparkWidth-dataLen columns hold no sample. With more samples than columns
+// only the last sparkWidth samples are drawn.
+//
+// It returns -1 for a padding column and for any x outside [0, sparkWidth),
+// so every non-negative result is a valid index into the data.
+func resolveSparklineIndex(x, sparkWidth, dataLen int) int {
+	if x < 0 || x >= sparkWidth {
+		return -1
+	}
+	visible := dataLen
+	if visible > sparkWidth {
+		visible = sparkWidth
+	}
+	padding := sparkWidth - visible
+	if x < padding {
+		return -1
+	}
+	// offset skips the samples that no longer fit on the left edge.
+	offset := dataLen - visible
+	return offset + (x - padding)
+}
+
+func (m Model) groupSparkline(groupID int, width int, bg lipgloss.Color) string {
 	allSites := m.engine.GetAllSites()
 	var childStatuses [][]bool
 	for _, s := range allSites {
@@ -99,7 +156,7 @@ func (m Model) groupSparkline(groupID int, width int) string {
 	}

 	if len(childStatuses) == 0 {
-		return subtleStyle.Render(strings.Repeat("·", width))
+		return withBg(m.st.subtleStyle, bg).Render(strings.Repeat("·", width))
 	}

 	maxLen := 0
@@ -127,13 +184,13 @@ func (m Model) groupSparkline(groupID int, width int) string {

 	var sb strings.Builder
 	if remaining := width - len(aggregated); remaining > 0 {
-		sb.WriteString(subtleStyle.Render(strings.Repeat("·", remaining)))
+		sb.WriteString(withBg(m.st.subtleStyle, bg).Render(strings.Repeat("·", remaining)))
 	}
 	for _, up := range aggregated {
 		if up {
-			sb.WriteString(specialStyle.Render("●"))
+			sb.WriteString(withBg(m.st.subtleStyle, bg).Render("·"))
 		} else {
-			sb.WriteString(dangerStyle.Render("●"))
+			sb.WriteString(withBg(m.st.dangerStyle, bg).Render("•"))
 		}
 	}
 	return sb.String()
@@ -151,7 +208,7 @@ func (m Model) groupUptime(groupID int) string {
 		}
 	}
 	if len(allStatuses) == 0 {
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	}
 	total, up := 0, 0
 	for _, statuses := range allStatuses {
@@ -162,7 +219,7 @@ func (m Model) groupUptime(groupID int) string {
 			}
 		}
 	}
-	return fmtUptime(func() []bool {
+	return m.fmtUptime(func() []bool {
 		out := make([]bool, total)
 		idx := 0
 		for _, statuses := range allStatuses {
@@ -4,10 +4,11 @@ import (
 	"strings"
 	"testing"
 	"time"
+	"unicode/utf8"
 )

 func TestLatencySparkline_Empty(t *testing.T) {
-	got := latencySparkline(nil, nil, 10)
+	got := styledModel.latencySparkline(nil, nil, 10, "")
 	if !strings.Contains(got, "··········") {
 		t.Errorf("empty sparkline should be dots, got %q", got)
 	}
@@ -16,10 +17,13 @@ func TestLatencySparkline_Empty(t *testing.T) {
 func TestLatencySparkline_SingleValue(t *testing.T) {
 	latencies := []time.Duration{100 * time.Millisecond}
 	statuses := []bool{true}
-	got := latencySparkline(latencies, statuses, 5)
+	got := styledModel.latencySparkline(latencies, statuses, 5, "")
 	if len(got) == 0 {
 		t.Error("sparkline should not be empty")
 	}
+	if !strings.Contains(got, "····") {
+		t.Errorf("single value with width=5 should have 4 dot padding, got %q", got)
+	}
 }

 func TestLatencySparkline_WidthTruncation(t *testing.T) {
@@ -29,14 +33,90 @@ func TestLatencySparkline_WidthTruncation(t *testing.T) {
 		latencies[i] = time.Duration(i*50) * time.Millisecond
 		statuses[i] = true
 	}
-	got := latencySparkline(latencies, statuses, 5)
+	got := styledModel.latencySparkline(latencies, statuses, 5, "")
 	if len(got) == 0 {
 		t.Error("sparkline should not be empty")
 	}
+	if strings.Contains(got, "·") {
+		t.Errorf("20 samples in width=5 should have no padding, got %q", got)
+	}
+}
+
+func TestLatencySparkline_RelativeHeight(t *testing.T) {
+	latencies := []time.Duration{10 * time.Millisecond, 50 * time.Millisecond, 10 * time.Millisecond}
+	statuses := []bool{true, true, true}
+	out := stripANSI(styledModel.latencySparkline(latencies, statuses, 3, ""))
+	runes := []rune(out)
+	if len(runes) < 3 {
+		t.Fatalf("expected 3 runes, got %d", len(runes))
+	}
+	if runes[0] == runes[1] {
+		t.Errorf("min and max should have different bar heights, got %c %c %c", runes[0], runes[1], runes[2])
+	}
+}
+
+func TestLatencyStyle_BandsProduceDifferentColors(t *testing.T) {
+	st := newStyles(themeFlexokiDark)
+	st.sparkSuccess = "#00ff00"
+	st.sparkWarning = "#ffff00"
+	st.sparkDanger = "#ff0000"
+	m := Model{st: st}
+
+	green := m.latencyStyle(50, "")
+	yellow := m.latencyStyle(300, "")
+	red := m.latencyStyle(800, "")
+
+	gfg := green.GetForeground()
+	yfg := yellow.GetForeground()
+	rfg := red.GetForeground()
+
+	if gfg == yfg || yfg == rfg || gfg == rfg {
+		t.Errorf("bands should produce distinct foreground colors: green=%v yellow=%v red=%v", gfg, yfg, rfg)
+	}
+}
+
+func TestLatencyStyle_BrightnessVariesWithinBand(t *testing.T) {
+	st := newStyles(themeFlexokiDark)
+	st.sparkSuccess = "#00ff00"
+	m := Model{st: st}
+
+	dim := m.latencyStyle(10, "")
+	bright := m.latencyStyle(190, "")
+
+	if dim.GetForeground() == bright.GetForeground() {
+		t.Error("10ms and 190ms should have different brightness within green band")
+	}
+}
+
+func TestLatencySparkline_OutputWidth(t *testing.T) {
+	latencies := []time.Duration{100 * time.Millisecond, 200 * time.Millisecond, 300 * time.Millisecond}
+	statuses := []bool{true, true, true}
+	got := styledModel.latencySparkline(latencies, statuses, 5, "")
+	count := utf8.RuneCountInString(stripANSI(got))
+	if count != 5 {
+		t.Errorf("expected 5 rune-width output, got %d from %q", count, got)
+	}
+}
+
+// stripANSI removes escape sequences from s so tests can assert on plain
+// text. Everything from an ESC byte up to and including the next 'm' is
+// dropped; all other bytes are copied through unchanged.
+func stripANSI(s string) string {
+	var plain strings.Builder
+	for pos := 0; pos < len(s); pos++ {
+		if s[pos] != '\x1b' {
+			plain.WriteByte(s[pos])
+			continue
+		}
+		// Advance to the terminating 'm'; the loop increment steps past it.
+		for pos < len(s) && s[pos] != 'm' {
+			pos++
+		}
+	}
+	return plain.String()
 }

 func TestHeartbeatSparkline_Empty(t *testing.T) {
-	got := heartbeatSparkline(nil, 10)
+	got := styledModel.heartbeatSparkline(nil, 10, "")
 	if !strings.Contains(got, "··········") {
 		t.Errorf("empty heartbeat should be dots, got %q", got)
 	}
@@ -44,7 +124,7 @@ func TestHeartbeatSparkline_Empty(t *testing.T) {

 func TestHeartbeatSparkline_Mixed(t *testing.T) {
 	statuses := []bool{true, false, true, true, false}
-	got := heartbeatSparkline(statuses, 5)
+	got := styledModel.heartbeatSparkline(statuses, 5, "")
 	if len(got) == 0 {
 		t.Error("heartbeat sparkline should not be empty")
 	}
@@ -52,8 +132,39 @@ func TestHeartbeatSparkline_Mixed(t *testing.T) {

 func TestHeartbeatSparkline_PaddedWidth(t *testing.T) {
 	statuses := []bool{true, true}
-	got := heartbeatSparkline(statuses, 5)
+	got := styledModel.heartbeatSparkline(statuses, 5, "")
 	if !strings.Contains(got, "···") {
 		t.Errorf("should have dot padding for width > data, got %q", got)
 	}
 }
+
+func TestResolveSparklineIndex(t *testing.T) {
+	tests := []struct {
+		name       string
+		x          int
+		sparkWidth int
+		dataLen    int
+		want       int
+	}{
+		{"exact fit first", 0, 5, 5, 0},
+		{"exact fit last", 4, 5, 5, 4},
+		{"padding returns -1", 0, 10, 5, -1},
+		{"padding boundary", 4, 10, 5, -1},
+		{"first data after padding", 5, 10, 5, 0},
+		{"last data after padding", 9, 10, 5, 4},
+		{"truncated first visible", 0, 5, 20, 15},
+		{"truncated last visible", 4, 5, 20, 19},
+		{"single data point", 9, 10, 1, 0},
+		{"single data point on padding", 0, 10, 1, -1},
+		{"zero data", 0, 10, 0, -1},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := resolveSparklineIndex(tt.x, tt.sparkWidth, tt.dataLen)
+			if got != tt.want {
+				t.Errorf("resolveSparklineIndex(%d, %d, %d) = %d, want %d",
+					tt.x, tt.sparkWidth, tt.dataLen, got, tt.want)
+			}
+		})
+	}
+}
@@ -1,10 +1,13 @@
 package tui

 import (
+	"context"
 	"fmt"
+	neturl "net/url"
+	"sort"
 	"strings"
-	"time"

+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 	"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
 	tea "github.com/charmbracelet/bubbletea"
 	"github.com/charmbracelet/huh"
@@ -72,10 +75,7 @@ func fmtAlertType(t string) string {
 	}
 }

-func fmtAlertConfig(alert struct {
-	Type     string
-	Settings map[string]string
-}) string {
+func (m Model) fmtAlertConfig(alert models.AlertConfig) string {
 	switch alert.Type {
 	case "email":
 		host := alert.Settings["host"]
@@ -86,85 +86,91 @@ func fmtAlertConfig(alert struct {
 		if host != "" {
 			return limitStr(host, 34)
 		}
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	case "ntfy":
 		topic := alert.Settings["topic"]
 		url := alert.Settings["url"]
 		if url != "" && topic != "" {
 			return limitStr(fmt.Sprintf("%s/%s", url, topic), 34)
 		}
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	case "telegram":
 		if id := alert.Settings["chat_id"]; id != "" {
 			return limitStr(fmt.Sprintf("chat:%s", id), 34)
 		}
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	case "pagerduty":
 		if key := alert.Settings["routing_key"]; key != "" {
-			return limitStr(key, 34)
+			return limitStr(maskSecret(key), 34)
 		}
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	case "pushover":
 		if user := alert.Settings["user"]; user != "" {
-			return limitStr(fmt.Sprintf("user:%s", user), 34)
+			return limitStr(fmt.Sprintf("user:%s", maskSecret(user)), 34)
 		}
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	case "gotify":
+		// The gotify server URL identifies the target; the token is the
+		// secret and is never shown here.
 		if url := alert.Settings["url"]; url != "" {
 			return limitStr(url, 34)
 		}
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	case "opsgenie":
 		key := alert.Settings["api_key"]
 		if key != "" {
-			masked := key
-			if len(masked) > 8 {
-				masked = masked[:4] + "…" + masked[len(masked)-4:]
-			}
+			masked := maskSecret(key)
 			if alert.Settings["eu"] == "true" {
 				return limitStr(fmt.Sprintf("EU %s", masked), 34)
 			}
 			return limitStr(masked, 34)
 		}
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	default:
-		if val, ok := alert.Settings["url"]; ok {
-			return limitStr(val, 34)
+		// discord/slack/webhook: the URL path IS the credential — show only
+		// enough to identify the target.
+		if val, ok := alert.Settings["url"]; ok && val != "" {
+			return limitStr(maskWebhookURL(val), 34)
 		}
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	}
 }

-func fmtAlertHealth(h monitor.AlertHealth) string {
+// maskSecret keeps just enough of a credential to identify it: the first and
+// last four characters around an ellipsis.
+//
+// Anything shorter than 16 characters is replaced wholesale by a fixed row of
+// dots, so the visible ends never make up more than half of the secret (a
+// 9-character key would otherwise be shown with a single character hidden).
+//
+// Slicing is done on runes, not bytes, so a multi-byte character at either
+// cut point cannot be split into invalid UTF-8.
+func maskSecret(s string) string {
+	const (
+		keep      = 4  // characters revealed at each end
+		minReveal = 16 // shortest secret whose ends may be shown
+	)
+	runes := []rune(s)
+	if len(runes) < minReveal {
+		return "●●●●●●●●"
+	}
+	return string(runes[:keep]) + "…" + string(runes[len(runes)-keep:])
+}
+
+// maskWebhookURL reduces a webhook URL to "scheme://host/…". For discord,
+// slack, and generic webhooks the URL path carries the token, so the path is
+// never rendered. Input that does not parse, or parses without a host, is
+// replaced by a fixed row of dots.
+func maskWebhookURL(raw string) string {
+	parsed, err := neturl.Parse(raw)
+	if err == nil && parsed.Host != "" {
+		return parsed.Scheme + "://" + parsed.Host + "/…"
+	}
+	return "●●●●●●●●"
+}
+
+func (m Model) fmtAlertHealth(h monitor.AlertHealth) string {
 	if h.LastSendAt.IsZero() {
-		return subtleStyle.Render("●")
+		return m.st.subtleStyle.Render("●")
 	}
 	if h.LastSendOK {
-		return specialStyle.Render("●")
+		return m.st.specialStyle.Render("●")
 	}
-	return dangerStyle.Render("●")
+	return m.st.dangerStyle.Render("●")
 }

-func fmtAlertLastSent(h monitor.AlertHealth) string {
-	if h.LastSendAt.IsZero() {
-		return subtleStyle.Render("never")
-	}
-	d := time.Since(h.LastSendAt)
-	if d < time.Minute {
-		return fmt.Sprintf("%ds ago", int(d.Seconds()))
-	}
-	if d < time.Hour {
-		return fmt.Sprintf("%dm ago", int(d.Minutes()))
-	}
-	if d < 24*time.Hour {
-		return fmt.Sprintf("%dh ago", int(d.Hours()))
-	}
-	return fmt.Sprintf("%dd ago", int(d.Hours())/24)
+func (m Model) fmtAlertLastSent(h monitor.AlertHealth) string {
+	return m.fmtTimeAgo(h.LastSendAt)
 }

 func (m Model) viewAlertsTab() string {
 	if len(m.alerts) == 0 {
-		return "\n  No alert channels configured. Press [n] to add one."
+		return m.emptyState("No alert channels configured.", "[n] Add your first alert")
 	}

 	var headers []string
@@ -189,14 +195,11 @@ func (m Model) viewAlertsTab() string {
 				h := m.engine.GetAlertHealth(a.ID)
 				rows = append(rows, []string{
 					fmt.Sprintf("%d", i+1),
-					fmtAlertHealth(h),
+					m.fmtAlertHealth(h),
 					m.zones.Mark(fmt.Sprintf("alert-%d", i), limitStr(a.Name, nameW-2)),
 					fmtAlertType(a.Type),
-					limitStr(fmtAlertConfig(struct {
-						Type     string
-						Settings map[string]string
-					}{a.Type, a.Settings}), cfgW-2),
-					fmtAlertLastSent(h),
+					limitStr(m.fmtAlertConfig(a), cfgW-2),
+					m.fmtAlertLastSent(h),
 				})
 			}
 			return rows
@@ -214,39 +217,55 @@ func (m Model) viewAlertDetailPanel() string {

 	var b strings.Builder

-	b.WriteString(subtleStyle.Render("  Alerts > ") + titleStyle.Render(a.Name) + "\n\n")
+	b.WriteString(m.st.subtleStyle.Render("  Alerts > ") + m.st.titleStyle.Render(a.Name) + "\n")
+	b.WriteString(m.divider() + "\n")

 	row := func(label, value string) {
-		fmt.Fprintf(&b, "  %-16s %s\n", subtleStyle.Render(label), value)
+		fmt.Fprintf(&b, "  %-16s %s\n", m.st.subtleStyle.Render(label), value)
 	}

 	row("Type", fmtAlertType(a.Type))

 	if h.LastSendAt.IsZero() {
-		row("Health", subtleStyle.Render("never sent"))
+		row("Health", m.st.subtleStyle.Render("never sent"))
 	} else if h.LastSendOK {
-		row("Health", specialStyle.Render("OK"))
+		row("Health", m.st.specialStyle.Render("OK"))
 	} else {
-		row("Health", dangerStyle.Render("FAILED"))
+		row("Health", m.st.dangerStyle.Render("FAILED"))
 	}

 	if !h.LastSendAt.IsZero() {
-		row("Last Sent", h.LastSendAt.Format("2006-01-02 15:04:05")+" ("+fmtAlertLastSent(h)+")")
+		row("Last Sent", h.LastSendAt.Format("2006-01-02 15:04:05")+" ("+m.fmtAlertLastSent(h)+")")
 	}
 	if h.SendCount > 0 {
 		row("Sends", fmt.Sprintf("%d sent, %d failed", h.SendCount, h.FailCount))
 	}
 	if h.LastError != "" {
-		row("Last Error", dangerStyle.Render(limitStr(h.LastError, 60)))
+		row("Last Error", m.st.dangerStyle.Render(limitStr(h.LastError, 60)))
 	}

-	b.WriteString("\n" + subtleStyle.Render("  CONFIGURATION") + "\n")
-	for k, v := range a.Settings {
+	b.WriteString(m.divider() + "\n")
+	b.WriteString(m.st.subtleStyle.Render("  CONFIGURATION") + "\n")
+	// Render through the same allowlist the backup export uses — this panel
+	// ends up in screen shares and asciinema recordings. Keys are sorted so
+	// rows don't reshuffle every render.
+	redacted := models.RedactAlertSettings(a.Type, a.Settings)
+	keys := make([]string, 0, len(redacted))
+	for k := range redacted {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	for _, k := range keys {
+		v := redacted[k]
+		if v == "***REDACTED***" {
+			row(k, m.st.subtleStyle.Render("●●●●●●●●"))
+			continue
+		}
 		row(k, v)
 	}

-	b.WriteString("\n\n")
-	b.WriteString(subtleStyle.Render("  [i/Esc] Back  [e] Edit  [t] Test  [q] Quit"))
+	b.WriteString(m.divider() + "\n")
+	b.WriteString(m.st.subtleStyle.Render("  [q/Esc] Back  [e] Edit  [t] Test"))

 	return lipgloss.NewStyle().Padding(1, 2).Render(b.String())
 }
@@ -457,7 +476,7 @@ func (m *Model) initAlertHuhForm() tea.Cmd {
 	return m.huhForm.Init()
 }

-func (m *Model) submitAlertForm() {
+func (m *Model) submitAlertForm() tea.Cmd {
 	d := m.alertFormData
 	settings := make(map[string]string)

@@ -498,14 +517,16 @@ func (m *Model) submitAlertForm() {
 		settings["url"] = d.WebhookURL
 	}

-	if m.editID > 0 {
-		if err := m.store.UpdateAlert(m.editID, d.Name, d.AlertType, settings); err != nil {
-			m.engine.AddLog("Update alert failed: " + err.Error())
-		}
-	} else {
-		if err := m.store.AddAlert(d.Name, d.AlertType, settings); err != nil {
-			m.engine.AddLog("Add alert failed: " + err.Error())
-		}
-	}
+	st := m.store
+	id := m.editID
+	name, aType := d.Name, d.AlertType
 	m.state = stateDashboard
+	if id > 0 {
+		return writeCmd("Update alert", func() error {
+			return st.UpdateAlert(context.Background(), id, name, aType, settings)
+		})
+	}
+	return writeCmd("Add alert", func() error {
+		return st.AddAlert(context.Background(), name, aType, settings)
+	})
 }
@@ -0,0 +1,62 @@
+package tui
+
+import (
+	"strings"
+	"testing"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+)
+
+func TestAlertDetailPanel_MasksSecretsStableOrder(t *testing.T) {
+	m := newTestModel(&tuiMockStore{})
+	m.termWidth, m.termHeight = 120, 40
+	m.alerts = []models.AlertConfig{{
+		ID: 1, Name: "ops", Type: "email",
+		Settings: map[string]string{
+			"host": "smtp.example.com",
+			"port": "587",
+			"user": "oncall@example.com",
+			"pass": "hunter2-secret",
+			"to":   "team@example.com",
+		},
+	}}
+	m.cursor = 0
+
+	out := m.viewAlertDetailPanel()
+	if strings.Contains(out, "hunter2-secret") {
+		t.Error("SMTP password rendered in alert detail panel")
+	}
+	if strings.Contains(out, "oncall@example.com") {
+		t.Error("SMTP user (not on the allowlist) rendered in alert detail panel")
+	}
+	if !strings.Contains(out, "smtp.example.com") {
+		t.Error("allowlisted setting (host) missing from panel")
+	}
+
+	// Map iteration must not reshuffle rows between renders.
+	for i := 0; i < 5; i++ {
+		if m.viewAlertDetailPanel() != out {
+			t.Fatal("panel output unstable across renders — settings keys not sorted")
+		}
+	}
+}
+
+func TestFmtAlertConfig_MasksSecrets(t *testing.T) {
+	m := newTestModel(&tuiMockStore{})
+
+	webhook := m.fmtAlertConfig(models.AlertConfig{Type: "discord", Settings: map[string]string{"url": "https://discord.com/api/webhooks/123456/SeCrEtToKeN"}})
+	if strings.Contains(webhook, "SeCrEtToKeN") || strings.Contains(webhook, "123456") {
+		t.Errorf("webhook URL path (the credential) rendered in table: %q", webhook)
+	}
+	if !strings.Contains(webhook, "discord.com") {
+		t.Errorf("webhook host missing from table config: %q", webhook)
+	}
+
+	pd := m.fmtAlertConfig(models.AlertConfig{Type: "pagerduty", Settings: map[string]string{"routing_key": "R0123456789ABCDEFGHIJ"}})
+	if strings.Contains(pd, "R0123456789ABCDEFGHIJ") {
+		t.Errorf("pagerduty routing key rendered raw in table: %q", pd)
+	}
+	if !strings.Contains(pd, "R012") || !strings.Contains(pd, "GHIJ") {
+		t.Errorf("masked routing key should keep identifying ends: %q", pd)
+	}
+}
@@ -48,30 +48,30 @@ func isImportantLog(sev logSeverity) bool {
 	return sev == severityDown || sev == severityUp || sev == severitySystem
 }

-func renderLogTag(sev logSeverity) string {
+// renderLogTag returns the six-character severity label for a log line,
+// rendered with the matching style from the model's style set (m.st).
+// Severities without an explicit case get the subtle " info " label.
+func (m Model) renderLogTag(sev logSeverity) string {
 	switch sev {
 	case severityDown:
-		return dangerStyle.Render(" DOWN ")
+		return m.st.dangerStyle.Render(" DOWN ")
 	case severityUp:
-		return specialStyle.Render("  UP  ")
+		return m.st.specialStyle.Render("  UP  ")
 	case severityWarn:
-		return warnStyle.Render(" WARN ")
+		return m.st.warnStyle.Render(" WARN ")
 	case severitySystem:
-		return titleStyle.Render(" SYS  ")
+		return m.st.titleStyle.Render(" SYS  ")
 	default:
-		return subtleStyle.Render(" info ")
+		return m.st.subtleStyle.Render(" info ")
 	}
 }

-func renderLogLine(line string) string {
+func (m Model) renderLogLine(line string) string {
 	sev := classifyLog(line)
-	tag := renderLogTag(sev)
+	tag := m.renderLogTag(sev)

 	ts := ""
 	msg := line
 	if len(line) > 10 && line[0] == '[' {
 		if idx := strings.Index(line, "]"); idx > 0 && idx < 12 {
-			ts = subtleStyle.Render(line[1:idx])
+			ts = m.st.subtleStyle.Render(line[1:idx])
 			msg = strings.TrimSpace(line[idx+1:])
 		}
 	}
@@ -82,18 +82,15 @@ func renderLogLine(line string) string {
 	return fmt.Sprintf("  %s  %s", tag, msg)
 }

-func (m Model) viewLogsTab() string {
-	content := m.logViewport.View()
-	if strings.TrimSpace(content) == "" || content == "Waiting for logs..." {
-		return "\n  No log entries yet. Logs appear as monitors run checks."
-	}
-
-	lines := strings.Split(content, "\n")
+// refreshLogContent rebuilds the log viewport from the full engine log list,
+// filtering before windowing so the entry count and "(n hidden)" reflect all
+// logs, not just the visible viewport slice.
+func (m *Model) refreshLogContent() {
 	var rendered []string
 	total := 0
 	shown := 0

-	for _, line := range lines {
+	for _, line := range m.engine.GetLogs() {
 		if strings.TrimSpace(line) == "" {
 			continue
 		}
@@ -103,7 +100,17 @@ func (m Model) viewLogsTab() string {
 			continue
 		}
 		shown++
-		rendered = append(rendered, renderLogLine(line))
+		rendered = append(rendered, m.renderLogLine(line))
+	}
+
+	m.logTotal = total
+	m.logShown = shown
+	m.logViewport.SetContent(strings.Join(rendered, "\n"))
+}
+
+func (m Model) viewLogsTab() string {
+	if m.logTotal == 0 {
+		return m.emptyState("No log entries yet.", "Logs appear as monitors run checks")
 	}

 	filterLabel := "All"
@@ -111,12 +118,12 @@ func (m Model) viewLogsTab() string {
 		filterLabel = "Important"
 	}

-	header := subtleStyle.Render(fmt.Sprintf(
-		"  %d entries  [↑/↓] Scroll  [PgUp/PgDn] Page  [f] Filter: %s", shown, filterLabel))
+	header := m.st.subtleStyle.Render(fmt.Sprintf(
+		"  %d entries  Filter: %s", m.logShown, filterLabel))

-	if m.logFilterImportant && shown < total {
-		header += subtleStyle.Render(fmt.Sprintf("  (%d hidden)", total-shown))
+	if m.logFilterImportant && m.logShown < m.logTotal {
+		header += m.st.subtleStyle.Render(fmt.Sprintf("  (%d hidden)", m.logTotal-m.logShown))
 	}

-	return "\n" + header + "\n\n" + strings.Join(rendered, "\n")
+	return "\n" + header + "\n\n" + m.logViewport.View()
 }
@@ -1,6 +1,7 @@
 package tui

 import (
+	"context"
 	"fmt"
 	"strconv"
 	"time"
@@ -9,11 +10,8 @@ import (

 	tea "github.com/charmbracelet/bubbletea"
 	"github.com/charmbracelet/huh"
-	"github.com/charmbracelet/lipgloss"
 )

-var maintStyle lipgloss.Style
-
 type maintFormData struct {
 	Title       string
 	Description string
@@ -23,22 +21,22 @@ type maintFormData struct {
 	CustomHours string
 }

-func fmtMaintStatus(mw models.MaintenanceWindow) string {
+// fmtMaintStatus returns a styled status label for a maintenance window,
+// evaluated against the current time: "SCHEDULED" when StartTime is in the
+// future, "ENDED" when EndTime is set and already past, otherwise "ACTIVE".
+// A zero EndTime is never treated as ended.
+func (m Model) fmtMaintStatus(mw models.MaintenanceWindow) string {
 	now := time.Now()
 	if mw.StartTime.After(now) {
-		return warnStyle.Render("SCHEDULED")
+		return m.st.warnStyle.Render("SCHEDULED")
 	}
 	if !mw.EndTime.IsZero() && mw.EndTime.Before(now) {
-		return subtleStyle.Render("ENDED")
+		return m.st.subtleStyle.Render("ENDED")
 	}
-	return specialStyle.Render("ACTIVE")
+	return m.st.specialStyle.Render("ACTIVE")
 }

-func fmtMaintType(t string) string {
+// fmtMaintType returns the styled type label for a maintenance-window row:
+// "incident" in the danger style when t is exactly "incident", and
+// "maintenance" in the maintenance style for every other value.
+func (m Model) fmtMaintType(t string) string {
 	if t == "incident" {
-		return dangerStyle.Render("incident")
+		return m.st.dangerStyle.Render("incident")
 	}
-	return maintStyle.Render("maintenance")
+	return m.st.maintStyle.Render("maintenance")
 }

 func fmtMaintMonitorW(monitorID int, sites []models.Site, maxW int) string {
@@ -53,9 +51,9 @@ func fmtMaintMonitorW(monitorID int, sites []models.Site, maxW int) string {
 	return fmt.Sprintf("#%d", monitorID)
 }

-func fmtMaintTime(t time.Time, colW int) string {
+func (m Model) fmtMaintTime(t time.Time, colW int) string {
 	if t.IsZero() {
-		return subtleStyle.Render("—")
+		return m.st.subtleStyle.Render("—")
 	}
 	now := time.Now()
 	if t.Year() == now.Year() && t.YearDay() == now.YearDay() {
@@ -93,7 +91,7 @@ func (m Model) isMonitorInMaintenance(monitorID int) bool {

 func (m Model) viewMaintTab() string {
 	if len(m.maintenanceWindows) == 0 {
-		return "\n  No maintenance windows or incidents. Press [n] to create one."
+		return m.emptyState("No maintenance windows or incidents.", "[n] Create one")
 	}

 	var headers []string
@@ -120,11 +118,11 @@ func (m Model) viewMaintTab() string {
 				rows = append(rows, []string{
 					strconv.Itoa(i + 1),
 					m.zones.Mark(fmt.Sprintf("maint-%d", i), limitStr(mw.Title, titleW-2)),
-					fmtMaintType(mw.Type),
+					m.fmtMaintType(mw.Type),
 					fmtMaintMonitorW(mw.MonitorID, allSites, monW-2),
-					fmtMaintStatus(mw),
-					fmtMaintTime(mw.StartTime, timeW),
-					fmtMaintTime(mw.EndTime, timeW),
+					m.fmtMaintStatus(mw),
+					m.fmtMaintTime(mw.StartTime, timeW),
+					m.fmtMaintTime(mw.EndTime, timeW),
 				})
 			}
 			return rows
@@ -209,7 +207,7 @@ func (m *Model) initMaintHuhForm() tea.Cmd {
 	return m.huhForm.Init()
 }

-func (m *Model) submitMaintForm() {
+func (m *Model) submitMaintForm() tea.Cmd {
 	d := m.maintFormData
 	monitorID, _ := strconv.Atoi(d.MonitorID)

@@ -240,8 +238,9 @@ func (m *Model) submitMaintForm() {
 		}
 	}

-	if err := m.store.AddMaintenanceWindow(mw); err != nil {
-		m.engine.AddLog("Add maintenance window failed: " + err.Error())
-	}
+	st := m.store
 	m.state = stateDashboard
+	return writeCmd("Add maintenance window", func() error {
+		return st.AddMaintenanceWindow(context.Background(), mw)
+	})
 }
@@ -1,13 +1,12 @@
 package tui

 import (
-	"fmt"
 	"time"
 )

 func (m Model) viewNodesTab() string {
 	if len(m.nodes) == 0 {
-		return "\n  No probe nodes connected."
+		return m.emptyState("No probe nodes connected.", "")
 	}

 	var headers []string
@@ -34,14 +33,14 @@ func (m Model) viewNodesTab() string {
 				}
 				region := node.Region
 				if region == "" {
-					region = subtleStyle.Render("—")
+					region = m.st.subtleStyle.Render("—")
 				}
-				lastSeen := fmtNodeLastSeen(node.LastSeen)
+				lastSeen := m.fmtNodeLastSeen(node.LastSeen)
 				version := node.Version
 				if version == "" {
-					version = subtleStyle.Render("—")
+					version = m.st.subtleStyle.Render("—")
 				}
-				status := fmtNodeStatus(node.LastSeen)
+				status := m.fmtNodeStatus(node.LastSeen)
 				rows = append(rows, []string{name, region, lastSeen, version, status})
 			}
 			return rows
@@ -51,30 +50,20 @@ func (m Model) viewNodesTab() string {
 	)
 }

-func fmtNodeStatus(lastSeen time.Time) string {
+// fmtNodeStatus returns a styled status label derived from how long ago the
+// node was last seen: "UNKNOWN" for a zero timestamp, "ONLINE" under 60
+// seconds, "STALE" under 5 minutes, and "OFFLINE" otherwise.
+func (m Model) fmtNodeStatus(lastSeen time.Time) string {
 	if lastSeen.IsZero() {
-		return subtleStyle.Render("UNKNOWN")
+		return m.st.subtleStyle.Render("UNKNOWN")
 	}
 	ago := time.Since(lastSeen)
 	if ago < 60*time.Second {
-		return specialStyle.Render("ONLINE")
+		return m.st.specialStyle.Render("ONLINE")
 	}
 	if ago < 5*time.Minute {
-		return warnStyle.Render("STALE")
+		return m.st.warnStyle.Render("STALE")
 	}
-	return dangerStyle.Render("OFFLINE")
+	return m.st.dangerStyle.Render("OFFLINE")
 }

-func fmtNodeLastSeen(t time.Time) string {
-	if t.IsZero() {
-		return subtleStyle.Render("never")
-	}
-	ago := time.Since(t)
-	if ago < time.Minute {
-		return fmt.Sprintf("%ds ago", int(ago.Seconds()))
-	}
-	if ago < time.Hour {
-		return fmt.Sprintf("%dm ago", int(ago.Minutes()))
-	}
-	return fmt.Sprintf("%dh ago", int(ago.Hours()))
+// fmtNodeLastSeen formats a node's last-seen timestamp for the nodes table.
+// It is a thin wrapper that delegates to m.fmtTimeAgo.
+// NOTE(review): the removed body rendered "never" for a zero time — confirm
+// fmtTimeAgo keeps that behavior.
+func (m Model) fmtNodeLastSeen(t time.Time) string {
+	return m.fmtTimeAgo(t)
 }
@@ -1,6 +1,7 @@
 package tui

 import (
+	"context"
 	"fmt"
 	"net/url"
 	"strconv"
@@ -11,8 +12,6 @@ import (
 	"github.com/charmbracelet/lipgloss"
 )

-var siteGroupStyle lipgloss.Style
-
 type siteFormData struct {
 	Name          string
 	SiteType      string
@@ -33,27 +32,74 @@ type siteFormData struct {
 	Regions       string
 }

+// colKey identifies one column of the sites table. It is the key type of the
+// per-row cell maps and the element type of tableLayout.active.
+type colKey int
+
+// Column keys, declared in the same order as the entries of siteColumns.
+const (
+	colNum colKey = iota
+	colName
+	colType
+	colStatus
+	colLatency
+	colUptime
+	colHistory
+	colSSL
+	colRetries
+)
+
+// columnDef describes one sites-table column in both its wide and narrow
+// form. computeLayout picks the wide or narrow header/width pair depending on
+// isWide, and skips the column when the terminal is narrower than minTerm.
+type columnDef struct {
+	key     colKey // which column this entry defines
+	wide    string // header text used when the layout is wide
+	narrow  string // header text used when the layout is not wide
+	wideW   int    // fixed width in the wide layout; 0 = not counted as fixed width
+	narrowW int    // fixed width in the narrow layout; 0 = not counted as fixed width
+	minTerm int // minimum terminal width to show (0 = always)
+}
+
+// siteColumns lists the sites-table columns in display order. Fields are
+// positional: key, wide header, narrow header, wide width, narrow width,
+// minimum terminal width. NAME and HISTORY carry width 0 here because
+// computeLayout assigns their widths from the space left over.
+var siteColumns = []columnDef{
+	{colNum, "#", "#", 4, 4, 0},
+	{colName, "NAME", "NAME", 0, 0, 0},
+	{colType, "TYPE", "TYPE", 10, 8, mediumBreakpoint},
+	{colStatus, "STATUS", "STATUS", 10, 10, 0},
+	{colLatency, "LATENCY", "LAT", 10, 7, 0},
+	{colUptime, "UPTIME", "UP%", 8, 8, mediumBreakpoint},
+	{colHistory, "HISTORY", "HISTORY", 0, 0, mediumBreakpoint},
+	{colSSL, "SSL", "SSL", 7, 5, wideBreakpoint},
+	{colRetries, "RETRIES", "RT", 9, 5, wideBreakpoint},
+}
+
+// tableLayout is the result of computeLayout: the widths chosen for the NAME
+// and HISTORY columns, plus the parallel headers/colWidths/active slices that
+// describe the columns visible at the current terminal width.
 type tableLayout struct {
 	nameW, sparkW int
 	headers       []string
 	colWidths     []int
+	active        []colKey
 }

 func (m Model) computeLayout() tableLayout {
 	wide := m.isWide()

-	var fixed int
+	var active []colKey
 	var headers []string
 	var widths []int
+	var fixed int

+	for _, c := range siteColumns {
+		if c.minTerm > 0 && m.termWidth < c.minTerm {
+			continue
+		}
+		active = append(active, c.key)
 		if wide {
-		headers = []string{"#", "NAME", "TYPE", "STATUS", "LATENCY", "UPTIME", "HISTORY", "SSL", "RETRIES"}
-		widths = []int{4, 0, 10, 10, 10, 8, 0, 7, 9}
-		fixed = 4 + 10 + 10 + 10 + 8 + 7 + 9
+			headers = append(headers, c.wide)
+			widths = append(widths, c.wideW)
+			if c.wideW > 0 {
+				fixed += c.wideW
+			}
 		} else {
-		headers = []string{"#", "NAME", "TYPE", "STATUS", "LAT", "UP%", "HISTORY", "SSL", "RT"}
-		widths = []int{4, 0, 8, 10, 7, 8, 0, 5, 5}
-		fixed = 4 + 8 + 10 + 7 + 8 + 5 + 5
+			headers = append(headers, c.narrow)
+			widths = append(widths, c.narrowW)
+			if c.narrowW > 0 {
+				fixed += c.narrowW
+			}
+		}
 	}

 	numCols := len(headers)
@@ -71,46 +117,71 @@ func (m Model) computeLayout() tableLayout {
 	}
 	maxName += 4

-	nameW := avail / 2
+	hasHistory := false
+	for _, k := range active {
+		if k == colHistory {
+			hasHistory = true
+			break
+		}
+	}
+
+	var nameW, sparkW int
+	if hasHistory {
+		nameW = avail / 2
+		sparkW = avail - nameW
+	} else {
+		nameW = avail
+		sparkW = 0
+	}
+
 	if nameW > maxName {
 		nameW = maxName
 	}
 	if nameW < 13 {
 		nameW = 13
 	}
-	if nameW > 40 {
-		nameW = 40
+	if nameW > 35 {
+		nameW = 35
+	}
+	if sparkW > 0 {
+		if sparkW < 15 {
+			sparkW = 15
+		}
+		if sparkW > 62 {
+			sparkW = 62
+		}
 	}

-	sparkW := avail - nameW
-	if sparkW < 10 {
-		sparkW = 10
+	for i, k := range active {
+		if k == colName {
+			widths[i] = nameW
+		}
+		if k == colHistory {
+			widths[i] = sparkW
+		}
 	}
-
-	widths[1] = nameW
-	widths[6] = sparkW

 	return tableLayout{
 		nameW:     nameW,
 		sparkW:    sparkW,
 		headers:   headers,
 		colWidths: widths,
+		active:    active,
 	}
 }

+// pickCols projects a full cell map onto the visible columns: the result has
+// one entry per key in active, in the same order. A key missing from allCells
+// yields the empty string.
+func pickCols(active []colKey, allCells map[colKey]string) []string {
+	picked := make([]string, 0, len(active))
+	for _, key := range active {
+		picked = append(picked, allCells[key])
+	}
+	return picked
+}
+
 func (m Model) viewSitesTab() string {

 	if len(m.sites) == 0 {
-		welcome := lipgloss.NewStyle().
-			Border(lipgloss.RoundedBorder()).
-			BorderForeground(m.theme.Accent).
-			Padding(1, 3).
-			Render(
-				titleStyle.Render("uptop") + "\n\n" +
-					"No monitors configured yet.\n\n" +
-					subtleStyle.Render("[n] Add your first monitor"),
-			)
-		return "\n" + welcome
+		return m.emptyState(m.st.titleStyle.Render("uptop")+"\n\nNo monitors configured yet.", "[n] Add your first monitor")
 	}

 	layout := m.computeLayout()
@@ -119,6 +190,9 @@ func (m Model) viewSitesTab() string {
 	if sparkWidth < 8 {
 		sparkWidth = 8
 	}
+	if sparkWidth > 60 {
+		sparkWidth = 60
+	}

 	var groupRows map[int]bool
 	return m.renderTable(
@@ -129,21 +203,29 @@ func (m Model) viewSitesTab() string {
 			var rows [][]string
 			for i := start; i < end; i++ {
 				site := m.sites[i]
+				rowIdx := i - start
+				var rowBg lipgloss.Color
+				if i == m.cursor {
+					rowBg = m.theme.SelectedBg
+				} else if rowIdx%2 == 1 {
+					rowBg = m.theme.ZebraBg
+				}

 				if site.Type == "group" {
 					groupRows[i-start] = true
 					icon := typeIcon("group", m.collapsed[site.ID])
-					rows = append(rows, []string{
-						strconv.Itoa(i + 1),
-						m.zones.Mark(fmt.Sprintf("site-%d", i), icon+" "+limitStr(site.Name, nameW-4)),
-						"group",
-						fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID), ErrCatUnknown),
-						subtleStyle.Render("—"),
-						m.groupUptime(site.ID),
-						m.groupSparkline(site.ID, sparkWidth),
-						subtleStyle.Render("-"),
-						subtleStyle.Render("—"),
-					})
+					cells := map[colKey]string{
+						colNum:     strconv.Itoa(i + 1),
+						colName:    m.zones.Mark(fmt.Sprintf("site-%d", i), icon+" "+limitStr(site.Name, nameW-4)),
+						colType:    "group",
+						colStatus:  m.fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID)),
+						colLatency: m.st.subtleStyle.Render("—"),
+						colUptime:  m.groupUptime(site.ID),
+						colHistory: m.groupSparkline(site.ID, sparkWidth, rowBg),
+						colSSL:     m.st.subtleStyle.Render("-"),
+						colRetries: m.st.subtleStyle.Render("—"),
+					}
+					rows = append(rows, pickCols(layout.active, cells))
 					continue
 				}

@@ -158,7 +240,7 @@ func (m Model) viewSitesTab() string {
 					name = limitStr(name, nameW-2)
 				}

-				if (site.Status == "DOWN" || site.Status == "SSL EXP" || site.Status == "LATE" || site.Status == "STALE") && site.LastError != "" {
+				if (site.Status == models.StatusDown || site.Status == models.StatusSSLExp || site.Status == models.StatusLate || site.Status == models.StatusStale) && site.LastError != "" {
 					nameLen := len([]rune(name))
 					errSpace := nameW - nameLen - 3
 					if errSpace > 10 {
@@ -168,36 +250,37 @@ func (m Model) viewSitesTab() string {
 						if tag != "" {
 							errText = tag + " " + errText
 						}
-						name = name + " " + subtleStyle.Render(limitStr(errText, errSpace))
+						name = name + " " + m.st.subtleStyle.Render(limitStr(errText, errSpace))
 					}
 				}

 				hist, _ := m.engine.GetHistory(site.ID)
 				var spark string
 				if site.Type == "push" {
-					spark = heartbeatSparkline(hist.Statuses, sparkWidth)
+					spark = m.heartbeatSparkline(hist.Statuses, sparkWidth, rowBg)
 				} else {
-					spark = latencySparkline(hist.Latencies, hist.Statuses, sparkWidth)
+					spark = m.latencySparkline(hist.Latencies, hist.Statuses, sparkWidth, rowBg)
 				}

-				rows = append(rows, []string{
-					strconv.Itoa(i + 1),
-					m.zones.Mark(fmt.Sprintf("site-%d", i), name),
-					typeIcon(site.Type, false) + " " + site.Type,
-					fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID), classifyError(site.LastError, site.Type, site.StatusCode)),
-					fmtLatency(site.Latency),
-					fmtUptime(hist.Statuses),
-					spark,
-					fmtSSL(site),
-					fmtRetries(site),
-				})
+				cells := map[colKey]string{
+					colNum:     strconv.Itoa(i + 1),
+					colName:    m.zones.Mark(fmt.Sprintf("site-%d", i), name),
+					colType:    typeIcon(site.Type, false) + " " + site.Type,
+					colStatus:  m.fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID)),
+					colLatency: m.fmtLatency(site.Latency),
+					colUptime:  m.fmtUptime(hist.Statuses),
+					colHistory: spark,
+					colSSL:     m.fmtSSL(site),
+					colRetries: m.fmtRetries(site),
+				}
+				rows = append(rows, pickCols(layout.active, cells))
 			}
 			return rows
 		},
 		layout.colWidths,
 		func(row, col int) *lipgloss.Style {
 			if groupRows[row] {
-				s := siteGroupStyle
+				s := m.st.siteGroupStyle
 				return &s
 			}
 			return nil
@@ -243,15 +326,14 @@ func (m *Model) initSiteHuhForm() tea.Cmd {
 		}
 	}

+	// m.alerts is the tab-data cache (≤5s stale) — no store IO in Update.
 	alertOpts := []huh.Option[string]{huh.NewOption("None", "0")}
-	if alerts, err := m.store.GetAllAlerts(); err == nil {
-		for _, a := range alerts {
+	for _, a := range m.alerts {
 		alertOpts = append(alertOpts, huh.NewOption(
 			fmt.Sprintf("%s (%s)", a.Name, a.Type),
 			strconv.Itoa(a.ID),
 		))
 	}
-	}

 	groupOpts := []huh.Option[string]{huh.NewOption("None", "0")}
 	for _, s := range m.sites {
@@ -437,7 +519,7 @@ func (m *Model) initSiteHuhForm() tea.Cmd {
 	return m.huhForm.Init()
 }

-func (m *Model) submitSiteForm() {
+func (m *Model) submitSiteForm() tea.Cmd {
 	d := m.siteFormData
 	interval, _ := strconv.Atoi(d.Interval)
 	alertID, _ := strconv.Atoi(d.AlertID)
@@ -453,7 +535,7 @@ func (m *Model) submitSiteForm() {
 		threshold = 7
 	}

-	site := models.Site{
+	cfg := models.SiteConfig{
 		ID:              m.editID,
 		Name:            d.Name,
 		URL:             d.URL,
@@ -474,15 +556,11 @@ func (m *Model) submitSiteForm() {
 		Regions:         d.Regions,
 	}

-	if m.editID > 0 {
-		if err := m.store.UpdateSite(site); err != nil {
-			m.engine.AddLog("Update site failed: " + err.Error())
-		}
-		m.engine.UpdateSiteConfig(site)
-	} else {
-		if err := m.store.AddSite(site); err != nil {
-			m.engine.AddLog("Add site failed: " + err.Error())
-		}
-	}
+	st := m.store
 	m.state = stateDashboard
+	if m.editID > 0 {
+		m.engine.UpdateSiteConfig(cfg)
+		return writeCmd("Update site", func() error { return st.UpdateSite(context.Background(), cfg) })
+	}
+	return writeCmd("Add site", func() error { return st.AddSite(context.Background(), cfg) })
 }
@@ -1,6 +1,7 @@
 package tui

 import (
+	"context"
 	"fmt"

 	tea "github.com/charmbracelet/bubbletea"
@@ -13,9 +14,9 @@ type userFormData struct {
 	Role      string
 }

-func fmtRole(role string) string {
+// fmtRole returns the role text for the users table. The "admin" role is
+// rendered with the special style; any other role is returned unchanged.
+func (m Model) fmtRole(role string) string {
 	if role == "admin" {
-		return specialStyle.Render(role)
+		return m.st.specialStyle.Render(role)
 	}
 	return role
 }
@@ -29,7 +30,7 @@ func fmtKey(key string) string {

 func (m Model) viewUsersTab() string {
 	if len(m.users) == 0 {
-		return "\n  No users configured. Press [n] to add one."
+		return m.emptyState("No users configured.", "[n] Add a user")
 	}

 	var headers []string
@@ -53,7 +54,7 @@ func (m Model) viewUsersTab() string {
 				rows = append(rows, []string{
 					fmt.Sprintf("%d", i+1),
 					m.zones.Mark(fmt.Sprintf("user-%d", i), limitStr(u.Username, userW-2)),
-					fmtRole(u.Role),
+					m.fmtRole(u.Role),
 					fmtKey(u.PublicKey),
 				})
 			}
@@ -110,16 +111,18 @@ func (m *Model) initUserHuhForm() tea.Cmd {
 	return m.huhForm.Init()
 }

-func (m *Model) submitUserForm() {
+// submitUserForm switches the UI to the users state and returns the command
+// built by writeCmd for the store write. The write is an update when
+// m.editID > 0 and an add otherwise. The store call itself lives inside the
+// closure handed to writeCmd rather than being made directly here.
+func (m *Model) submitUserForm() tea.Cmd {
 	d := m.userFormData
-	if m.editID > 0 {
-		if err := m.store.UpdateUser(m.editID, d.Username, d.PublicKey, d.Role); err != nil {
-			m.engine.AddLog("Update user failed: " + err.Error())
-		}
-	} else {
-		if err := m.store.AddUser(d.Username, d.PublicKey, d.Role); err != nil {
-			m.engine.AddLog("Add user failed: " + err.Error())
-		}
-	}
+	// Copy everything the closures need into locals so they do not read
+	// from the model when they eventually run.
+	st := m.store
+	id := m.editID
+	username, key, role := d.Username, d.PublicKey, d.Role
 	m.state = stateUsers
+	if id > 0 {
+		return writeCmd("Update user", func() error {
+			return st.UpdateUser(context.Background(), id, username, key, role)
+		})
+	}
+	return writeCmd("Add user", func() error {
+		return st.AddUser(context.Background(), username, key, role)
+	})
 }
@@ -5,17 +5,12 @@ import (
 	"github.com/charmbracelet/lipgloss/table"
 )

-var (
-	tableHeaderStyle   lipgloss.Style
-	tableCellStyle     lipgloss.Style
-	tableSelectedStyle lipgloss.Style
-	tableBorderStyle   lipgloss.Style
-	tableZebraStyle    lipgloss.Style
-)
-
+// StyleOverride lets a caller of renderTable supply a per-cell style. A
+// non-nil result replaces the default cell style for that (row, col); nil
+// falls through to the default styling.
 type StyleOverride func(row, col int) *lipgloss.Style

-const wideBreakpoint = 120
+// Terminal-width thresholds compared against m.termWidth.
+const (
+	// wideBreakpoint is the width at or above which isWide reports true.
+	wideBreakpoint   = 120
+	// mediumBreakpoint is the lower threshold used as a column's minTerm.
+	mediumBreakpoint = 90
+)

+// isWide reports whether the terminal is at least wideBreakpoint wide.
 func (m Model) isWide() bool {
 	return m.termWidth >= wideBreakpoint
 }

 	t := table.New().
 		Border(lipgloss.RoundedBorder()).
-		BorderStyle(tableBorderStyle).
+		BorderStyle(m.st.tableBorderStyle).
 		Width(tableWidth).
 		Headers(headers...).
 		Rows(rows...).
 		StyleFunc(func(row, col int) lipgloss.Style {
 			if row == table.HeaderRow {
-				h := tableHeaderStyle
+				h := m.st.tableHeaderStyle
 				if col < len(colWidths) && colWidths[col] > 0 {
 					h = h.Width(colWidths[col]).MaxWidth(colWidths[col])
 				}
@@ -66,8 +61,11 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
 			if styleOverride != nil {
 				if s := styleOverride(row, col); s != nil {
 					style := *s
+					if row%2 == 1 {
+						style = style.Background(m.st.tableZebraStyle.GetBackground())
+					}
 					if isSelected {
-						style = tableSelectedStyle.Foreground(s.GetForeground())
+						style = m.st.tableSelectedStyle.Foreground(s.GetForeground())
 					}
 					if col < len(colWidths) && colWidths[col] > 0 {
 						style = style.Width(colWidths[col]).MaxWidth(colWidths[col])
@@ -75,12 +73,12 @@ func (m Model) renderTable(headers []string, items int, buildRows func(start, en
 					return style
 				}
 			}
-			base := tableCellStyle
+			base := m.st.tableCellStyle
 			if row%2 == 1 {
-				base = tableZebraStyle
+				base = m.st.tableZebraStyle
 			}
 			if isSelected {
-				base = tableSelectedStyle
+				base = m.st.tableSelectedStyle
 			}
 			if col < len(colWidths) && colWidths[col] > 0 {
 				base = base.Width(colWidths[col]).MaxWidth(colWidths[col])
@@ -1,6 +1,7 @@
 package tui

 import (
+	"context"
 	"os"
 	"time"

@@ -16,7 +17,10 @@ import (
 	zone "github.com/lrstanley/bubblezone"
 )

-var (
+// styles holds every theme-derived lipgloss style. Each Model owns its own
+// instance (built by newStyles), so concurrent SSH sessions can run different
+// themes without racing on shared package state. Never mutate after creation.
+type styles struct {
 	subtleStyle  lipgloss.Style
 	specialStyle lipgloss.Style
 	warnStyle    lipgloss.Style
@@ -25,26 +29,45 @@ var (
 	titleStyle   lipgloss.Style
 	activeTab    lipgloss.Style
 	inactiveTab  lipgloss.Style
-)

-func applyTheme(t Theme) {
-	subtleStyle = lipgloss.NewStyle().Foreground(t.Subtle)
-	specialStyle = lipgloss.NewStyle().Foreground(t.Success)
-	warnStyle = lipgloss.NewStyle().Foreground(t.Warning)
-	staleStyle = lipgloss.NewStyle().Foreground(t.Stale)
-	dangerStyle = lipgloss.NewStyle().Foreground(t.Danger)
-	titleStyle = lipgloss.NewStyle().Foreground(t.Accent).Bold(true)
-	activeTab = lipgloss.NewStyle().Border(lipgloss.NormalBorder(), false, false, true, false).BorderForeground(t.Accent).Foreground(t.Accent).Bold(true).Padding(0, 1)
-	inactiveTab = lipgloss.NewStyle().Padding(0, 1).Foreground(t.Muted)
+	sparkSuccess string
+	sparkWarning string
+	sparkDanger  string

-	tableHeaderStyle = lipgloss.NewStyle().Foreground(t.Accent).Bold(true).Padding(0, 1)
-	tableCellStyle = lipgloss.NewStyle().Padding(0, 1)
-	tableSelectedStyle = lipgloss.NewStyle().Padding(0, 1).Bold(true).Foreground(t.SelectedFg).Background(t.SelectedBg)
-	tableBorderStyle = lipgloss.NewStyle().Foreground(t.Border)
-	tableZebraStyle = lipgloss.NewStyle().Padding(0, 1).Background(t.ZebraBg)
+	tableHeaderStyle   lipgloss.Style
+	tableCellStyle     lipgloss.Style
+	tableSelectedStyle lipgloss.Style
+	tableBorderStyle   lipgloss.Style
+	tableZebraStyle    lipgloss.Style

-	siteGroupStyle = lipgloss.NewStyle().Padding(0, 1).Bold(true).Foreground(t.Accent)
-	maintStyle = lipgloss.NewStyle().Foreground(t.Purple)
+	siteGroupStyle lipgloss.Style
+	maintStyle     lipgloss.Style
+}
+
+// newStyles builds a fresh styles value from the colors of theme t and
+// returns a pointer to it. Every field is derived from t alone, so each call
+// yields an independent instance.
+func newStyles(t Theme) *styles {
+	return &styles{
+		subtleStyle:  lipgloss.NewStyle().Foreground(t.Subtle),
+		specialStyle: lipgloss.NewStyle().Foreground(t.Success),
+		warnStyle:    lipgloss.NewStyle().Foreground(t.Warning),
+		staleStyle:   lipgloss.NewStyle().Foreground(t.Stale),
+		dangerStyle:  lipgloss.NewStyle().Foreground(t.Danger),
+		titleStyle:   lipgloss.NewStyle().Foreground(t.Accent).Bold(true),
+		activeTab:    lipgloss.NewStyle().Background(t.Surface).Foreground(t.Accent).Bold(true).Padding(0, 1),
+		inactiveTab:  lipgloss.NewStyle().Padding(0, 1).Foreground(t.Muted),
+
+		// Sparkline colors are kept as plain strings, not lipgloss styles.
+		sparkSuccess: string(t.Success),
+		sparkWarning: string(t.Warning),
+		sparkDanger:  string(t.Danger),
+
+		tableHeaderStyle:   lipgloss.NewStyle().Foreground(t.Accent).Bold(true).Padding(0, 1),
+		tableCellStyle:     lipgloss.NewStyle().Padding(0, 1),
+		tableSelectedStyle: lipgloss.NewStyle().Padding(0, 1).Bold(true).Foreground(t.SelectedFg).Background(t.SelectedBg),
+		tableBorderStyle:   lipgloss.NewStyle().Foreground(t.Border),
+		tableZebraStyle:    lipgloss.NewStyle().Padding(0, 1).Background(t.ZebraBg),
+
+		siteGroupStyle: lipgloss.NewStyle().Padding(0, 1).Bold(true).Foreground(t.Accent),
+		maintStyle:     lipgloss.NewStyle().Foreground(t.Purple),
+	}
 }

 var pulseFrames = []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}
@@ -57,6 +80,8 @@ const (
 	chromeFooter = 2 // footer: "\n" prefix + text line
 	chromeTable  = 3 // renderTable "\n" prefix + top border + header + bottom border (lipgloss collapses two into three rendered lines)
 	chromeBase   = chromePadV + chromeHeader + chromeGaps + chromeFooter + chromeTable
+
+	detailSparkWidth = 40
 )

 type sessionState int
@@ -80,6 +105,7 @@ type Model struct {
 	state        sessionState
 	currentTab   int
 	cursor       int
+	selectedID   int
 	tableOffset  int
 	maxTableRows int
 	termWidth    int
@@ -95,10 +121,13 @@ type Model struct {

 	logViewport        viewport.Model
 	logFilterImportant bool
+	logTotal           int
+	logShown           int

 	historyViewport viewport.Model
 	historyChanges  []models.StateChange
 	historySiteName string
+	historySiteID   int

 	slaViewport       viewport.Model
 	slaReport         monitor.SLAReport
@@ -119,6 +148,7 @@ type Model struct {
 	engine     *monitor.Engine
 	theme      Theme
 	themeIndex int
+	st         *styles

 	// harmonica animation state
 	pulseSpring harmonica.Spring
@@ -131,23 +161,32 @@ type Model struct {
 	users              []models.User
 	nodes              []models.ProbeNode
 	maintenanceWindows []models.MaintenanceWindow
+	lastTabLoad        time.Time // last dispatch of loadTabDataCmd (throttle)
+	tabSeq             int       // seq of the newest issued tab-data load
+
+	// detail-panel state-change history, loaded on enter so View does no DB IO
+	detailChanges       []models.StateChange
+	detailChangesSiteID int

 	filterMode bool
 	filterText string

+	sparkTooltipIdx int // clicked sparkline data index, -1 = none
+
 	// demoMode renders a stable status dot instead of the animated pulse so
 	// screenshots/recordings don't capture the spinner mid-frame. Set via UPTOP_DEMO=1.
 	demoMode bool
+	version  string
 }

-func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine) Model {
+func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine, version string) Model {
 	vpLogs := viewport.New(100, 20)
 	vpLogs.SetContent("Waiting for logs...")
 	z := zone.New()
 	spring := harmonica.NewSpring(harmonica.FPS(10), 6.0, 0.4)
 	collapsed := loadCollapsed(s)

-	themeName, _ := s.GetPreference("theme")
+	themeName, _ := s.GetPreference(context.Background(), "theme")
 	theme := themeByName(themeName)
 	themeIdx := 0
 	for i, t := range themes {
@@ -157,8 +196,6 @@ func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine) Model {
 		}
 	}

-	applyTheme(theme)
-
 	return Model{
 		state:           stateDashboard,
 		logViewport:     vpLogs,
@@ -171,10 +208,19 @@ func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine) Model {
 		collapsed:       collapsed,
 		theme:           theme,
 		themeIndex:      themeIdx,
+		st:              newStyles(theme),
 		demoMode:        os.Getenv("UPTOP_DEMO") == "1",
+		version:         version,
+		sparkTooltipIdx: -1,
 	}
 }

-func (m Model) Init() tea.Cmd {
-	return tea.Batch(tea.ClearScreen, tea.Tick(time.Second, func(t time.Time) tea.Msg { return t }))
+// tickCmd schedules the next one-second heartbeat. The tick time is delivered
+// wrapped in a tickMsg rather than as a bare time.Time.
+func tickCmd() tea.Cmd {
+	return tea.Tick(time.Second, func(t time.Time) tea.Msg { return tickMsg(t) })
+}
+
+// Init returns the model's startup commands as one batch: clear the screen,
+// schedule the first heartbeat tick, and issue the first tab-data load.
+func (m Model) Init() tea.Cmd {
+	// Load tab data immediately so the dashboard isn't empty for the first second.
+	return tea.Batch(tea.ClearScreen, tickCmd(), m.loadTabDataCmd())
 }
@@ -1,6 +1,7 @@
 package tui

 import (
+	"context"
 	"fmt"
 	"time"

@@ -15,8 +16,35 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 	switch msg := msg.(type) {
 	case tea.WindowSizeMsg:
 		return m.handleResize(msg)
-	case time.Time:
-		return m.handleTick(msg)
+	case tickMsg:
+		return m.handleTick(time.Time(msg))
+	case tabDataMsg:
+		return m.handleTabData(msg)
+	case detailDataMsg:
+		// Drop replies for a site the user has already navigated away from,
+		// so a slow load can't clobber the panel currently on screen.
+		if m.state == stateDetail && m.cursor < len(m.sites) && m.sites[m.cursor].ID != msg.siteID {
+			return m, nil
+		}
+		m.detailChanges = msg.changes
+		m.detailChangesSiteID = msg.siteID
+		return m, nil
+	case historyDataMsg:
+		if msg.siteID != m.historySiteID {
+			return m, nil // stale reply for a previously opened history
+		}
+		m.historyChanges = msg.changes
+		m.historyViewport.SetContent(m.buildHistoryContent())
+		m.historyViewport.GotoTop()
+		return m, nil
+	case slaDataMsg:
+		return m.handleSLAData(msg)
+	case writeDoneMsg:
+		if msg.err != nil {
+			m.engine.AddLog(msg.op + " failed: " + msg.err.Error())
+		}
+		m.refreshLive()
+		return m, m.loadTabDataCmd()
 	}

 	if m.state == stateConfirmDelete {
@@ -42,34 +70,34 @@ func (m *Model) handleConfirmDelete(msg tea.Msg) (tea.Model, tea.Cmd) {
 	}
 	switch keyMsg.String() {
 	case "y", "Y":
+		// The store delete runs in a Cmd; the in-memory engine/model updates
+		// stay here so the row vanishes immediately. If the delete fails, the
+		// writeDoneMsg reload converges the UI back to the DB state (and the
+		// engine poll loop re-adds a site that is still in the DB).
+		st := m.store
+		id := m.deleteID
+		var cmd tea.Cmd
 		switch m.deleteTab {
 		case 0:
-			if err := m.store.DeleteSite(m.deleteID); err != nil {
-				m.engine.AddLog("Delete site failed: " + err.Error())
-			}
-			m.engine.RemoveSite(m.deleteID)
+			cmd = writeCmd("Delete site", func() error { return st.DeleteSite(context.Background(), id) })
+			m.engine.RemoveSite(id)
 			m.adjustCursor(len(m.sites) - 1)
 		case 1:
-			if err := m.store.DeleteAlert(m.deleteID); err != nil {
-				m.engine.AddLog("Delete alert failed: " + err.Error())
-			}
+			cmd = writeCmd("Delete alert", func() error { return st.DeleteAlert(context.Background(), id) })
 			m.adjustCursor(len(m.alerts) - 1)
 		case 4:
-			if err := m.store.DeleteMaintenanceWindow(m.deleteID); err != nil {
-				m.engine.AddLog("Delete maintenance window failed: " + err.Error())
-			}
+			cmd = writeCmd("Delete maintenance window", func() error { return st.DeleteMaintenanceWindow(context.Background(), id) })
 			m.adjustCursor(len(m.maintenanceWindows) - 1)
 		case 5:
-			if err := m.store.DeleteUser(m.deleteID); err != nil {
-				m.engine.AddLog("Delete user failed: " + err.Error())
-			}
+			cmd = writeCmd("Delete user", func() error { return st.DeleteUser(context.Background(), id) })
 			m.adjustCursor(len(m.users) - 1)
 		}
-		m.refreshData()
+		m.refreshLive()
 		m.state = stateDashboard
 		if m.deleteTab == 5 {
 			m.state = stateUsers
 		}
+		return m, cmd
 	case "n", "N", "esc":
 		m.state = stateDashboard
 		if m.deleteTab == 5 {
@@ -82,10 +110,6 @@ func (m *Model) handleConfirmDelete(msg tea.Msg) (tea.Model, tea.Cmd) {
 }

 func (m *Model) handleFormMsg(msg tea.Msg) (tea.Model, tea.Cmd) {
-	if wsm, ok := msg.(tea.WindowSizeMsg); ok {
-		m.termWidth = wsm.Width
-		m.termHeight = wsm.Height
-	}
 	if keyMsg, ok := msg.(tea.KeyMsg); ok {
 		if keyMsg.String() == "ctrl+c" {
 			return m, tea.Quit
@@ -105,42 +129,108 @@ func (m *Model) handleFormMsg(msg tea.Msg) (tea.Model, tea.Cmd) {
 			m.huhForm = f
 		}
 		if m.huhForm.State == huh.StateCompleted {
-			m.submitForm()
-			m.refreshData()
+			// The store write runs in the returned Cmd; its writeDoneMsg
+			// triggers the tab-data reload once the row actually exists.
+			cmd := m.submitForm()
+			m.refreshLive()
 			m.huhForm = nil
-			return m, nil
+			return m, cmd
 		}
 		return m, formCmd
 	}
 	return m, nil
 }

-func (m *Model) handleResize(msg tea.WindowSizeMsg) (tea.Model, tea.Cmd) {
-	m.termWidth = msg.Width
-	m.termHeight = msg.Height
+func (m *Model) recalcLayout() {
 	chrome := chromeBase
 	if m.filterMode || m.filterText != "" {
 		chrome++
 	}
-	m.maxTableRows = msg.Height - chrome
+	m.maxTableRows = m.termHeight - chrome
 	if m.maxTableRows < 1 {
 		m.maxTableRows = 1
 	}
+}
+
+func (m *Model) handleResize(msg tea.WindowSizeMsg) (tea.Model, tea.Cmd) {
+	m.termWidth = msg.Width
+	m.termHeight = msg.Height
+	m.recalcLayout()
 	m.logViewport.Width = msg.Width - chromePadH
-	m.logViewport.Height = msg.Height - (chromePadV + chromeHeader + chromeGaps + chromeFooter)
+	m.logViewport.Height = msg.Height - (chromePadV + chromeHeader + chromeFooter + 2)
 	m.historyViewport.Width = msg.Width - chromePadH
 	m.historyViewport.Height = msg.Height - 10
 	m.slaViewport.Width = msg.Width - chromePadH
 	m.slaViewport.Height = msg.Height - 16
-	return m, tea.ClearScreen
+	if m.huhForm != nil {
+		formHeight := msg.Height - 7
+		if formHeight < 5 {
+			formHeight = 5
+		}
+		m.huhForm.WithHeight(formHeight)
+	}
+	return m, nil
 }

 func (m *Model) handleTick(t time.Time) (tea.Model, tea.Cmd) {
-	m.refreshData()
+	m.refreshLive()
 	m.tickCount++
 	target := sinApprox(float64(m.tickCount)*0.3)*0.5 + 0.5
 	m.pulsePos, m.pulseVel = m.pulseSpring.Update(m.pulsePos, m.pulseVel, target)
-	return m, tea.Tick(time.Second, func(t time.Time) tea.Msg { return t })
+
+	cmds := []tea.Cmd{tickCmd()}
+	if t.Sub(m.lastTabLoad) > tabRefreshTTL {
+		m.lastTabLoad = t
+		cmds = append(cmds, m.loadTabDataCmd())
+		if dc := m.detailRefreshCmd(); dc != nil {
+			cmds = append(cmds, dc)
+		}
+	}
+	return m, tea.Batch(cmds...)
+}
+
+// detailRefreshCmd returns the Cmd that reloads detail data for the site under
+// the cursor. A Cmd is produced only while the model is in the detail state
+// and the cursor indexes an existing site; in every other case the result is
+// nil so the caller can leave it out of its batch.
+func (m *Model) detailRefreshCmd() tea.Cmd {
+	detailOpen := m.state == stateDetail
+	if detailOpen && m.cursor < len(m.sites) {
+		selected := m.sites[m.cursor]
+		return m.loadDetailCmd(selected.ID)
+	}
+	return nil
+}
+
+// handleTabData applies the result of an asynchronous tab-data load.
+//
+// A reply whose sequence number differs from m.tabSeq is ignored. A reply
+// carrying an error is written to the engine log and otherwise ignored, so the
+// data already on the model stays in place. Any other reply replaces the
+// alerts, nodes and maintenance windows, replaces the users only when the
+// session is an admin one, and re-clamps the cursor.
+func (m *Model) handleTabData(msg tabDataMsg) (tea.Model, tea.Cmd) {
+	switch {
+	case msg.seq != m.tabSeq:
+		// Reply does not belong to the most recently issued load: drop it.
+	case msg.err != nil:
+		m.engine.AddLog("Tab data refresh failed: " + msg.err.Error())
+	default:
+		m.alerts = msg.alerts
+		if m.isAdmin {
+			m.users = msg.users
+		}
+		m.nodes, m.maintenanceWindows = msg.nodes, msg.maint
+		m.clampCursor()
+	}
+	return m, nil
+}
+
+// testAlertCmd builds a Cmd that calls the engine's TestAlert for the given
+// alert ID. If that call fails, the error is appended to the engine log
+// together with the alert name. The Cmd always yields a nil message.
+func (m *Model) testAlertCmd(id int, name string) tea.Cmd {
+	// Capture the engine up front so the closure does not read m when it runs.
+	e := m.engine
+	send := func() tea.Msg {
+		err := e.TestAlert(id)
+		if err == nil {
+			return nil
+		}
+		e.AddLog(fmt.Sprintf("Test alert failed (%s): %v", name, err))
+		return nil
+	}
+	return send
 }

 func (m *Model) handleMouse(msg tea.MouseMsg) (tea.Model, tea.Cmd) {
@@ -162,6 +252,12 @@ func (m *Model) handleMouse(msg tea.MouseMsg) (tea.Model, tea.Cmd) {
 		}
 		return m, nil
 	}
+	if m.state == stateDetail {
+		if msg.Action == tea.MouseActionPress && msg.Button == tea.MouseButtonLeft {
+			return m.handleSparklineClick(msg)
+		}
+		return m, nil
+	}
 	if m.state != stateDashboard && m.state != stateLogs && m.state != stateUsers {
 		return m, nil
 	}
@@ -197,6 +293,7 @@ func (m *Model) handleMouse(msg tea.MouseMsg) (tea.Model, tea.Cmd) {
 			}
 		}
 	}
+	m.syncSelectedID()
 	return m, nil
 }

@@ -234,24 +331,26 @@ func (m *Model) handleFilterKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 		m.filterText = ""
 		m.cursor = 0
 		m.tableOffset = 0
-		m.refreshData()
+		m.recalcLayout()
+		m.refreshLive()
 	case "enter":
 		m.filterMode = false
+		m.recalcLayout()
 	case "backspace":
 		if len(m.filterText) > 0 {
 			m.filterText = m.filterText[:len(m.filterText)-1]
 			m.cursor = 0
 			m.tableOffset = 0
-			m.refreshData()
+			m.refreshLive()
 		}
 	case "ctrl+c":
 		return m, tea.Quit
 	default:
-		if len(msg.String()) == 1 {
-			m.filterText += msg.String()
+		if len(msg.Runes) == 1 {
+			m.filterText += string(msg.Runes)
 			m.cursor = 0
 			m.tableOffset = 0
-			m.refreshData()
+			m.refreshLive()
 		}
 	}
 	return m, nil
@@ -259,7 +358,15 @@ func (m *Model) handleFilterKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {

 func (m *Model) handleDetailKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 	switch msg.String() {
-	case "i", "esc":
+	case "esc":
+		if m.sparkTooltipIdx >= 0 {
+			m.sparkTooltipIdx = -1
+			return m, nil
+		}
+		m.sparkTooltipIdx = -1
+		m.state = stateDashboard
+	case "i":
+		m.sparkTooltipIdx = -1
 		m.state = stateDashboard
 	case "e":
 		return m.handleEditItem()
@@ -267,25 +374,48 @@ func (m *Model) handleDetailKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 		if m.cursor < len(m.sites) {
 			site := m.sites[m.cursor]
 			m.historySiteName = site.Name
-			m.historyChanges = m.engine.GetStateChanges(site.ID, 100)
+			m.historySiteID = site.ID
+			m.historyChanges = nil
 			m.historyViewport = viewport.New(
 				m.termWidth-chromePadH,
 				m.termHeight-10,
 			)
-			m.historyViewport.SetContent(m.buildHistoryContent())
-			m.historyViewport.GotoTop()
+			m.historyViewport.SetContent("\n  Loading state history...")
 			m.state = stateHistory
+			return m, m.loadHistoryCmd(site.ID)
 		}
 	case "s":
 		if m.cursor < len(m.sites) {
-			m.openSLAView(m.sites[m.cursor])
+			return m, m.openSLAView(m.sites[m.cursor])
 		}
 	case "q":
-		return m, tea.Quit
+		m.state = stateDashboard
 	}
 	return m, nil
 }

+// handleSparklineClick maps a mouse message in the detail view onto a
+// sparkline tooltip index. A click inside the "spark-latency" zone selects an
+// index into the site's latency history, a click inside "spark-heartbeat"
+// selects one into its status history, and a click in neither zone clears the
+// tooltip by setting the index to -1. With the cursor past the end of the site
+// list the message is ignored.
+func (m *Model) handleSparklineClick(msg tea.MouseMsg) (tea.Model, tea.Cmd) {
+	if m.cursor >= len(m.sites) {
+		return m, nil
+	}
+	hist, _ := m.engine.GetHistory(m.sites[m.cursor].ID)
+
+	// clickedX reports the x position of msg within the named zone, and
+	// whether the zone exists, is non-zero and contains the click at all.
+	clickedX := func(zoneID string) (int, bool) {
+		zi := m.zones.Get(zoneID)
+		if zi == nil || zi.IsZero() || !zi.InBounds(msg) {
+			return 0, false
+		}
+		x, _ := zi.Pos(msg)
+		return x, true
+	}
+
+	if x, ok := clickedX("spark-latency"); ok {
+		m.sparkTooltipIdx = resolveSparklineIndex(x, detailSparkWidth, len(hist.Latencies))
+		return m, nil
+	}
+	if x, ok := clickedX("spark-heartbeat"); ok {
+		m.sparkTooltipIdx = resolveSparklineIndex(x, detailSparkWidth, len(hist.Statuses))
+		return m, nil
+	}
+
+	m.sparkTooltipIdx = -1
+	return m, nil
+}
+
 func (m *Model) handleSLAKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 	switch msg.String() {
 	case "q", "esc":
@@ -294,7 +424,7 @@ func (m *Model) handleSLAKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 		idx := int(msg.String()[0]-'0') - 1
 		if idx >= 0 && idx < len(slaPeriods) {
 			m.slaPeriodIdx = idx
-			m.recomputeSLA()
+			return m, m.loadSLACmd(m.slaSiteID, idx)
 		}
 	case "up", "k":
 		m.slaViewport.ScrollUp(1)
@@ -310,26 +440,39 @@ func (m *Model) handleSLAKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 	return m, nil
 }

-func (m *Model) openSLAView(site models.Site) {
+func (m *Model) openSLAView(site models.Site) tea.Cmd {
 	m.slaSiteName = site.Name
 	m.slaSiteID = site.ID
 	m.slaPeriodIdx = 2 // default 30d
-	m.recomputeSLA()
+	m.slaViewport = viewport.New(
+		m.termWidth-chromePadH,
+		m.termHeight-16,
+	)
+	m.slaViewport.SetContent("\n  Loading SLA report...")
 	m.state = stateSLA
+	return m.loadSLACmd(site.ID, m.slaPeriodIdx)
 }

-func (m *Model) recomputeSLA() {
-	period := slaPeriods[m.slaPeriodIdx]
-	since := time.Now().Add(-period.duration)
-	changes := m.engine.GetStateChangesSince(m.slaSiteID, since)
+// handleSLAData folds an async SLA load into the model. The SLA math itself is
+// pure CPU and cheap, so it runs here; only the state-change read happens in
+// the Cmd. Replies for a different site or period than currently selected are
+// stale and dropped.
+func (m *Model) handleSLAData(msg slaDataMsg) (tea.Model, tea.Cmd) {
+	if msg.siteID != m.slaSiteID || msg.periodIdx != m.slaPeriodIdx {
+		return m, nil
+	}
+	period := slaPeriods[msg.periodIdx]

-	var currentStatus string
-	if m.cursor < len(m.sites) {
-		currentStatus = m.sites[m.cursor].Status
+	var currentStatus models.Status
+	for _, s := range m.sites {
+		if s.ID == msg.siteID {
+			currentStatus = s.Status
+			break
+		}
 	}

-	m.slaReport = monitor.ComputeSLA(changes, currentStatus, period.duration)
-	m.slaDailyBreakdown = monitor.ComputeDailyBreakdown(changes, currentStatus, period.days)
+	m.slaReport = monitor.ComputeSLA(msg.changes, currentStatus, period.duration)
+	m.slaDailyBreakdown = monitor.ComputeDailyBreakdown(msg.changes, currentStatus, period.days, time.Now())

 	m.slaViewport = viewport.New(
 		m.termWidth-chromePadH,
@@ -337,6 +480,7 @@ func (m *Model) recomputeSLA() {
 	)
 	m.slaViewport.SetContent(m.buildSLADailyContent())
 	m.slaViewport.GotoTop()
+	return m, nil
 }

 func (m *Model) handleHistoryKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
@@ -363,10 +507,8 @@ func (m *Model) handleHistoryKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {

 func (m *Model) handleAlertDetailKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 	switch msg.String() {
-	case "i", "esc":
+	case "q", "i", "esc":
 		m.state = stateDashboard
-	case "q":
-		return m, tea.Quit
 	}
 	return m, nil
 }
@@ -379,11 +521,13 @@ func (m *Model) handleDashboardKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 	case "/":
 		if m.currentTab == 0 {
 			m.filterMode = true
+			m.recalcLayout()
 			return m, nil
 		}
 	case "f":
 		if m.state == stateLogs {
 			m.logFilterImportant = !m.logFilterImportant
+			m.refreshLogContent()
 			return m, nil
 		}
 	case "tab":
@@ -401,6 +545,7 @@ func (m *Model) handleDashboardKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 			if m.cursor < m.tableOffset {
 				m.tableOffset = m.cursor
 			}
+			m.syncSelectedID()
 		}
 	case "down", "j":
 		if m.state == stateLogs {
@@ -412,6 +557,7 @@ func (m *Model) handleDashboardKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 				if m.cursor >= m.tableOffset+m.maxTableRows {
 					m.tableOffset++
 				}
+				m.syncSelectedID()
 			}
 		}
 	case "n":
@@ -421,31 +567,33 @@ func (m *Model) handleDashboardKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 	case "t":
 		if m.currentTab == 1 && len(m.alerts) > 0 {
 			a := m.alerts[m.cursor]
-			go func() {
-				if err := m.engine.TestAlert(a.ID); err != nil {
-					m.engine.AddLog(fmt.Sprintf("Test alert failed (%s): %v", a.Name, err))
-				}
-			}()
-			return m, nil
+			return m, m.testAlertCmd(a.ID, a.Name)
 		}
 	case " ":
 		if m.currentTab == 0 && len(m.sites) > 0 && m.sites[m.cursor].Type == "group" {
 			gid := m.sites[m.cursor].ID
 			m.collapsed[gid] = !m.collapsed[gid]
-			saveCollapsed(m.store, m.collapsed)
-			m.refreshData()
+			payload := collapsedJSON(m.collapsed)
+			st := m.store
+			m.refreshLive()
+			return m, writeCmd("Save collapsed groups", func() error {
+				return st.SetPreference(context.Background(), "collapsed_groups", payload)
+			})
 		}
 	case "p":
 		if m.currentTab == 0 && len(m.sites) > 0 {
-			site := m.sites[m.cursor]
-			m.engine.ToggleSitePause(site.ID)
-			site.Paused = !site.Paused
-			_ = m.store.UpdateSitePaused(site.ID, site.Paused)
-			m.refreshData()
+			id := m.sites[m.cursor].ID
+			paused := m.engine.ToggleSitePause(id)
+			st := m.store
+			m.refreshLive()
+			return m, writeCmd("Update pause state", func() error {
+				return st.UpdateSitePaused(context.Background(), id, paused)
+			})
 		}
 	case "i":
 		if m.currentTab == 0 && len(m.sites) > 0 {
 			m.state = stateDetail
+			return m, m.loadDetailCmd(m.sites[m.cursor].ID)
 		} else if m.currentTab == 1 && len(m.alerts) > 0 {
 			m.state = stateAlertDetail
 		}
@@ -455,18 +603,24 @@ func (m *Model) handleDashboardKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
 			now := time.Now()
 			isActive := !mw.StartTime.After(now) && (mw.EndTime.IsZero() || mw.EndTime.After(now))
 			if isActive {
-				if err := m.store.EndMaintenanceWindow(mw.ID); err != nil {
-					m.engine.AddLog("End maintenance failed: " + err.Error())
-				}
-				m.refreshData()
+				st := m.store
+				id := mw.ID
+				m.refreshLive()
+				return m, writeCmd("End maintenance", func() error {
+					return st.EndMaintenanceWindow(context.Background(), id)
+				})
 			}
 		}
 	case "T":
 		m.themeIndex = (m.themeIndex + 1) % len(themes)
 		m.theme = themes[m.themeIndex]
-		applyTheme(m.theme)
-		_ = m.store.SetPreference("theme", m.theme.Name)
-	case "d", "backspace":
+		m.st = newStyles(m.theme)
+		st := m.store
+		name := m.theme.Name
+		return m, writeCmd("Save theme", func() error {
+			return st.SetPreference(context.Background(), "theme", name)
+		})
+	case "d":
 		return m.handleDeleteItem()
 	}
 	return m, nil
@@ -601,37 +755,30 @@ func (m *Model) switchTab(idx int) {
 	}
 }

-func (m *Model) adjustCursor(newLen int) {
-	if m.cursor >= newLen && m.cursor > 0 {
-		m.cursor--
-	}
-	if m.cursor < m.tableOffset {
-		m.tableOffset = m.cursor
-		if m.tableOffset < 0 {
-			m.tableOffset = 0
-		}
-	}
+func (m *Model) adjustCursor(_ int) {
+	m.clampCursor()
 }

-func (m *Model) submitForm() {
+func (m *Model) submitForm() tea.Cmd {
 	switch m.state {
 	case stateFormSite:
 		if m.siteFormData != nil {
-			m.submitSiteForm()
+			return m.submitSiteForm()
 		}
 	case stateFormAlert:
 		if m.alertFormData != nil {
-			m.submitAlertForm()
+			return m.submitAlertForm()
 		}
 	case stateFormUser:
 		if m.userFormData != nil {
-			m.submitUserForm()
+			return m.submitUserForm()
 		}
 	case stateFormMaint:
 		if m.maintFormData != nil {
-			m.submitMaintForm()
+			return m.submitMaintForm()
 		}
 	}
+	return nil
 }

 func (m Model) currentListLen() int {
@@ -0,0 +1,336 @@
+package tui
+
+import (
+	"context"
+	"strings"
+	"testing"
+	"time"
+
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/store/storetest"
+	tea "github.com/charmbracelet/bubbletea"
+	zone "github.com/lrstanley/bubblezone"
+)
+
+// --- minimal Store mock for TUI data-flow tests ---
+
+// tuiMockStore is the store double for the TUI data-flow tests. It embeds
+// storetest.BaseMock and adds canned result slices plus call counters that
+// the methods defined on it read and update.
+type tuiMockStore struct {
+	storetest.BaseMock
+	alerts           []models.AlertConfig // returned by GetAllAlerts
+	users            []models.User // returned by GetAllUsers
+	nodes            []models.ProbeNode // returned by GetAllNodes
+	maint            []models.MaintenanceWindow // returned by GetAllMaintenanceWindows
+	stateChanges     []models.StateChange // returned by GetStateChanges
+	stateChangeCalls int // incremented on every GetStateChanges call
+	deleteSiteCalls  int // incremented on every DeleteSite call
+}
+
+// GetAllAlerts returns the canned alerts slice and never fails.
+func (m *tuiMockStore) GetAllAlerts(_ context.Context) ([]models.AlertConfig, error) {
+	return m.alerts, nil
+}
+// GetAllUsers returns the canned users slice and never fails.
+func (m *tuiMockStore) GetAllUsers(_ context.Context) ([]models.User, error) { return m.users, nil }
+// GetAllNodes returns the canned nodes slice and never fails.
+func (m *tuiMockStore) GetAllNodes(_ context.Context) ([]models.ProbeNode, error) {
+	return m.nodes, nil
+}
+// GetStateChanges counts the call, then returns the canned state changes.
+// Both int arguments are ignored.
+func (m *tuiMockStore) GetStateChanges(_ context.Context, _ int, _ int) ([]models.StateChange, error) {
+	m.stateChangeCalls++
+	return m.stateChanges, nil
+}
+// GetAllMaintenanceWindows returns the canned maintenance windows; the int
+// argument is ignored.
+func (m *tuiMockStore) GetAllMaintenanceWindows(_ context.Context, _ int) ([]models.MaintenanceWindow, error) {
+	return m.maint, nil
+}
+// DeleteSite counts the call and reports success without touching any data.
+func (m *tuiMockStore) DeleteSite(_ context.Context, _ int) error {
+	m.deleteSiteCalls++
+	return nil
+}
+
+// newTestModel builds a Model around the given mock store: the engine is
+// created from that same mock, admin mode is on, a fresh zone manager is
+// attached, and the Flexoki dark theme and its styles are selected.
+// detailChangesSiteID starts at -1; presumably that marks "no detail data
+// cached yet" (confirm against the Model definition).
+func newTestModel(ms *tuiMockStore) Model {
+	return Model{
+		store:               ms,
+		engine:              monitor.NewEngine(ms),
+		isAdmin:             true,
+		zones:               zone.New(),
+		detailChangesSiteID: -1,
+		theme:               themeFlexokiDark,
+		st:                  newStyles(themeFlexokiDark),
+	}
+}
+
+// --- Tests ---
+
+// TestLoadTabDataCmd_ReturnsRows runs the tab-data load Cmd against a mock
+// store holding one row of each kind and expects a tabDataMsg carrying one
+// alert, node, user and maintenance window, and no error.
+func TestLoadTabDataCmd_ReturnsRows(t *testing.T) {
+	mock := &tuiMockStore{
+		alerts: []models.AlertConfig{{ID: 1, Name: "a"}},
+		nodes:  []models.ProbeNode{{ID: "n1"}},
+		users:  []models.User{{Username: "u"}},
+		maint:  []models.MaintenanceWindow{{ID: 7}},
+	}
+	model := newTestModel(mock)
+
+	reply := model.loadTabDataCmd()()
+	data, isTabData := reply.(tabDataMsg)
+	if !isTabData {
+		t.Fatalf("expected tabDataMsg, got %T", reply)
+	}
+
+	oneOfEach := len(data.alerts) == 1 && len(data.nodes) == 1 && len(data.users) == 1 && len(data.maint) == 1
+	if !oneOfEach {
+		t.Errorf("unexpected counts: %+v", data)
+	}
+	if data.err != nil {
+		t.Errorf("unexpected err: %v", data.err)
+	}
+}
+
+// TestHandleTabData_PopulatesModel feeds a successful tabDataMsg to
+// handleTabData and expects each of the four lists on the model to hold the
+// single row the message carried.
+func TestHandleTabData_PopulatesModel(t *testing.T) {
+	model := newTestModel(&tuiMockStore{})
+
+	next, _ := model.handleTabData(tabDataMsg{
+		alerts: []models.AlertConfig{{ID: 1}},
+		nodes:  []models.ProbeNode{{ID: "n"}},
+		users:  []models.User{{Username: "u"}},
+		maint:  []models.MaintenanceWindow{{ID: 2}},
+	})
+
+	got := next.(*Model)
+	nAlerts, nNodes := len(got.alerts), len(got.nodes)
+	nUsers, nMaint := len(got.users), len(got.maintenanceWindows)
+	if nAlerts != 1 || nNodes != 1 || nUsers != 1 || nMaint != 1 {
+		t.Errorf("model not populated: alerts=%d nodes=%d users=%d maint=%d",
+			nAlerts, nNodes, nUsers, nMaint)
+	}
+}
+
+// TestHandleTabData_ErrorKeepsPreviousData seeds the model with one alert,
+// delivers a tabDataMsg that carries only an error, and expects the seeded
+// alert to still be there afterwards.
+func TestHandleTabData_ErrorKeepsPreviousData(t *testing.T) {
+	model := newTestModel(&tuiMockStore{})
+	model.alerts = []models.AlertConfig{{ID: 99}} // data present before the failed load
+
+	next, _ := model.handleTabData(tabDataMsg{err: errSentinel})
+
+	kept := next.(*Model).alerts
+	if len(kept) != 1 || kept[0].ID != 99 {
+		t.Error("transient error wiped previous tab data")
+	}
+}
+
+// errSentinel is the canned non-nil error the tests feed into message
+// handlers; its Error text is "boom".
+var errSentinel = &stubErr{}
+
+// stubErr is a minimal error implementation with a fixed message.
+type stubErr struct{}
+
+// Error returns the constant string "boom".
+func (*stubErr) Error() string { return "boom" }
+
+// TestDetailLoad_CachesAndViewDoesNoIO checks three things in sequence: the
+// Cmd from loadDetailCmd hits the store exactly once, feeding its
+// detailDataMsg through Update caches the changes on the model, and rendering
+// the detail panel afterwards does not hit the store again.
+func TestDetailLoad_CachesAndViewDoesNoIO(t *testing.T) {
+	ms := &tuiMockStore{stateChanges: []models.StateChange{{FromStatus: "UP", ToStatus: "DOWN"}}}
+	m := newTestModel(ms)
+	m.sites = []models.Site{{SiteConfig: models.SiteConfig{ID: 1, Name: "site"}, SiteState: models.SiteState{Status: "DOWN"}}}
+	m.cursor = 0
+	m.state = stateDetail
+	m.termWidth = 120
+	m.termHeight = 40
+
+	// Entering detail dispatches the load Cmd.
+	cmd := m.loadDetailCmd(1)
+	if cmd == nil {
+		t.Fatal("loadDetailCmd returned nil")
+	}
+	msg := cmd()
+	dd, ok := msg.(detailDataMsg)
+	if !ok || dd.siteID != 1 || len(dd.changes) != 1 {
+		t.Fatalf("unexpected detailDataMsg: %+v", msg)
+	}
+	if ms.stateChangeCalls != 1 {
+		t.Fatalf("expected exactly 1 store hit from the load Cmd, got %d", ms.stateChangeCalls)
+	}
+
+	// Apply the msg through Update (caches into the model).
+	// On this path Update hands back a Model value, hence the value assertion.
+	updated, _ := m.Update(dd)
+	m = updated.(Model)
+	if m.detailChangesSiteID != 1 || len(m.detailChanges) != 1 {
+		t.Fatalf("detail changes not cached: id=%d n=%d", m.detailChangesSiteID, len(m.detailChanges))
+	}
+
+	// Render the detail panel several times — it must read the cache, not the DB.
+	for i := 0; i < 3; i++ {
+		_ = m.viewDetailPanel()
+	}
+	if ms.stateChangeCalls != 1 {
+		t.Errorf("View performed DB IO: store hit %d times (want 1, from the Cmd only)", ms.stateChangeCalls)
+	}
+}
+
+// TestHandleTick_ThrottlesTabLoad drives handleTick with three timestamps and
+// watches lastTabLoad: it must be stamped on the first tick, stay put for a
+// tick one second later, and move again once more than tabRefreshTTL has
+// passed.
+func TestHandleTick_ThrottlesTabLoad(t *testing.T) {
+	model := newTestModel(&tuiMockStore{})
+
+	first := time.Unix(1_000_000, 0)
+	model.handleTick(first)
+	if !model.lastTabLoad.Equal(first) {
+		t.Fatalf("first tick should dispatch + stamp lastTabLoad=%v, got %v", first, model.lastTabLoad)
+	}
+
+	// One second later is still inside the TTL: the stamp must not move.
+	model.handleTick(first.Add(time.Second))
+	if !model.lastTabLoad.Equal(first) {
+		t.Errorf("tick within TTL should not re-dispatch; lastTabLoad=%v", model.lastTabLoad)
+	}
+
+	// Beyond the TTL the load is issued again and the stamp follows the tick.
+	late := first.Add(tabRefreshTTL + time.Second)
+	model.handleTick(late)
+	if !model.lastTabLoad.Equal(late) {
+		t.Errorf("tick past TTL should re-dispatch; lastTabLoad=%v want %v", model.lastTabLoad, late)
+	}
+}
+
+// keyMsg returns a rune-type key message whose runes are the characters of s,
+// e.g. keyMsg("h") for a plain "h" keypress.
+func keyMsg(s string) tea.KeyMsg {
+	var k tea.KeyMsg
+	k.Type = tea.KeyRunes
+	k.Runes = []rune(s)
+	return k
+}
+
+// TestHandleTabData_DropsStaleSeq issues two tab-data loads without running
+// them, then delivers a reply tagged with the first sequence number followed
+// by one tagged with the second. Only the second may reach the model.
+func TestHandleTabData_DropsStaleSeq(t *testing.T) {
+	model := newTestModel(&tuiMockStore{})
+	// Two issued loads: seq 1 is superseded, seq 2 is the newest.
+	for i := 0; i < 2; i++ {
+		_ = model.loadTabDataCmd()
+	}
+
+	staleReply := tabDataMsg{seq: 1, alerts: []models.AlertConfig{{ID: 1}}}
+	next, _ := model.handleTabData(staleReply)
+	if applied := next.(*Model).alerts; len(applied) != 0 {
+		t.Error("stale tab-data reply was applied over a newer in-flight load")
+	}
+
+	freshReply := tabDataMsg{seq: 2, alerts: []models.AlertConfig{{ID: 2}}}
+	next, _ = model.handleTabData(freshReply)
+	if applied := next.(*Model).alerts; len(applied) != 1 || applied[0].ID != 2 {
+		t.Error("fresh tab-data reply was not applied")
+	}
+}
+
+// TestHistoryKey_LoadsOffUIGoroutine presses "h" in the detail view and checks
+// that the key handler itself does not touch the store, that it switches to the
+// history state and returns a load Cmd, that running the Cmd yields a
+// historyDataMsg which Update folds into the model, and that a later reply for
+// a different site ID leaves the folded data alone.
+func TestHistoryKey_LoadsOffUIGoroutine(t *testing.T) {
+	ms := &tuiMockStore{stateChanges: []models.StateChange{{FromStatus: "UP", ToStatus: "DOWN"}}}
+	m := newTestModel(ms)
+	m.sites = []models.Site{{SiteConfig: models.SiteConfig{ID: 7, Name: "site"}}}
+	m.state = stateDetail
+	m.termWidth, m.termHeight = 120, 40
+
+	updated, cmd := (&m).handleDetailKey(keyMsg("h"))
+	if ms.stateChangeCalls != 0 {
+		t.Fatal("history keypress hit the store synchronously in Update")
+	}
+	got := updated.(*Model)
+	if got.state != stateHistory || got.historySiteID != 7 {
+		t.Fatalf("history view not opened: state=%v siteID=%d", got.state, got.historySiteID)
+	}
+	if cmd == nil {
+		t.Fatal("expected a history load Cmd")
+	}
+
+	// Only running the Cmd performs the read.
+	msg := cmd()
+	hd, ok := msg.(historyDataMsg)
+	if !ok || hd.siteID != 7 || len(hd.changes) != 1 {
+		t.Fatalf("unexpected historyDataMsg: %+v", msg)
+	}
+
+	// Update returns a Model value on this path, hence the value assertion.
+	folded, _ := got.Update(hd)
+	m2 := folded.(Model)
+	if len(m2.historyChanges) != 1 {
+		t.Fatal("history reply not folded into the model")
+	}
+
+	// A reply for a previously opened site must not clobber the current one.
+	m2.historySiteID = 9
+	stale, _ := m2.Update(historyDataMsg{siteID: 7, changes: nil})
+	if m3 := stale.(Model); len(m3.historyChanges) != 1 {
+		t.Error("stale history reply overwrote the current view")
+	}
+}
+
+// TestSLAData_DropsStaleReply opens the SLA view for a site, then delivers one
+// slaDataMsg for a period other than the selected one and one for the selected
+// period. The first must leave slaDailyBreakdown nil; the second must set it.
+func TestSLAData_DropsStaleReply(t *testing.T) {
+	m := newTestModel(&tuiMockStore{})
+	m.termWidth, m.termHeight = 120, 40
+	m.sites = []models.Site{{SiteConfig: models.SiteConfig{ID: 3}, SiteState: models.SiteState{Status: "UP"}}}
+
+	if cmd := (&m).openSLAView(m.sites[0]); cmd == nil {
+		t.Fatal("openSLAView should return a load Cmd")
+	}
+
+	// Reply for a different period than currently selected → dropped.
+	// (slaDataMsg routes through a pointer-receiver handler, so Update
+	// returns *Model on this path.)
+	updated, _ := m.Update(slaDataMsg{siteID: 3, periodIdx: 0})
+	if mm := updated.(*Model); mm.slaDailyBreakdown != nil {
+		t.Error("stale SLA reply (old period) was applied")
+	}
+
+	// Matching reply → report computed.
+	updated, _ = updated.(*Model).Update(slaDataMsg{siteID: 3, periodIdx: m.slaPeriodIdx})
+	if mm := updated.(*Model); mm.slaDailyBreakdown == nil {
+		t.Error("matching SLA reply was not applied")
+	}
+}
+
+// TestConfirmDelete_WritesOffUIGoroutine confirms a site delete with "y" and
+// checks that the key handler returns to the dashboard and hands back a Cmd
+// without calling the store itself, and that running the Cmd performs exactly
+// one DeleteSite call and reports a writeDoneMsg without an error.
+func TestConfirmDelete_WritesOffUIGoroutine(t *testing.T) {
+	ms := &tuiMockStore{}
+	m := newTestModel(ms)
+	m.sites = []models.Site{{SiteConfig: models.SiteConfig{ID: 4, Name: "s"}}}
+	m.state = stateConfirmDelete
+	m.deleteTab = 0
+	m.deleteID = 4
+
+	updated, cmd := (&m).handleConfirmDelete(keyMsg("y"))
+	if ms.deleteSiteCalls != 0 {
+		t.Fatal("delete hit the store synchronously in Update")
+	}
+	if cmd == nil {
+		t.Fatal("expected a write Cmd")
+	}
+	if got := updated.(*Model); got.state != stateDashboard {
+		t.Fatalf("expected return to dashboard, got state %v", got.state)
+	}
+
+	// Keep the raw message so a failed type assertion reports what actually
+	// arrived rather than a zero-value writeDoneMsg.
+	msg := cmd()
+	wd, ok := msg.(writeDoneMsg)
+	if !ok || wd.err != nil {
+		t.Fatalf("unexpected write result: %+v", msg)
+	}
+	if ms.deleteSiteCalls != 1 {
+		t.Fatalf("expected exactly 1 store delete from the Cmd, got %d", ms.deleteSiteCalls)
+	}
+}
+
+// TestWriteDoneMsg_LogsErrorAndReloads sends a failed writeDoneMsg through
+// Update and expects a non-nil follow-up Cmd plus an engine log line that
+// contains the operation name and the error text.
+func TestWriteDoneMsg_LogsErrorAndReloads(t *testing.T) {
+	model := newTestModel(&tuiMockStore{})
+
+	next, reload := model.Update(writeDoneMsg{op: "Delete site", err: errSentinel})
+	if reload == nil {
+		t.Error("writeDoneMsg did not trigger a tab-data reload")
+	}
+
+	after := next.(Model)
+	logged := false
+	for _, entry := range after.engine.GetLogs() {
+		logged = logged || strings.Contains(entry, "Delete site failed: boom")
+	}
+	if !logged {
+		t.Error("write error was not logged")
+	}
+}
+
+// TestDetailRefreshCmd_OnlyWhileDetailOpen checks the three outcomes of
+// detailRefreshCmd: nil on the dashboard, a Cmd yielding a detailDataMsg for
+// the selected site while the detail view is open, and nil again when the
+// cursor points past the end of the site list.
+func TestDetailRefreshCmd_OnlyWhileDetailOpen(t *testing.T) {
+	ms := &tuiMockStore{stateChanges: []models.StateChange{{FromStatus: "UP", ToStatus: "DOWN"}}}
+	m := newTestModel(ms)
+	m.sites = []models.Site{{SiteConfig: models.SiteConfig{ID: 5, Name: "site"}}}
+
+	m.state = stateDashboard
+	if (&m).detailRefreshCmd() != nil {
+		t.Error("refresh Cmd issued outside the detail view")
+	}
+
+	m.state = stateDetail
+	cmd := (&m).detailRefreshCmd()
+	if cmd == nil {
+		t.Fatal("open detail panel should refresh on the tab-data cadence")
+	}
+	// Keep the raw message so a failed type assertion reports what actually
+	// arrived rather than a zero-value detailDataMsg.
+	msg := cmd()
+	dd, ok := msg.(detailDataMsg)
+	if !ok || dd.siteID != 5 || len(dd.changes) != 1 {
+		t.Fatalf("unexpected detail refresh reply: %+v", msg)
+	}
+
+	m.cursor = 7 // cursor out of range → no refresh, no panic
+	if (&m).detailRefreshCmd() != nil {
+		t.Error("refresh Cmd issued for an out-of-range cursor")
+	}
+}
@@ -6,6 +6,7 @@ import (
 	"strings"
 	"time"

+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
 	"github.com/charmbracelet/lipgloss"
 )

@@ -16,7 +17,7 @@ func sinApprox(x float64) float64 {
 func (m Model) pulseIndicator() string {
 	hasDown := false
 	for _, s := range m.sites {
-		if !s.Paused && !m.isMonitorInMaintenance(s.ID) && (s.Status == "DOWN" || s.Status == "SSL EXP") {
+		if !s.Paused && !m.isMonitorInMaintenance(s.ID) && (s.Status == models.StatusDown || s.Status == models.StatusSSLExp) {
 			hasDown = true
 			break
 		}
@@ -54,8 +55,8 @@ func (m Model) View() string {
 		case 5:
 			kind = "user"
 		}
-		msg := dangerStyle.Render(fmt.Sprintf("Delete %s \"%s\"?", kind, m.deleteName))
-		hint := subtleStyle.Render("[y] Confirm  [n] Cancel")
+		msg := m.st.dangerStyle.Render(fmt.Sprintf("Delete %s \"%s\"?", kind, m.deleteName))
+		hint := m.st.subtleStyle.Render("[y] Confirm  [n] Cancel")
 		box := lipgloss.NewStyle().
 			Border(lipgloss.RoundedBorder()).
 			BorderForeground(m.theme.Danger).
@@ -84,18 +85,13 @@ func (m Model) View() string {
 			case stateFormMaint:
 				title = "New Maintenance Window"
 			}
-			formHeight := m.termHeight - 7
-			if formHeight < 5 {
-				formHeight = 5
-			}
-			m.huhForm.WithHeight(formHeight)
-			header := titleStyle.Render(title)
-			footer := subtleStyle.Render("\n[Esc] Cancel")
+			header := m.st.titleStyle.Render(title)
+			footer := m.st.subtleStyle.Render("\n[Esc] Cancel")
 			return lipgloss.NewStyle().Padding(1, 2).Render(header + "\n\n" + m.huhForm.View() + "\n" + footer)
 		}
 		return ""
 	case stateDetail:
-		return m.viewDetailPanel()
+		return m.zones.Scan(m.viewDetailPanel())
 	case stateHistory:
 		return m.viewHistoryPanel()
 	case stateSLA:
@@ -127,9 +123,9 @@ func (m Model) computeStats() dashboardStats {
 			continue
 		}
 		switch site.Status {
-		case "DOWN", "SSL EXP":
+		case models.StatusDown, models.StatusSSLExp:
 			s.downCount++
-		case "LATE":
+		case models.StatusLate:
 			s.lateCount++
 		}
 	}
@@ -171,54 +167,61 @@ func (m Model) viewDashboard() string {
 		}
 	}

+	content = strings.TrimSpace(content)
 	footer := m.renderFooter(stats)

-	s := lipgloss.NewStyle().Padding(1, 2)
-	if m.termHeight > 0 {
-		s = s.MaxHeight(m.termHeight)
+	outerPad := lipgloss.NewStyle().Padding(1, 2)
+	_, frameV := outerPad.GetFrameSize()
+	availHeight := m.termHeight - frameV
+	if availHeight < 5 {
+		availHeight = 5
 	}
-	return s.Render(header + "\n" + content + "\n" + footer)
+
+	contentHeight := availHeight - lipgloss.Height(header) - lipgloss.Height(footer)
+	if contentHeight < 1 {
+		contentHeight = 1
+	}
+	paddedContent := lipgloss.NewStyle().Height(contentHeight).MaxHeight(contentHeight).Render(content)
+
+	return outerPad.Render(lipgloss.JoinVertical(lipgloss.Top, header, paddedContent, footer))
+}
+
+type tabEntry struct {
+	name  string
+	count int
+	warn  int
 }

 func (m Model) renderTabBar(stats dashboardStats) string {
-	var sitesLabel string
-	if stats.downCount > 0 {
-		sitesLabel = fmt.Sprintf("Sites (%d↓)", stats.downCount)
-	} else if stats.lateCount > 0 {
-		sitesLabel = fmt.Sprintf("Sites (%d⚠)", stats.lateCount)
-	} else if stats.totalMonitors > 0 {
-		sitesLabel = fmt.Sprintf("Sites (%d)", stats.totalMonitors)
-	} else {
-		sitesLabel = "Sites"
+	tabs := []tabEntry{
+		{"Sites", stats.totalMonitors, stats.downCount + stats.lateCount},
+		{"Alerts", len(m.alerts), 0},
+		{"Logs", 0, 0},
+		{"Nodes", len(m.nodes), stats.offlineNodes},
+		{"Maint", len(m.maintenanceWindows), stats.activeMaint},
 	}
-
-	var nodesLabel string
-	if stats.offlineNodes > 0 {
-		nodesLabel = fmt.Sprintf("Nodes (%d!)", stats.offlineNodes)
-	} else if len(m.nodes) > 0 {
-		nodesLabel = fmt.Sprintf("Nodes (%d)", len(m.nodes))
-	} else {
-		nodesLabel = "Nodes"
-	}
-
-	var maintLabel string
-	if stats.activeMaint > 0 {
-		maintLabel = fmt.Sprintf("Maint (%d)", stats.activeMaint)
-	} else {
-		maintLabel = "Maint"
-	}
-
-	tabs := []string{sitesLabel, "Alerts", "Logs", nodesLabel, maintLabel}
 	if m.isAdmin {
-		tabs = append(tabs, "Users")
+		tabs = append(tabs, tabEntry{"Users", len(m.users), 0})
 	}
+
+	countStyle := lipgloss.NewStyle().Foreground(m.theme.Muted)
+
 	var renderedTabs []string
 	for i, t := range tabs {
+		label := t.name
+		if t.count > 0 {
+			badge := countStyle.Render(fmt.Sprintf(" %d", t.count))
+			if t.warn > 0 {
+				badge = m.st.dangerStyle.Render(fmt.Sprintf(" %d", t.warn))
+			}
+			label += badge
+		}
+
 		var rendered string
 		if i == m.currentTab {
-			rendered = activeTab.Render(t)
+			rendered = m.st.activeTab.Render(label)
 		} else {
-			rendered = inactiveTab.Render(t)
+			rendered = m.st.inactiveTab.Render(label)
 		}
 		renderedTabs = append(renderedTabs, m.zones.Mark(fmt.Sprintf("tab-%d", i), rendered))
 	}
@@ -228,21 +231,21 @@ func (m Model) renderTabBar(stats dashboardStats) string {
 func (m Model) renderFooter(stats dashboardStats) string {
 	if m.filterMode {
 		cursor := lipgloss.NewStyle().Foreground(m.theme.Accent).Render("│")
-		return "\n" + titleStyle.Render("/") + " " + m.filterText + cursor + "  " + subtleStyle.Render("[Enter]Apply [Esc]Clear")
+		return "\n" + m.st.titleStyle.Render("/") + " " + m.filterText + cursor + "  " + m.st.subtleStyle.Render("[Enter]Apply [Esc]Clear")
 	}

 	upCount := stats.totalMonitors - stats.downCount - stats.lateCount
 	var upStr string
 	if stats.downCount > 0 {
-		upStr = dangerStyle.Render(fmt.Sprintf("%d/%d UP", upCount, stats.totalMonitors))
+		upStr = m.st.dangerStyle.Render(fmt.Sprintf("%d/%d UP", upCount, stats.totalMonitors))
 	} else if stats.lateCount > 0 {
-		upStr = warnStyle.Render(fmt.Sprintf("%d/%d UP", upCount, stats.totalMonitors))
+		upStr = m.st.warnStyle.Render(fmt.Sprintf("%d/%d UP", upCount, stats.totalMonitors))
 	} else {
-		upStr = specialStyle.Render(fmt.Sprintf("%d/%d UP", upCount, stats.totalMonitors))
+		upStr = m.st.specialStyle.Render(fmt.Sprintf("%d/%d UP", upCount, stats.totalMonitors))
 	}
 	statusParts := []string{upStr}
 	if stats.lateCount > 0 {
-		statusParts = append(statusParts, warnStyle.Render(fmt.Sprintf("%d LATE", stats.lateCount)))
+		statusParts = append(statusParts, m.st.warnStyle.Render(fmt.Sprintf("%d LATE", stats.lateCount)))
 	}
 	if len(m.nodes) > 0 {
 		online := 0
@@ -257,16 +260,16 @@ func (m Model) renderFooter(stats dashboardStats) string {
 		}
 		statusParts = append(statusParts, fmt.Sprintf("%d %s", online, probeLabel))
 	}
-	statusLine := strings.Join(statusParts, subtleStyle.Render(" · "))
+	statusLine := strings.Join(statusParts, m.st.subtleStyle.Render(" · "))

 	var keys string
 	switch m.currentTab {
 	case 0:
-		keys = "[/]Filter [n]New [e]Edit [i]Info [d]Del [p]Pause [T]Theme [Tab]Switch [q]Quit"
+		keys = "[/]Filter [n]New [e]Edit [i]Info [d]Del [p]Pause [Space]Collapse [T]Theme [Tab]Switch [q]Quit"
 	case 1:
 		keys = "[n]New [e]Edit [i]Info [d]Del [t]Test [T]Theme [Tab]Switch [q]Quit"
 	case 2:
-		keys = "[f]Filter [T]Theme [Tab]Switch [q]Quit"
+		keys = "[↑/↓]Scroll  [PgUp/PgDn]Page  [f]Filter  [T]Theme  [Tab]Switch  [q]Quit"
 	case 4:
 		keys = "[n]New [x]End [d]Del [T]Theme [Tab]Switch [q]Quit"
 	case 5:
@@ -275,9 +278,10 @@ func (m Model) renderFooter(stats dashboardStats) string {
 		keys = "[T]Theme [Tab]Switch [q]Quit"
 	}

-	footer := "\n" + statusLine + "  " + subtleStyle.Render(keys)
+	ver := m.st.subtleStyle.Render("v" + m.version)
+	footer := statusLine + "  " + m.st.subtleStyle.Render(keys) + "  " + ver
 	if m.filterText != "" && m.currentTab == 0 {
-		footer = "\n" + subtleStyle.Render(fmt.Sprintf("filter: %s", m.filterText)) + "  " + statusLine + "  " + subtleStyle.Render(keys)
+		footer = m.st.subtleStyle.Render(fmt.Sprintf("filter: %s", m.filterText)) + "  " + statusLine + "  " + m.st.subtleStyle.Render(keys) + "  " + ver
 	}
 	return footer
 }
@@ -2,10 +2,13 @@ package tui

 import (
 	"fmt"
+	"sort"
 	"strconv"
 	"strings"
 	"time"

+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
+	"gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor"
 	"github.com/charmbracelet/lipgloss"
 )

@@ -22,41 +25,41 @@ func (m Model) viewDetailPanel() string {
 	if site.ParentID > 0 {
 		for _, s := range m.sites {
 			if s.ID == site.ParentID {
-				breadcrumb = subtleStyle.Render("  Sites > "+s.Name+" > ") + titleStyle.Render(site.Name)
+				breadcrumb = m.st.subtleStyle.Render("  Sites > "+s.Name+" > ") + m.st.titleStyle.Render(site.Name)
 				break
 			}
 		}
 	}
 	if breadcrumb == "" {
-		breadcrumb = subtleStyle.Render("  Sites > ") + titleStyle.Render(site.Name)
+		breadcrumb = m.st.subtleStyle.Render("  Sites > ") + m.st.titleStyle.Render(site.Name)
 	}
-	b.WriteString(breadcrumb + "\n\n")
+	b.WriteString(breadcrumb + "\n")
+	b.WriteString(m.divider() + "\n")

 	row := func(label, value string) {
-		fmt.Fprintf(&b, "  %-16s %s\n", subtleStyle.Render(label), value)
+		fmt.Fprintf(&b, "  %-16s %s\n", m.st.subtleStyle.Render(label), value)
 	}

 	section := func(label string) {
-		b.WriteString("\n" + subtleStyle.Render("  "+label) + "\n")
+		b.WriteString("\n" + m.st.subtleStyle.Render("  "+label) + "\n")
 	}

-	errCat := classifyError(site.LastError, site.Type, site.StatusCode)
-	row("Status", fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID), errCat))
+	row("Status", m.fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID)))

-	if (site.Status == "DOWN" || site.Status == "SSL EXP" || site.Status == "LATE" || site.Status == "STALE") && site.LastError != "" {
+	if (site.Status == models.StatusDown || site.Status == models.StatusSSLExp || site.Status == models.StatusLate || site.Status == models.StatusStale) && site.LastError != "" {
 		errWidth := m.termWidth - chromePadH - 19
 		if errWidth < 30 {
 			errWidth = 30
 		}
 		wrapped := lipgloss.NewStyle().Width(errWidth).Render(site.LastError)
-		row("Error", dangerStyle.Render(wrapped))
+		row("Error", m.st.dangerStyle.Render(wrapped))
 	}

 	if site.Type == "http" && site.StatusCode > 0 {
 		row("HTTP Code", strconv.Itoa(site.StatusCode))
 	}

-	if (site.Status == "DOWN" || site.Status == "SSL EXP") && site.LastError != "" {
+	if (site.Status == models.StatusDown || site.Status == models.StatusSSLExp) && site.LastError != "" {
 		chain := connectionChain(site.LastError, site.Type, site.StatusCode, strings.HasPrefix(site.URL, "https"))
 		if len(chain) > 0 {
 			b.WriteString("\n")
@@ -64,19 +67,19 @@ func (m Model) viewDetailPanel() string {
 				var icon string
 				switch step.Status {
 				case stepPassed:
-					icon = specialStyle.Render("✓")
+					icon = m.st.specialStyle.Render("✓")
 				case stepFailed:
-					icon = dangerStyle.Render("✗")
+					icon = m.st.dangerStyle.Render("✗")
 				case stepSkipped:
-					icon = subtleStyle.Render("·")
+					icon = m.st.subtleStyle.Render("·")
 				}
 				line := fmt.Sprintf("  %s %-16s", icon, step.Name)
 				if step.Detail != "" {
 					switch step.Status {
 					case stepFailed:
-						line += " " + dangerStyle.Render(step.Detail)
+						line += " " + m.st.dangerStyle.Render(step.Detail)
 					case stepSkipped:
-						line += " " + subtleStyle.Render(step.Detail)
+						line += " " + m.st.subtleStyle.Render(step.Detail)
 					}
 				}
 				b.WriteString(line + "\n")
@@ -97,7 +100,7 @@ func (m Model) viewDetailPanel() string {
 	if m.isMonitorInMaintenance(site.ID) {
 		for _, mw := range m.maintenanceWindows {
 			if mw.Type == "maintenance" && (mw.MonitorID == 0 || mw.MonitorID == site.ID || mw.MonitorID == site.ParentID) {
-				row("Maintenance", maintStyle.Render(mw.Title))
+				row("Maintenance", m.st.maintStyle.Render(mw.Title))
 				break
 			}
 		}
@@ -124,10 +127,10 @@ func (m Model) viewDetailPanel() string {
 	if site.Timeout > 0 {
 		row("Timeout", fmt.Sprintf("%ds", site.Timeout))
 	}
-	row("Latency", fmtLatency(site.Latency))
-	row("Uptime", fmtUptime(hist.Statuses))
+	row("Latency", m.fmtLatency(site.Latency))
+	row("Uptime", m.fmtUptime(hist.Statuses))
 	if !site.LastCheck.IsZero() {
-		row("Last Check", site.LastCheck.Format("15:04:05"))
+		row("Last Check", m.fmtTimeAgo(site.LastCheck))
 	}

 	if site.Type == "http" {
@@ -140,16 +143,16 @@ func (m Model) viewDetailPanel() string {
 			codes = "200-299"
 		}
 		row("Codes", codes)
-		row("SSL", fmtSSL(site))
+		row("SSL", m.fmtSSL(site))
 		if site.IgnoreTLS {
-			row("TLS Verify", dangerStyle.Render("disabled"))
+			row("TLS Verify", m.st.dangerStyle.Render("disabled"))
 		}
 	}

 	if site.MaxRetries > 0 || site.Regions != "" || site.Description != "" {
 		section("CONFIG")
 		if site.MaxRetries > 0 {
-			row("Retries", fmtRetries(site))
+			row("Retries", m.fmtRetries(site))
 		}
 		if site.Regions != "" {
 			row("Regions", site.Regions)
@@ -161,49 +164,58 @@ func (m Model) viewDetailPanel() string {

 	probeResults := m.engine.GetProbeResults(site.ID)
 	if len(probeResults) > 0 {
-		b.WriteString("\n" + subtleStyle.Render("  PROBE RESULTS") + "\n")
-		for nodeID, result := range probeResults {
-			status := specialStyle.Render("UP")
+		nodeIDs := make([]string, 0, len(probeResults))
+		for id := range probeResults {
+			nodeIDs = append(nodeIDs, id)
+		}
+		sort.Strings(nodeIDs)
+		b.WriteString("\n" + m.st.subtleStyle.Render("  PROBE RESULTS") + "\n")
+		for _, nodeID := range nodeIDs {
+			result := probeResults[nodeID]
+			status := m.st.specialStyle.Render("UP")
 			if !result.IsUp {
-				status = dangerStyle.Render("DN")
+				status = m.st.dangerStyle.Render("DN")
 			}
 			latency := time.Duration(result.LatencyNs).Milliseconds()
 			ago := time.Since(result.CheckedAt).Truncate(time.Second)
 			line := fmt.Sprintf("  %-14s %s  %dms  %s ago", nodeID, status, latency, ago)
 			if !result.IsUp && result.ErrorReason != "" {
-				line += "  " + dangerStyle.Render(result.ErrorReason)
+				line += "  " + m.st.dangerStyle.Render(result.ErrorReason)
 			}
 			b.WriteString(line + "\n")
 		}
 	}

-	stateChanges := m.engine.GetStateChanges(site.ID, 5)
+	// Loaded on panel-enter (loadDetailCmd) and cached, so View does no DB IO.
+	var stateChanges []models.StateChange
+	if m.detailChangesSiteID == site.ID {
+		stateChanges = m.detailChanges
+	}
 	if len(stateChanges) > 0 {
-		b.WriteString("\n" + subtleStyle.Render("  STATE CHANGES") + "\n")
+		b.WriteString("\n" + m.st.subtleStyle.Render("  STATE CHANGES") + "\n")
 		for i, sc := range stateChanges {
 			ago := fmtDuration(time.Since(sc.ChangedAt))
-			arrow := subtleStyle.Render(sc.FromStatus) + " → "
-			if sc.ToStatus == "UP" {
-				arrow += specialStyle.Render(sc.ToStatus)
+			arrow := m.st.subtleStyle.Render(sc.FromStatus) + " → "
+			if sc.ToStatus == string(models.StatusUp) {
+				arrow += m.st.specialStyle.Render(sc.ToStatus)
 			} else {
-				arrow += dangerStyle.Render(sc.ToStatus)
+				arrow += m.st.dangerStyle.Render(sc.ToStatus)
 			}
-			line := fmt.Sprintf("  %s  %s", arrow, subtleStyle.Render(ago+" ago"))
+			line := fmt.Sprintf("  %s  %s", arrow, m.st.subtleStyle.Render(ago+" ago"))
 			if dur := computeOutageDuration(stateChanges, i); dur > 0 {
-				line += "  " + warnStyle.Render("outage "+fmtDuration(dur))
+				line += "  " + m.st.warnStyle.Render("outage "+fmtDuration(dur))
 			}
-			if sc.ErrorReason != "" && sc.ToStatus != "UP" {
-				line += "  " + dangerStyle.Render(sc.ErrorReason)
+			if sc.ErrorReason != "" && sc.ToStatus != string(models.StatusUp) {
+				line += "  " + m.st.dangerStyle.Render(sc.ErrorReason)
 			}
 			b.WriteString(line + "\n")
 		}
-		b.WriteString("  " + subtleStyle.Render("[h] History") + "\n")
+		b.WriteString("  " + m.st.subtleStyle.Render("[h] History") + "\n")
 	}

-	b.WriteString("\n")
-	const sparkWidth = 40
+	b.WriteString(m.divider() + "\n")
 	if site.Type == "push" {
-		b.WriteString("  " + heartbeatSparkline(hist.Statuses, sparkWidth))
+		b.WriteString("  " + m.zones.Mark("spark-heartbeat", m.heartbeatSparkline(hist.Statuses, detailSparkWidth, "")))
 		if len(hist.Statuses) > 0 {
 			up := 0
 			for _, s := range hist.Statuses {
@@ -212,11 +224,11 @@ func (m Model) viewDetailPanel() string {
 				}
 			}
 			fmt.Fprintf(&b, "\n  %s %d/%d checks up",
-				subtleStyle.Render("Heartbeats"),
+				m.st.subtleStyle.Render("Heartbeats"),
 				up, len(hist.Statuses))
 		}
 	} else {
-		b.WriteString("  " + latencySparkline(hist.Latencies, hist.Statuses, sparkWidth))
+		b.WriteString("  " + m.zones.Mark("spark-latency", m.latencySparkline(hist.Latencies, hist.Statuses, detailSparkWidth, "")))
 		var minL, maxL, total time.Duration
 		count := 0
 		for i, l := range hist.Latencies {
@@ -236,14 +248,59 @@ func (m Model) viewDetailPanel() string {
 		if count > 0 {
 			avg := total / time.Duration(count)
 			fmt.Fprintf(&b, "\n  %s %dms  %s %dms  %s %dms",
-				subtleStyle.Render("Min"), minL.Milliseconds(),
-				subtleStyle.Render("Avg"), avg.Milliseconds(),
-				subtleStyle.Render("Max"), maxL.Milliseconds())
+				m.st.subtleStyle.Render("Min"), minL.Milliseconds(),
+				m.st.subtleStyle.Render("Avg"), avg.Milliseconds(),
+				m.st.subtleStyle.Render("Max"), maxL.Milliseconds())
 		}
 	}

-	b.WriteString("\n\n")
-	b.WriteString(subtleStyle.Render("  [i/Esc] Back  [e] Edit  [h] History  [s] SLA  [q] Quit"))
+	if m.sparkTooltipIdx >= 0 {
+		b.WriteString("\n" + m.renderSparkTooltip(site, hist, detailSparkWidth))
+	}
+
+	b.WriteString("\n")
+	b.WriteString(m.divider() + "\n")
+	b.WriteString(m.st.subtleStyle.Render("  [q/Esc] Back  [e] Edit  [h] History  [s] SLA  [click] Inspect"))

 	return lipgloss.NewStyle().Padding(1, 2).Render(b.String())
 }
+
+// renderSparkTooltip builds the one-line tooltip for the sparkline sample at
+// m.sparkTooltipIdx: approximate age, latency (non-push monitors only), UP/DOWN
+// and a [position/total] marker. It returns "" when the index is outside the
+// history series used for this monitor type. sparkWidth is currently unused.
+func (m Model) renderSparkTooltip(site models.Site, hist monitor.SiteHistory, sparkWidth int) string {
+	idx := m.sparkTooltipIdx
+	// Push monitors are indexed by heartbeat status, all others by latency.
+	dataLen := len(hist.Latencies)
+	if site.Type == "push" {
+		dataLen = len(hist.Statuses)
+	}
+	if idx < 0 || idx >= dataLen {
+		return ""
+	}
+
+	parts := make([]string, 0, 3)
+	// "latest" is positional: a zero Interval must not mark every sample latest.
+	checksAgo := dataLen - 1 - idx
+	switch approxSecs := checksAgo * site.Interval; {
+	case checksAgo == 0:
+		parts = append(parts, "latest")
+	case approxSecs > 0:
+		parts = append(parts, "~"+fmtDuration(time.Duration(approxSecs)*time.Second)+" ago")
+	default:
+		parts = append(parts, fmt.Sprintf("%d checks ago", checksAgo))
+	}
+	if site.Type != "push" {
+		parts = append(parts, m.fmtLatency(hist.Latencies[idx]))
+	}
+	if idx < len(hist.Statuses) {
+		if hist.Statuses[idx] {
+			parts = append(parts, m.st.specialStyle.Render("UP"))
+		} else {
+			parts = append(parts, m.st.dangerStyle.Render("DOWN"))
+		}
+	}
+	pos := m.st.subtleStyle.Render(fmt.Sprintf("[%d/%d]", idx+1, dataLen))
+	return "  " + strings.Join(parts, m.st.subtleStyle.Render(" | ")) + "  " + pos
+}
@@ -17,14 +17,14 @@ type historyStats struct {

 func computeOutageDuration(changes []models.StateChange, idx int) time.Duration {
 	sc := changes[idx]
-	if sc.ToStatus != "UP" {
+	if sc.ToStatus != string(models.StatusUp) {
 		return 0
 	}
 	if idx+1 >= len(changes) {
 		return 0
 	}
 	prev := changes[idx+1]
-	if prev.ToStatus == "UP" {
+	if prev.ToStatus == string(models.StatusUp) {
 		return 0
 	}
 	dur := sc.ChangedAt.Sub(prev.ChangedAt)
@@ -47,7 +47,9 @@ func computeHistoryStats(changes []models.StateChange) historyStats {
 	return s
 }

-func stateChangeSparkline(changes []models.StateChange, width int) string {
+var stateChangeChars = []rune{'▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'}
+
+func (m Model) stateChangeSparkline(changes []models.StateChange, width int) string {
 	if len(changes) < 2 || width < 4 {
 		return ""
 	}
@@ -91,14 +93,14 @@ func stateChangeSparkline(changes []models.StateChange, width int) string {
 		if idx > 7 {
 			idx = 7
 		}
-		ch := string(sparkChars[idx])
+		ch := string(stateChangeChars[idx])
 		switch {
 		case v >= 3:
-			sb.WriteString(dangerStyle.Render(ch))
+			sb.WriteString(m.st.dangerStyle.Render(ch))
 		case v >= 2:
-			sb.WriteString(warnStyle.Render(ch))
+			sb.WriteString(m.st.warnStyle.Render(ch))
 		default:
-			sb.WriteString(subtleStyle.Render(ch))
+			sb.WriteString(m.st.subtleStyle.Render(ch))
 		}
 	}
 	return sb.String()
@@ -118,26 +120,26 @@ func (m Model) buildHistoryContent() string {
 	for i, sc := range m.historyChanges {
 		ts := sc.ChangedAt.Format("2006-01-02 15:04")

-		arrow := subtleStyle.Render(sc.FromStatus) + " → "
+		arrow := m.st.subtleStyle.Render(sc.FromStatus) + " → "
 		switch sc.ToStatus {
-		case "UP":
-			arrow += specialStyle.Render(sc.ToStatus)
-		case "LATE":
-			arrow += warnStyle.Render(sc.ToStatus)
-		case "STALE":
-			arrow += staleStyle.Render(sc.ToStatus)
+		case string(models.StatusUp):
+			arrow += m.st.specialStyle.Render(sc.ToStatus)
+		case string(models.StatusLate):
+			arrow += m.st.warnStyle.Render(sc.ToStatus)
+		case string(models.StatusStale):
+			arrow += m.st.staleStyle.Render(sc.ToStatus)
 		default:
-			arrow += dangerStyle.Render(sc.ToStatus)
+			arrow += m.st.dangerStyle.Render(sc.ToStatus)
 		}

 		durStr := ""
 		if dur := computeOutageDuration(m.historyChanges, i); dur > 0 {
-			durStr = warnStyle.Render("outage " + fmtDuration(dur))
+			durStr = m.st.warnStyle.Render("outage " + fmtDuration(dur))
 		}

 		reason := ""
-		if sc.ErrorReason != "" && sc.ToStatus != "UP" {
-			reason = dangerStyle.Render(limitStr(sc.ErrorReason, reasonWidth))
+		if sc.ErrorReason != "" && sc.ToStatus != string(models.StatusUp) {
+			reason = m.st.dangerStyle.Render(limitStr(sc.ErrorReason, reasonWidth))
 		}

 		fmt.Fprintf(&b, "  %-18s %s  %-12s %s\n", ts, arrow, durStr, reason)
@@ -149,35 +151,32 @@ func (m Model) buildHistoryContent() string {
 func (m Model) viewHistoryPanel() string {
 	var b strings.Builder

-	header := "  " + titleStyle.Render("STATE HISTORY: "+m.historySiteName)
-	header += "  " + subtleStyle.Render("[q] Back")
+	header := "  " + m.st.titleStyle.Render("STATE HISTORY: "+m.historySiteName)
+	header += "  " + m.st.subtleStyle.Render("[q] Back")
 	b.WriteString(header + "\n")

-	divWidth := m.termWidth - chromePadH - 4
-	if divWidth < 40 {
-		divWidth = 40
-	}
-	b.WriteString("  " + subtleStyle.Render(strings.Repeat("─", divWidth)) + "\n")
+	divWidth := m.dividerWidth()
+	b.WriteString(m.divider() + "\n")

-	sparkline := stateChangeSparkline(m.historyChanges, divWidth)
+	sparkline := m.stateChangeSparkline(m.historyChanges, divWidth)
 	if sparkline != "" {
 		b.WriteString("  " + sparkline + "\n")
-		b.WriteString("  " + subtleStyle.Render(strings.Repeat("─", divWidth)) + "\n")
+		b.WriteString(m.divider() + "\n")
 	}

 	fmt.Fprintf(&b, "  %-18s %-17s %-12s %s\n",
-		subtleStyle.Render("TIME"),
-		subtleStyle.Render("TRANSITION"),
-		subtleStyle.Render("DURATION"),
-		subtleStyle.Render("REASON"))
+		m.st.subtleStyle.Render("TIME"),
+		m.st.subtleStyle.Render("TRANSITION"),
+		m.st.subtleStyle.Render("DURATION"),
+		m.st.subtleStyle.Render("REASON"))

 	if len(m.historyChanges) == 0 {
-		b.WriteString("\n  " + subtleStyle.Render("No state changes recorded") + "\n")
+		b.WriteString("\n  " + m.st.subtleStyle.Render("No state changes recorded") + "\n")
 	} else {
 		b.WriteString(m.historyViewport.View())
 	}

-	b.WriteString("\n  " + subtleStyle.Render(strings.Repeat("─", divWidth)) + "\n")
+	b.WriteString("\n" + m.divider() + "\n")

 	stats := computeHistoryStats(m.historyChanges)
 	parts := []string{fmt.Sprintf("%d events", stats.totalEvents)}
@@ -186,8 +185,8 @@ func (m Model) viewHistoryPanel() string {
 		avg := stats.totalDowntime / time.Duration(stats.outageCount)
 		parts = append(parts, "avg outage "+fmtDuration(avg))
 	}
-	b.WriteString("  " + subtleStyle.Render(strings.Join(parts, " │ ")) + "\n")
-	b.WriteString("  " + subtleStyle.Render("[j/k/↑/↓] Scroll  [q/Esc] Back"))
+	b.WriteString("  " + m.st.subtleStyle.Render(strings.Join(parts, " │ ")) + "\n")
+	b.WriteString("  " + m.st.subtleStyle.Render("[j/k/↑/↓] Scroll  [q/Esc] Back"))

 	return lipgloss.NewStyle().Padding(1, 2).Render(b.String())
 }
@@ -134,14 +134,14 @@ func TestComputeHistoryStats_Empty(t *testing.T) {

 func TestStateChangeSparkline(t *testing.T) {
 	t.Run("empty", func(t *testing.T) {
-		if got := stateChangeSparkline(nil, 20); got != "" {
+		if got := styledModel.stateChangeSparkline(nil, 20); got != "" {
 			t.Errorf("expected empty for nil, got %q", got)
 		}
 	})

 	t.Run("single event", func(t *testing.T) {
 		changes := []models.StateChange{{ChangedAt: time.Now()}}
-		if got := stateChangeSparkline(changes, 20); got != "" {
+		if got := styledModel.stateChangeSparkline(changes, 20); got != "" {
 			t.Errorf("expected empty for single event, got %q", got)
 		}
 	})
@@ -152,7 +152,7 @@ func TestStateChangeSparkline(t *testing.T) {
 			{ChangedAt: now},
 			{ChangedAt: now.Add(-1 * time.Hour)},
 		}
-		got := stateChangeSparkline(changes, 20)
+		got := styledModel.stateChangeSparkline(changes, 20)
 		if got == "" {
 			t.Error("expected non-empty sparkline for two events")
 		}
@@ -164,7 +164,7 @@ func TestStateChangeSparkline(t *testing.T) {
 			{ChangedAt: now},
 			{ChangedAt: now.Add(-1 * time.Hour)},
 		}
-		if got := stateChangeSparkline(changes, 3); got != "" {
+		if got := styledModel.stateChangeSparkline(changes, 3); got != "" {
 			t.Errorf("expected empty for width 3, got %q", got)
 		}
 	})
@@ -24,63 +24,57 @@ var slaPeriods = []struct {
 func (m Model) viewSLAPanel() string {
 	var b strings.Builder

-	header := "  " + titleStyle.Render("SLA REPORT: "+m.slaSiteName)
-	header += "  " + subtleStyle.Render("[q] Back")
+	header := "  " + m.st.titleStyle.Render("SLA REPORT: "+m.slaSiteName) // title row; the whole panel is accumulated in b and padded on return
+	header += "  " + m.st.subtleStyle.Render("[q] Back")
 	b.WriteString(header + "\n")
-
-	divWidth := m.termWidth - chromePadH - 4
-	if divWidth < 40 {
-		divWidth = 40
-	}
-	b.WriteString("  " + subtleStyle.Render(strings.Repeat("─", divWidth)) + "\n")
+	b.WriteString(m.divider() + "\n") // rule under the header

 	period := slaPeriods[m.slaPeriodIdx]
-	b.WriteString("  " + subtleStyle.Render("Period: Last "+period.label) + "\n\n")
+	b.WriteString("  " + m.st.subtleStyle.Render("Period: Last "+period.label) + "\n\n")

 	r := m.slaReport

-	// Uptime bar
-	barWidth := divWidth - 30
+	barWidth := m.dividerWidth() - 30 // uptime bar: divider width minus 30 columns, floored at 10 below
 	if barWidth < 10 {
 		barWidth = 10
 	}
-	bar := uptimeBar(r.UptimePct, barWidth)
-	uptimeColor := specialStyle
+	bar := m.uptimeBar(r.UptimePct, barWidth)
+	uptimeColor := m.st.specialStyle // default style; replaced below when uptime is under 99.9% and again under 99.0%
 	if r.UptimePct < 99.9 {
-		uptimeColor = warnStyle
+		uptimeColor = m.st.warnStyle
 	}
 	if r.UptimePct < 99.0 {
-		uptimeColor = dangerStyle
+		uptimeColor = m.st.dangerStyle
 	}
-	fmt.Fprintf(&b, "  %-14s %s  %s\n", subtleStyle.Render("Uptime"), uptimeColor.Render(fmt.Sprintf("%s%%", fmtPct(r.UptimePct))), bar)
-	fmt.Fprintf(&b, "  %-14s %s\n", subtleStyle.Render("Downtime"), fmtDuration(r.Downtime))
-	fmt.Fprintf(&b, "  %-14s %d\n", subtleStyle.Render("Outages"), r.OutageCount)
+	fmt.Fprintf(&b, "  %-16s %s  %s\n", m.st.subtleStyle.Render("Uptime"), uptimeColor.Render(fmt.Sprintf("%s%%", fmtPct(r.UptimePct))), bar)
+	fmt.Fprintf(&b, "  %-16s %s\n", m.st.subtleStyle.Render("Downtime"), fmtDuration(r.Downtime))
+	fmt.Fprintf(&b, "  %-16s %d\n", m.st.subtleStyle.Render("Outages"), r.OutageCount)

 	if r.OutageCount > 0 {
-		fmt.Fprintf(&b, "  %-14s %s\n", subtleStyle.Render("Longest"), fmtDuration(r.LongestOut))
-		fmt.Fprintf(&b, "  %-14s %s\n", subtleStyle.Render("MTTR"), fmtDuration(r.MTTR))
-		fmt.Fprintf(&b, "  %-14s %s\n", subtleStyle.Render("MTBF"), fmtDuration(r.MTBF))
+		fmt.Fprintf(&b, "  %-16s %s\n", m.st.subtleStyle.Render("Longest"), fmtDuration(r.LongestOut)) // outage detail rows are only written when at least one outage was counted
+		fmt.Fprintf(&b, "  %-16s %s\n", m.st.subtleStyle.Render("MTTR"), fmtDuration(r.MTTR))
+		fmt.Fprintf(&b, "  %-16s %s\n", m.st.subtleStyle.Render("MTBF"), fmtDuration(r.MTBF))
 	}

-	b.WriteString("\n  " + subtleStyle.Render(strings.Repeat("─", divWidth)) + "\n")
+	b.WriteString("\n" + m.divider() + "\n") // rule between the summary rows and m.slaViewport

 	if len(m.slaDailyBreakdown) > 0 {
 		b.WriteString(m.slaViewport.View())
 	}

-	b.WriteString("\n  " + subtleStyle.Render(strings.Repeat("─", divWidth)) + "\n")
+	b.WriteString("\n" + m.divider() + "\n") // rule above the key hints

 	var keys []string
 	for i, p := range slaPeriods {
 		label := fmt.Sprintf("[%s] %s", p.key, p.label)
 		if i == m.slaPeriodIdx {
-			keys = append(keys, titleStyle.Render(label))
+			keys = append(keys, m.st.titleStyle.Render(label)) // the selected period uses the title style
 		} else {
-			keys = append(keys, subtleStyle.Render(label))
+			keys = append(keys, m.st.subtleStyle.Render(label))
 		}
 	}
 	b.WriteString("  " + strings.Join(keys, "  "))
-	b.WriteString("  " + subtleStyle.Render("[j/k/↑/↓] Scroll"))
+	b.WriteString("  " + m.st.subtleStyle.Render("[j/k/↑/↓] Scroll  [q/Esc] Back"))

 	return lipgloss.NewStyle().Padding(1, 2).Render(b.String())
 }
@@ -88,32 +82,32 @@ func (m Model) viewSLAPanel() string {
 func (m Model) buildSLADailyContent() string {
 	var b strings.Builder

-	barWidth := m.termWidth - chromePadH - 30
+	barWidth := m.dividerWidth() - 30 // divider width minus 30 columns for each bar, floored at 10 below
 	if barWidth < 10 {
 		barWidth = 10
 	}

-	b.WriteString("  " + subtleStyle.Render("DAILY BREAKDOWN") + "\n")
+	b.WriteString("  " + m.st.subtleStyle.Render("DAILY BREAKDOWN") + "\n") // section heading, followed by one row per entry in m.slaDailyBreakdown
 	for _, day := range m.slaDailyBreakdown {
 		dateStr := day.Date.Format("Jan 02")
-		bar := uptimeBar(day.UptimePct, barWidth)
+		bar := m.uptimeBar(day.UptimePct, barWidth)
 		pctStr := fmtPct(day.UptimePct) + "%"

-		color := specialStyle
+		color := m.st.specialStyle // default style; warn below 99.9%, danger below 99.0%
 		if day.UptimePct < 99.9 {
-			color = warnStyle
+			color = m.st.warnStyle
 		}
 		if day.UptimePct < 99.0 {
-			color = dangerStyle
+			color = m.st.dangerStyle
 		}

-		fmt.Fprintf(&b, "  %-8s %s  %s\n", subtleStyle.Render(dateStr), bar, color.Render(pctStr))
+		fmt.Fprintf(&b, "  %-8s %s  %s\n", m.st.subtleStyle.Render(dateStr), bar, color.Render(pctStr)) // row layout: date, uptime bar, percentage
 	}

 	return b.String()
 }

-func uptimeBar(pct float64, width int) string {
+func (m Model) uptimeBar(pct float64, width int) string {
 	filled := int(math.Round(pct / 100 * float64(width)))
 	if filled > width {
 		filled = width
@@ -123,9 +117,9 @@ func uptimeBar(pct float64, width int) string {
 	}
 	empty := width - filled

-	bar := specialStyle.Render(strings.Repeat("█", filled))
+	bar := m.st.specialStyle.Render(strings.Repeat("█", filled))
 	if empty > 0 {
-		bar += subtleStyle.Render(strings.Repeat("░", empty))
+		bar += m.st.subtleStyle.Render(strings.Repeat("░", empty))
 	}
 	return bar
 }