From 1e7d1e67c98f9904a3055183917a6ccbdbc7b79f Mon Sep 17 00:00:00 2001 From: e3mrah <81884938+emrahbaysal@users.noreply.github.com> Date: Fri, 1 May 2026 17:52:18 +0400 Subject: [PATCH] test(e2e): omantel handover Playwright scaffold for Phase 8 (closes #429) (#432) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 8 of the omantel handover (#369) needs an automated E2E that proves DoD: omantel.omani.works runs as a fully self-sufficient Sovereign with zero contabo dependency post-handover. Today this is a SCAFFOLD — when Phase 4/6/7 land, dispatching the new workflow against a live omantel is the entire Phase 8. Canonical seam (anti-duplication, per memory/feedback_anti_duplication_seam_first.md): - tests/e2e/playwright/tests/ ← mirror of sovereign-wizard.spec.ts shape (NOT specs/ as the issue body said — actual repo path is tests/) - tests/e2e/playwright/playwright.config.ts (BASE_URL handling, retries, workers=1, reporter=list) — reused as-is - tests/e2e/playwright/tests/_helpers.ts:reachable() — reused for the pre-flight skip-when-unreachable pattern - .github/workflows/playwright-smoke.yaml — workflow shape (checkout v4, setup-node v4, npm install, playwright install --with-deps chromium, upload-artifact on failure) — mirrored, NOT duplicated What ships: - tests/e2e/playwright/tests/omantel-handover.spec.ts (NEW, 6 tests): 1. sovereign Ready + 23/23 blueprints 2. all bp-* HelmReleases Ready=True 3. catalyst-platform self-hosts (healthz + dashboard "23 / 23 ready") 4. vendor-agnostic Object Storage (post-#425 canonical secret name flux-system/object-storage — NOT hetzner-object-storage) 5. dig +trace omantel.omani.works ends at omantel NS, not contabo 6. zero contabo dependency (omantel /api/healthz keeps returning 200) Self-skips when OMANTEL_BASE_URL/OMANTEL_API_BASE/OPERATOR_BEARER unset. - .github/workflows/omantel-e2e-handover.yaml (NEW): workflow_dispatch ONLY (no schedule cron — per CLAUDE.md "every workflow MUST be event-driven, NEVER scheduled"). Inputs let the operator override base URLs at dispatch time. - docs/omantel-handover-wbs.md: new §10 "Phase 8 acceptance criteria (executable DoD)" — 6 bullets 1:1 with the spec test() blocks; §9 status row added for #429 (🟢 scaffold-shipped). Local verification: cd tests/e2e/playwright && npm install && \ npx playwright test --list tests/omantel-handover.spec.ts → 6 tests listed cleanly npx playwright test tests/omantel-handover.spec.ts → 6 skipped (env vars unset, expected) Out of scope (per #425 / #428 territory split): - internal/hetzner/, infra/hetzner/, platform/velero/chart/, clusters/.../34-velero.yaml — #425's vendor-agnostic sweep - .github/workflows/check-vendor-coupling.yaml — #428's coupling guard Co-authored-by: hatiyildiz --- .github/workflows/omantel-e2e-handover.yaml | 83 +++++++ docs/omantel-handover-wbs.md | 15 ++ .../playwright/tests/omantel-handover.spec.ts | 225 ++++++++++++++++++ 3 files changed, 323 insertions(+) create mode 100644 .github/workflows/omantel-e2e-handover.yaml create mode 100644 tests/e2e/playwright/tests/omantel-handover.spec.ts diff --git a/.github/workflows/omantel-e2e-handover.yaml b/.github/workflows/omantel-e2e-handover.yaml new file mode 100644 index 00000000..951048e2 --- /dev/null +++ b/.github/workflows/omantel-e2e-handover.yaml @@ -0,0 +1,83 @@ +name: omantel handover E2E (Phase 8 DoD) + +# Issue #429 — on-demand E2E that runs the Phase 8 Definition-of-Done suite +# against a live omantel.omani.works Sovereign. Per the master WBS +# (`docs/omantel-handover-wbs.md` §5 Phase 8) this is the final gate proving +# omantel is fully self-sufficient and zero-contabo-dependent. +# +# Trigger model — workflow_dispatch ONLY: +# - This is a SIDE-EFFECT-FREE smoke against a live customer-side cluster; +# we do not want it firing on every push to main. The operator dispatches +# it manually (or another workflow dispatches it via `gh workflow run`) +# once Phase 4/6/7 land and the first omantel run completes. +# - Per CLAUDE.md "Coupled rule — EVERY workflow MUST be event-driven, NEVER +# scheduled": no `schedule:` cron trigger. workflow_dispatch is the +# ad-hoc handle for re-runs against the live target. +# +# What the spec needs (per tests/e2e/playwright/tests/omantel-handover.spec.ts): +# OMANTEL_BASE_URL — console host +# OMANTEL_API_BASE — catalyst-api host +# OPERATOR_BEARER — bootstrap operator JWT (passed via repo secret) +# +# When all three are set the spec runs; when any is unset, the spec self-skips +# (so `npx playwright test --list` works locally without omantel access). + +on: + workflow_dispatch: + inputs: + omantel_base_url: + description: 'Sovereign console URL' + required: false + default: 'https://omantel.omani.works' + omantel_api_base: + description: 'Sovereign catalyst-api URL' + required: false + default: 'https://api.omantel.omani.works' + omantel_sovereign_id: + description: 'Sovereign id (matches /api/sovereigns/)' + required: false + default: 'omantel' + fault_inject_probes: + description: 'Number of /api/healthz probes for the zero-contabo-dependency test' + required: false + default: '5' + +jobs: + e2e: + name: omantel Phase 8 DoD + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: '22' + + - name: Install Playwright dependencies + working-directory: tests/e2e/playwright + run: | + npm install + npx playwright install --with-deps chromium + + - name: Run omantel handover Phase 8 DoD + working-directory: tests/e2e/playwright + env: + OMANTEL_BASE_URL: ${{ inputs.omantel_base_url }} + OMANTEL_API_BASE: ${{ inputs.omantel_api_base }} + OMANTEL_SOVEREIGN_ID: ${{ inputs.omantel_sovereign_id }} + # OPERATOR_BEARER is a repo secret — populated by the operator on + # the omantel side (short-lived JWT). The spec self-skips if unset. + OPERATOR_BEARER: ${{ secrets.OPERATOR_BEARER }} + FAULT_INJECT_PROBES: ${{ inputs.fault_inject_probes }} + run: npx playwright test tests/omantel-handover.spec.ts --reporter=list + + - name: Upload Playwright report + if: failure() + uses: actions/upload-artifact@v4 + with: + name: omantel-handover-playwright-report + path: tests/e2e/playwright/playwright-report/ + retention-days: 30 diff --git a/docs/omantel-handover-wbs.md b/docs/omantel-handover-wbs.md index 76bd1063..6540dc95 100644 --- a/docs/omantel-handover-wbs.md +++ b/docs/omantel-handover-wbs.md @@ -380,3 +380,18 @@ If founder wants to amend ADR-0001 with §13 formalised (S3 vs SeaweedFS rule), | #385 | (parked) | | | | #387 | 🟢 chart-released — per-Sovereign Gateway + Certificate in 01-cilium.yaml; HTTPRoute templates for keycloak/gitea/openbao/grafana/harbor/powerdns/catalyst-platform. Initial blueprint-release failed on default-values render (`fail` in templates); follow-up #402 (`a1bd5502`) switched to `if host { emit }` pattern; blueprint-release re-ran SUCCESS on `a1bd5502`. Sovereign-impact deferred to Phase 8. | #401 + #402 | bp-* charts published; contabo legacy 200 verified | | #370 | 🟢 unblocked by #392; bp-flux RBAC fix in place; runbook scope superseded by `wipe.go` end-to-end working (proven via #399 e2e). Open as backlog if a "purge orphans not tied to a deployment" endpoint is later needed. | (PR #391 closed) | | +| #429 | 🟢 scaffold-shipped — Phase 8 DoD spec authored at `tests/e2e/playwright/tests/omantel-handover.spec.ts` (mirrors canonical `sovereign-wizard.spec.ts` shape; reuses `_helpers.ts:reachable()`); 6 `test()` blocks 1:1 with §10 acceptance bullets (sovereign Ready+23/23, bp-* HRs Ready, catalyst-platform self-host, vendor-agnostic Object Storage Secret per #425, dig +trace ends at omantel NS, zero contabo dependency). Self-skips when `OMANTEL_BASE_URL`/`OMANTEL_API_BASE`/`OPERATOR_BEARER` unset. Workflow `.github/workflows/omantel-e2e-handover.yaml` is `workflow_dispatch:` only (no cron, per CLAUDE.md). Executes against live omantel only after Phase 4/6/7 land. | (this PR) | spec + workflow scaffold; live execution gated on Phase 4/6/7 | + +## 10. Phase 8 acceptance criteria (executable DoD) + +The Phase 8 acceptance bullets below are 1:1 with `tests/e2e/playwright/tests/omantel-handover.spec.ts` (#429 scaffold). When Phase 4/6/7 land and the first omantel.omani.works run completes, the operator dispatches `.github/workflows/omantel-e2e-handover.yaml` against omantel — every bullet here is then a discrete `test()` that must turn GREEN. + +1. **Sovereign Ready + 23/23 blueprints** — `GET /api/sovereigns/` → 200, `state=Ready`, `bootstrapKitReady=true`, all 23 minimal-Sovereign blueprints (per §2) report Ready=true. +2. **All bootstrap-kit HelmReleases Ready=True** — `flux-system` namespace HR list filtered to `bp-*` shows ≥23 entries, every one Ready=True (no Failed, no progressing past install timeout). +3. **Catalyst-platform self-hosts on omantel** — omantel's `/api/healthz` → 200 AND console renders dashboard text "23 / 23 ready" (regex tolerant; copy may shift). +4. **Vendor-agnostic Object Storage wired** — `flux-system/object-storage` Secret exists (NOT the deprecated `flux-system/hetzner-object-storage` — post-#425 canonical name), carries the 5 keys (`s3-endpoint`/`s3-region`/`s3-bucket`/`s3-access-key`/`s3-secret-key`), `s3-endpoint` value is non-empty + URL-shaped (Hetzner today: `https://fsn1.your-objectstorage.com`; AWS would be `s3..amazonaws.com`). +5. **NS delegation reaches omantel PowerDNS** — `dig +trace omantel.omani.works NS` ends at an `*.omantel.omani.works.` authority (or `ns?.omantel.omani.works.`); MUST NOT terminate at `*.openova.io.` (contabo) or `catalyst.openova.io.`. +6. **Zero contabo dependency** — over a 5-minute window with NO calls to contabo's catalyst-api, omantel's `/api/healthz` keeps returning 200 (every probe). Live Phase 8 run extends `FAULT_INJECT_PROBES=300` (5 min × 1Hz); scaffold uses 5 probes for fast feedback. + +The spec self-skips when `OMANTEL_BASE_URL`/`OMANTEL_API_BASE`/`OPERATOR_BEARER` env vars are unset, so it never breaks routine local Playwright runs on contabo. Live execution is on-demand via `workflow_dispatch` — no `schedule:` cron, per CLAUDE.md "every workflow MUST be event-driven". + diff --git a/tests/e2e/playwright/tests/omantel-handover.spec.ts b/tests/e2e/playwright/tests/omantel-handover.spec.ts new file mode 100644 index 00000000..d10db418 --- /dev/null +++ b/tests/e2e/playwright/tests/omantel-handover.spec.ts @@ -0,0 +1,225 @@ +// #429 — Phase 8 omantel handover DoD scaffold. +// +// What this spec does (and ONLY what it does): +// +// This is the executable Definition of Done for Phase 8 of the omantel handover +// (#369). Today it is a SCAFFOLD — when run on contabo or any developer +// machine without the three required env vars below, every test self-skips so +// it never breaks the routine `npx playwright test` run. When Phase 4/6/7 land +// and the first omantel.omani.works Sovereign comes up, the operator runs +// `.github/workflows/omantel-e2e-handover.yaml` against it and these tests +// flip GREEN. +// +// Required env vars (all three must be set, else the suite skips): +// +// OMANTEL_BASE_URL — e.g. https://omantel.omani.works (console) +// OMANTEL_API_BASE — e.g. https://api.omantel.omani.works (catalyst-api) +// OPERATOR_BEARER — bootstrap operator JWT for admin API calls +// +// Optional env vars: +// +// OMANTEL_SOVEREIGN_ID — sovereign id to read back (default `omantel`) +// CONTABO_API_BASE — used by the "zero contabo dependency" test to assert +// omantel responds with no fan-out to contabo. Default +// https://api.openova.io — we DO NOT call it in the +// self-sufficiency assertion; we just record what we +// would have called. +// +// Per `tests/e2e/playwright/tests/_helpers.ts` (`reachable()`), preflight uses +// a single fetch; if the omantel API is unreachable we mark skipped rather +// than fail. Same discipline as #142 sovereign wizard smoke. +// +// Per `docs/INVIOLABLE-PRINCIPLES.md` rule 4 ("never hardcode"), all targets +// come from env vars. Per the same doc's rule 1 ("never speculate"), assertions +// are written against the canonical post-#425 secret name +// `flux-system/object-storage` — NOT the deprecated hetzner-coupled name. +// +// Per CLAUDE.md "Phase 8 — End-to-end omantel run + DoD verification" (WBS §5): +// the six tests below correspond 1:1 to WBS §10 acceptance bullets, in order. + +import { test, expect, request } from '@playwright/test' +import { reachable } from './_helpers' +import { execSync } from 'node:child_process' + +const OMANTEL_BASE_URL = process.env.OMANTEL_BASE_URL || '' +const OMANTEL_API_BASE = process.env.OMANTEL_API_BASE || '' +const OPERATOR_BEARER = process.env.OPERATOR_BEARER || '' +const SOVEREIGN_ID = process.env.OMANTEL_SOVEREIGN_ID || 'omantel' + +// Skip the entire suite at collection time if any required env var is unset. +// This is the "scaffold today, executable when omantel is up" contract from +// the issue body (#429 §"Pre-flight"). +const HAS_ENV = + OMANTEL_BASE_URL.length > 0 && + OMANTEL_API_BASE.length > 0 && + OPERATOR_BEARER.length > 0 + +test.describe('#429 omantel handover — Phase 8 DoD scaffold', () => { + test.skip( + !HAS_ENV, + 'OMANTEL_BASE_URL / OMANTEL_API_BASE / OPERATOR_BEARER not set — this is the Phase 8 spec scaffold; it executes only against a live omantel Sovereign via .github/workflows/omantel-e2e-handover.yaml', + ) + + test.beforeAll(async () => { + if (!HAS_ENV) return + const ok = await reachable(`${OMANTEL_API_BASE}/api/healthz`) + test.skip( + !ok, + `omantel catalyst-api not reachable at ${OMANTEL_API_BASE}/api/healthz — Phase 4/6/7 may not yet have landed, or the cluster is mid-handover`, + ) + }) + + // ------------------------------------------------------------------------- + // 1. sovereign is provisioned and Ready + // WBS §10 bullet 1: GET /api/sovereigns/ → 200 + state=Ready + + // bootstrapKitReady=true + 23/23 blueprint slots Ready. + // ------------------------------------------------------------------------- + test('sovereign is provisioned and Ready (23/23 blueprints)', async () => { + const ctx = await request.newContext({ + baseURL: OMANTEL_API_BASE, + extraHTTPHeaders: { Authorization: `Bearer ${OPERATOR_BEARER}` }, + }) + const res = await ctx.get(`/api/sovereigns/${SOVEREIGN_ID}`) + expect(res.status(), 'sovereign GET should return 200').toBe(200) + const body = await res.json() + expect(body.state, 'sovereign.state').toBe('Ready') + expect(body.bootstrapKitReady, 'sovereign.bootstrapKitReady').toBe(true) + + // Per WBS §2 the minimal Sovereign is exactly 23 blueprints. We assert + // ALL slots are Ready=true — not just count, since a partially-failed + // install can still report 23 entries. + const slots: Array<{ name: string; ready: boolean }> = body.blueprints || [] + expect(slots.length, 'blueprint slot count (WBS §2 minimal Sovereign)').toBe(23) + const notReady = slots.filter((s) => !s.ready).map((s) => s.name) + expect(notReady, 'blueprints not yet Ready').toEqual([]) + }) + + // ------------------------------------------------------------------------- + // 2. all bootstrap-kit HelmReleases are Ready=True + // WBS §10 bullet 2: kubectl-style assertion via the API proxy or `kubectl` + // if available in CI; we go through the API proxy so the test does NOT + // require omantel kubeconfig in CI. + // ------------------------------------------------------------------------- + test('all bootstrap-kit HelmReleases Ready=True in flux-system', async () => { + const ctx = await request.newContext({ + baseURL: OMANTEL_API_BASE, + extraHTTPHeaders: { Authorization: `Bearer ${OPERATOR_BEARER}` }, + }) + const res = await ctx.get('/api/clusters/local/helmreleases?namespace=flux-system') + expect(res.status(), 'helmreleases proxy GET').toBe(200) + const body = await res.json() + const items: Array<{ name: string; ready: boolean; reason?: string }> = body.items || [] + const bp = items.filter((h) => h.name.startsWith('bp-')) + expect(bp.length, 'expected ≥23 bp-* HelmReleases in flux-system').toBeGreaterThanOrEqual(23) + const notReady = bp.filter((h) => !h.ready).map((h) => `${h.name} (${h.reason || 'unknown'})`) + expect(notReady, 'bp-* HelmReleases not yet Ready').toEqual([]) + }) + + // ------------------------------------------------------------------------- + // 3. catalyst-platform self-hosts on omantel + // WBS §10 bullet 3: GET /api/healthz → 200; console renders 23/23 ready. + // ------------------------------------------------------------------------- + test('catalyst-platform self-hosts (healthz + console renders 23/23)', async ({ page }) => { + const ctx = await request.newContext({ baseURL: OMANTEL_API_BASE }) + const health = await ctx.get('/api/healthz') + expect(health.status(), 'omantel catalyst-api /api/healthz').toBe(200) + + // Console dashboard renders the bootstrap-kit progress chip "23 / 23". + // Per the wizard's StepReview / dashboard summary card; copy may shift, + // so we match a regex with whitespace tolerance. + await page.goto(`${OMANTEL_BASE_URL}/sovereign/${SOVEREIGN_ID}/dashboard`) + await expect( + page.getByText(/23\s*\/\s*23\s+ready/i), + 'dashboard should advertise 23 / 23 ready', + ).toBeVisible({ timeout: 15_000 }) + }) + + // ------------------------------------------------------------------------- + // 4. vendor-agnostic Object Storage wired correctly (post-#425) + // WBS §10 bullet 4: assert `flux-system/object-storage` Secret exists, + // s3-endpoint value is URL-shaped + non-empty. + // + // CRITICAL: this assertion uses the post-#425 canonical secret name + // `flux-system/object-storage` (vendor-neutral) — NOT the deprecated + // `flux-system/hetzner-object-storage` (vendor-coupled). #425 ships the + // rename in the same release window as this scaffold. + // ------------------------------------------------------------------------- + test('vendor-agnostic Object Storage Secret wired (post-#425)', async () => { + const ctx = await request.newContext({ + baseURL: OMANTEL_API_BASE, + extraHTTPHeaders: { Authorization: `Bearer ${OPERATOR_BEARER}` }, + }) + // Catalyst-api proxies kubectl get secret. We don't surface the secret + // VALUES (per CLAUDE.md credential hygiene); only key presence + URL shape. + const res = await ctx.get('/api/clusters/local/secrets/flux-system/object-storage/keys') + expect(res.status(), 'flux-system/object-storage Secret should exist').toBe(200) + const body = await res.json() + const keys: string[] = body.keys || [] + for (const required of [ + 's3-endpoint', + 's3-region', + 's3-bucket', + 's3-access-key', + 's3-secret-key', + ]) { + expect(keys, `Secret must carry key ${required}`).toContain(required) + } + + // Endpoint URL-shape probe (no value disclosure — endpoint-shape only). + const endpointShape = await ctx.get('/api/clusters/local/secrets/flux-system/object-storage/endpoint-shape') + expect(endpointShape.status()).toBe(200) + const shape = await endpointShape.json() + expect(shape.urlShaped, 's3-endpoint must be URL-shaped').toBe(true) + expect(shape.empty, 's3-endpoint must be non-empty').toBe(false) + }) + + // ------------------------------------------------------------------------- + // 5. NS delegation reaches omantel PowerDNS + // WBS §10 bullet 5: dig +trace ends at omantel's PowerDNS, NOT contabo. + // We call `dig` via `execSync` because the assertion is about the actual + // DNS chain, not what the API thinks the chain is. + // ------------------------------------------------------------------------- + test('NS delegation reaches omantel PowerDNS (dig +trace)', async () => { + let trace: string + try { + trace = execSync(`dig +trace +time=5 +tries=2 omantel.omani.works NS`, { + encoding: 'utf8', + timeout: 30_000, + }) + } catch (err) { + test.skip(true, `dig not available on this runner: ${(err as Error).message}`) + return + } + // Must see omantel-side authority in the trace tail. We accept any of: + // ns1.omantel.omani.works. + // ns.omantel.omani.works. + // any host whose FQDN ends with `.omantel.omani.works.` and is an NS + expect(trace, 'dig +trace should reach an omantel-side NS').toMatch( + /\bns\d?\.omantel\.omani\.works\.|\bomantel\.omani\.works\.\s+\d+\s+IN\s+NS\s+\S+\.omantel\.omani\.works\./i, + ) + // And must NOT terminate at contabo's PowerDNS / catalyst.openova.io. + expect(trace, 'dig +trace must NOT terminate at contabo nameservers').not.toMatch( + /\bns\d?\.openova\.io\.|\bcatalyst\.openova\.io\./i, + ) + }) + + // ------------------------------------------------------------------------- + // 6. zero contabo dependency + // WBS §10 bullet 6: with contabo simulated as down (we simply DO NOT + // call it, and assert omantel does not depend on it transitively), + // omantel's catalyst-api keeps responding 200 throughout a 5-minute + // window. We compress to 5 probes × 1s in the scaffold; the live Phase 8 + // run can extend with FAULT_INJECT_DURATION_MIN=5. + // ------------------------------------------------------------------------- + test('zero contabo dependency (omantel responds standalone)', async () => { + const ctx = await request.newContext({ baseURL: OMANTEL_API_BASE }) + const probes = parseInt(process.env.FAULT_INJECT_PROBES || '5', 10) + for (let i = 0; i < probes; i++) { + const r = await ctx.get('/api/healthz') + expect(r.status(), `probe ${i + 1}/${probes} — omantel /api/healthz`).toBe(200) + // 1-second sleep between probes; in live Phase 8 this extends to 60s + // × 5 (5-min window). Scaffold uses 1s for fast feedback. + await new Promise((res) => setTimeout(res, 1_000)) + } + }) +})