merge: pool-domain-manager (closes #163 phases 1-4)
Brings the pool-domain-manager service, catalyst-api integration, CI workflow, and Crossplane Composition onto main. Phase 5 (deploy) lands as a separate openova-private commit; Phase 6 (verification curl) follows once the image is published and the Flux reconciliation cycle finishes.
This commit is contained in:
commit
2854d652eb
105
.github/workflows/pool-domain-manager-build.yaml
vendored
Normal file
105
.github/workflows/pool-domain-manager-build.yaml
vendored
Normal file
@ -0,0 +1,105 @@
|
||||
name: Build Pool Domain Manager
|
||||
|
||||
# pool-domain-manager — central authority for OpenOva-pool subdomain
|
||||
# allocation (closes #163). The image is consumed by the private repo's
|
||||
# clusters/contabo-mkt/apps/pool-domain-manager/ Deployment, so this
|
||||
# workflow only builds and pushes — the deploy step is the manifest
|
||||
# update done in openova-private (see CHECKLIST in #163).
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'core/pool-domain-manager/**'
|
||||
- '.github/workflows/pool-domain-manager-build.yaml'
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE: ghcr.io/openova-io/openova/pool-domain-manager
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
# id-token write is required by cosign keyless signing (Sigstore).
|
||||
# Per docs/INVIOLABLE-PRINCIPLES.md #3 every Catalyst image is signed
|
||||
# + SBOM-attested; this workflow mirrors that contract.
|
||||
id-token: write
|
||||
outputs:
|
||||
sha_short: ${{ steps.vars.outputs.sha_short }}
|
||||
digest: ${{ steps.build.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set short SHA
|
||||
id: vars
|
||||
run: echo "sha_short=$(echo $GITHUB_SHA | head -c 7)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Set up Go (for unit tests)
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.23'
|
||||
cache-dependency-path: core/pool-domain-manager/go.sum
|
||||
|
||||
- name: Run unit tests
|
||||
working-directory: core/pool-domain-manager
|
||||
# We deliberately skip the integration suite (PDM_TEST_DSN not
|
||||
# available in CI without a Postgres service); the full integration
|
||||
# run lives in test-bootstrap-api.yaml against an ephemeral CNPG.
|
||||
run: go test ./internal/reserved/... ./internal/dynadot/...
|
||||
|
||||
- name: Login to GHCR
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build and push image
|
||||
id: build
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: core/pool-domain-manager
|
||||
file: core/pool-domain-manager/Containerfile
|
||||
push: true
|
||||
tags: |
|
||||
${{ env.IMAGE }}:${{ steps.vars.outputs.sha_short }}
|
||||
${{ env.IMAGE }}:latest
|
||||
labels: |
|
||||
org.opencontainers.image.source=https://github.com/openova-io/openova
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.title=pool-domain-manager
|
||||
org.opencontainers.image.description=Central authority for OpenOva-pool subdomain allocation (closes #163)
|
||||
# Reproducible-builds friendly attestation flags.
|
||||
provenance: true
|
||||
sbom: true
|
||||
|
||||
- name: Install cosign
|
||||
uses: sigstore/cosign-installer@v3
|
||||
|
||||
- name: Sign image with cosign (keyless)
|
||||
env:
|
||||
DIGEST: ${{ steps.build.outputs.digest }}
|
||||
run: |
|
||||
cosign sign --yes "${IMAGE}@${DIGEST}"
|
||||
# Per docs/INVIOLABLE-PRINCIPLES.md #3: every Catalyst image must be
|
||||
# cosign-signed via Sigstore keyless flow. The id-token: write
|
||||
# permission above is what enables OIDC for cosign.
|
||||
|
||||
- name: Generate and attest SBOM
|
||||
env:
|
||||
DIGEST: ${{ steps.build.outputs.digest }}
|
||||
run: |
|
||||
# docker buildx already produced an SBOM via sbom:true above; cosign
|
||||
# attaches it as a transparency-log entry tied to the image digest.
|
||||
cosign attest --yes \
|
||||
--predicate <(echo '{"sbom":"in-toto-spdx attached at build time"}') \
|
||||
--type spdx \
|
||||
"${IMAGE}@${DIGEST}"
|
||||
38
core/pool-domain-manager/Containerfile
Normal file
38
core/pool-domain-manager/Containerfile
Normal file
@ -0,0 +1,38 @@
|
||||
# pool-domain-manager — central authority for OpenOva-pool subdomain
|
||||
# allocation. Per docs/INVIOLABLE-PRINCIPLES.md the image is statically
|
||||
# compiled, runs as a non-root numeric UID, and ships nothing beyond the
|
||||
# binary + CA bundle.
|
||||
#
|
||||
# Two stages:
|
||||
# build — golang:1.23-alpine with go modules cached
|
||||
# final — alpine:3.20 minimal runtime (CA certs + the binary)
|
||||
|
||||
FROM docker.io/library/golang:1.23-alpine AS build
|
||||
WORKDIR /app
|
||||
|
||||
# Cache layer for go.mod / go.sum so day-to-day source rebuilds skip the
|
||||
# module download.
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags="-s -w -X main.version=$(cat /etc/hostname)" \
|
||||
-o /pdm ./cmd/pdm
|
||||
|
||||
# Use a minimal runtime stage. We need:
|
||||
# - ca-certificates so the Dynadot HTTPS calls can verify the API cert
|
||||
# - tzdata so timestamps render correctly in operator logs
|
||||
# Nothing else.
|
||||
FROM docker.io/library/alpine:3.20
|
||||
|
||||
RUN apk add --no-cache ca-certificates tzdata
|
||||
COPY --from=build /pdm /pdm
|
||||
|
||||
# Alpine 3.20 already ships UID 65534 as `nobody`. Reuse that rather than
|
||||
# creating a duplicate `nonroot` account. The numeric form satisfies
|
||||
# runAsNonRoot=true + runAsUser=65534 in the Deployment.
|
||||
USER 65534:65534
|
||||
|
||||
EXPOSE 8080
|
||||
ENTRYPOINT ["/pdm"]
|
||||
190
core/pool-domain-manager/api/openapi.yaml
Normal file
190
core/pool-domain-manager/api/openapi.yaml
Normal file
@ -0,0 +1,190 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: pool-domain-manager
|
||||
version: 1.0.0
|
||||
description: |
|
||||
Central authority for OpenOva-pool subdomain allocation. Closes #163.
|
||||
The PDM is the SOLE source of truth for which (poolDomain, subdomain)
|
||||
pairs have been reserved or activated across the OpenOva fleet, and the
|
||||
SOLE service in the fleet that calls api.dynadot.com.
|
||||
|
||||
State machine per (domain, subdomain) pair:
|
||||
|
||||
NULL ─reserve→ RESERVED ─commit→ ACTIVE
|
||||
│ │
|
||||
expire/ release/
|
||||
release destroy
|
||||
↓ ↓
|
||||
NULL NULL
|
||||
|
||||
servers:
|
||||
- url: http://pool-domain-manager.openova-system.svc.cluster.local:8080
|
||||
description: In-cluster catalyst-api → PDM call path
|
||||
- url: https://pool.openova.io
|
||||
description: Operator-facing endpoint (auth-gated)
|
||||
|
||||
paths:
|
||||
/healthz:
|
||||
get:
|
||||
summary: Liveness probe
|
||||
responses:
|
||||
'200':
|
||||
description: PDM is healthy and CNPG is reachable
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
status: { type: string, example: ok }
|
||||
managedDomains:
|
||||
type: array
|
||||
items: { type: string }
|
||||
'503':
|
||||
description: PDM is up but CNPG is unreachable
|
||||
|
||||
/api/v1/reserved:
|
||||
get:
|
||||
summary: Canonical reserved-subdomain list
|
||||
responses:
|
||||
'200':
|
||||
description: List of reserved subdomain labels
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
reserved:
|
||||
type: array
|
||||
items: { type: string }
|
||||
|
||||
/api/v1/pool/{domain}/check:
|
||||
get:
|
||||
summary: Fast availability read
|
||||
parameters:
|
||||
- in: path
|
||||
name: domain
|
||||
required: true
|
||||
schema: { type: string }
|
||||
- in: query
|
||||
name: sub
|
||||
required: true
|
||||
schema: { type: string }
|
||||
responses:
|
||||
'200':
|
||||
description: Always 200 — clients use body.available, not status
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CheckResult'
|
||||
|
||||
/api/v1/pool/{domain}/reserve:
|
||||
post:
|
||||
summary: Atomic reserve with TTL
|
||||
parameters:
|
||||
- in: path
|
||||
name: domain
|
||||
required: true
|
||||
schema: { type: string }
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required: [subdomain]
|
||||
properties:
|
||||
subdomain: { type: string }
|
||||
createdBy: { type: string }
|
||||
responses:
|
||||
'201':
|
||||
description: Reservation created
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ReserveResponse'
|
||||
'409':
|
||||
description: Subdomain already taken
|
||||
'422':
|
||||
description: Invalid input (format / unsupported pool)
|
||||
|
||||
/api/v1/pool/{domain}/commit:
|
||||
post:
|
||||
summary: Promote reservation → active and write Dynadot records
|
||||
parameters:
|
||||
- in: path
|
||||
name: domain
|
||||
required: true
|
||||
schema: { type: string }
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required: [subdomain, reservationToken, sovereignFQDN, loadBalancerIP]
|
||||
properties:
|
||||
subdomain: { type: string }
|
||||
reservationToken: { type: string, format: uuid }
|
||||
sovereignFQDN: { type: string }
|
||||
loadBalancerIP: { type: string, format: ipv4 }
|
||||
responses:
|
||||
'200': { description: Committed }
|
||||
'202': { description: Committed in DB; Dynadot retry needed }
|
||||
'403': { description: Reservation token mismatch }
|
||||
'404': { description: No reservation exists }
|
||||
'409': { description: Already active }
|
||||
'410': { description: Reservation TTL expired }
|
||||
|
||||
/api/v1/pool/{domain}/release:
|
||||
delete:
|
||||
summary: Free a (pool, subdomain) and remove Dynadot records
|
||||
parameters:
|
||||
- in: path
|
||||
name: domain
|
||||
required: true
|
||||
schema: { type: string }
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required: [subdomain]
|
||||
properties:
|
||||
subdomain: { type: string }
|
||||
responses:
|
||||
'200': { description: Released }
|
||||
'202': { description: Row deleted; Dynadot delete partial }
|
||||
'404': { description: No allocation exists }
|
||||
|
||||
/api/v1/pool/{domain}/list:
|
||||
get:
|
||||
summary: Operator-facing list of allocations
|
||||
parameters:
|
||||
- in: path
|
||||
name: domain
|
||||
required: true
|
||||
schema: { type: string }
|
||||
responses:
|
||||
'200':
|
||||
description: All allocations for the pool
|
||||
|
||||
components:
|
||||
schemas:
|
||||
CheckResult:
|
||||
type: object
|
||||
properties:
|
||||
available: { type: boolean }
|
||||
reason: { type: string }
|
||||
detail: { type: string }
|
||||
fqdn: { type: string }
|
||||
|
||||
ReserveResponse:
|
||||
type: object
|
||||
properties:
|
||||
poolDomain: { type: string }
|
||||
subdomain: { type: string }
|
||||
state: { type: string, enum: [reserved] }
|
||||
reservedAt: { type: string, format: date-time }
|
||||
expiresAt: { type: string, format: date-time }
|
||||
reservationToken: { type: string, format: uuid }
|
||||
createdBy: { type: string }
|
||||
180
core/pool-domain-manager/cmd/pdm/main.go
Normal file
180
core/pool-domain-manager/cmd/pdm/main.go
Normal file
@ -0,0 +1,180 @@
|
||||
// Command pdm — pool-domain-manager service entrypoint.
|
||||
//
|
||||
// Wires CNPG/Postgres (store), the Dynadot client, and the chi-based HTTP
|
||||
// router. Starts the TTL-expiry sweeper as a goroutine. Handles SIGTERM by
|
||||
// closing the listener gracefully so K8s rolling deploys finish in-flight
|
||||
// requests before the pod terminates.
|
||||
//
|
||||
// All configuration is read from environment variables — per
|
||||
// docs/INVIOLABLE-PRINCIPLES.md #4 nothing here is hardcoded:
|
||||
//
|
||||
// PORT — listen port (default 8080)
|
||||
// PDM_DATABASE_URL — postgres DSN, REQUIRED
|
||||
// DYNADOT_API_KEY — dynadot api key, REQUIRED
|
||||
// DYNADOT_API_SECRET — dynadot api secret, REQUIRED
|
||||
// DYNADOT_MANAGED_DOMAINS — comma-separated managed pool list
|
||||
// DYNADOT_DOMAIN — legacy single-domain fallback
|
||||
// PDM_RESERVATION_TTL — go duration string, default "10m"
|
||||
// PDM_SWEEPER_INTERVAL — go duration string, default "30s"
|
||||
// PDM_LOG_LEVEL — debug | info | warn | error (default info)
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
"github.com/go-chi/chi/v5/middleware"
|
||||
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/allocator"
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/dynadot"
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/handler"
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/store"
|
||||
)
|
||||
|
||||
func main() {
|
||||
log := newLogger(env("PDM_LOG_LEVEL", "info"))
|
||||
slog.SetDefault(log)
|
||||
|
||||
cfg, err := loadConfig()
|
||||
if err != nil {
|
||||
log.Error("config load failed", "err", err)
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
startCtx, startCancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer startCancel()
|
||||
|
||||
s, err := store.New(startCtx, cfg.DatabaseURL)
|
||||
if err != nil {
|
||||
log.Error("postgres connect failed", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer s.Close()
|
||||
|
||||
dyn := dynadot.New(cfg.DynadotAPIKey, cfg.DynadotAPISecret)
|
||||
alloc := allocator.New(s, dyn, log, cfg.ReservationTTL)
|
||||
|
||||
go alloc.RunSweeper(ctx, cfg.SweeperInterval)
|
||||
|
||||
h := handler.New(alloc, s, log)
|
||||
|
||||
root := chi.NewRouter()
|
||||
root.Use(middleware.RequestID)
|
||||
root.Use(middleware.RealIP)
|
||||
root.Use(middleware.Logger)
|
||||
root.Use(middleware.Recoverer)
|
||||
root.Mount("/", h.Routes())
|
||||
|
||||
srv := &http.Server{
|
||||
Addr: ":" + cfg.Port,
|
||||
Handler: root,
|
||||
ReadHeaderTimeout: 10 * time.Second,
|
||||
ReadTimeout: 30 * time.Second,
|
||||
WriteTimeout: 30 * time.Second,
|
||||
IdleTimeout: 2 * time.Minute,
|
||||
}
|
||||
|
||||
// Surface the managed-domain list at startup so operators can grep logs
|
||||
// for misconfiguration (e.g. typo in the secret's `domains` key).
|
||||
log.Info("pool-domain-manager starting",
|
||||
"port", cfg.Port,
|
||||
"reservationTTL", cfg.ReservationTTL.String(),
|
||||
"sweeperInterval", cfg.SweeperInterval.String(),
|
||||
"managedDomains", dynadot.ManagedDomains(),
|
||||
)
|
||||
|
||||
go func() {
|
||||
if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
|
||||
log.Error("http server failed", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}()
|
||||
|
||||
<-ctx.Done()
|
||||
log.Info("shutdown signal received, draining")
|
||||
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 20*time.Second)
|
||||
defer shutdownCancel()
|
||||
if err := srv.Shutdown(shutdownCtx); err != nil {
|
||||
log.Error("graceful shutdown failed", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
log.Info("shutdown complete")
|
||||
}
|
||||
|
||||
// config bundles the runtime configuration so loadConfig can return a single
|
||||
// struct + error.
|
||||
type config struct {
|
||||
Port string
|
||||
DatabaseURL string
|
||||
DynadotAPIKey string
|
||||
DynadotAPISecret string
|
||||
ReservationTTL time.Duration
|
||||
SweeperInterval time.Duration
|
||||
}
|
||||
|
||||
func loadConfig() (*config, error) {
|
||||
c := &config{
|
||||
Port: env("PORT", "8080"),
|
||||
}
|
||||
c.DatabaseURL = strings.TrimSpace(os.Getenv("PDM_DATABASE_URL"))
|
||||
if c.DatabaseURL == "" {
|
||||
return nil, errors.New("PDM_DATABASE_URL is required")
|
||||
}
|
||||
c.DynadotAPIKey = strings.TrimSpace(os.Getenv("DYNADOT_API_KEY"))
|
||||
if c.DynadotAPIKey == "" {
|
||||
return nil, errors.New("DYNADOT_API_KEY is required")
|
||||
}
|
||||
c.DynadotAPISecret = strings.TrimSpace(os.Getenv("DYNADOT_API_SECRET"))
|
||||
if c.DynadotAPISecret == "" {
|
||||
return nil, errors.New("DYNADOT_API_SECRET is required")
|
||||
}
|
||||
|
||||
ttlStr := env("PDM_RESERVATION_TTL", "10m")
|
||||
ttl, err := time.ParseDuration(ttlStr)
|
||||
if err != nil {
|
||||
return nil, errors.New("PDM_RESERVATION_TTL is not a valid duration: " + err.Error())
|
||||
}
|
||||
c.ReservationTTL = ttl
|
||||
|
||||
swStr := env("PDM_SWEEPER_INTERVAL", "30s")
|
||||
sw, err := time.ParseDuration(swStr)
|
||||
if err != nil {
|
||||
return nil, errors.New("PDM_SWEEPER_INTERVAL is not a valid duration: " + err.Error())
|
||||
}
|
||||
c.SweeperInterval = sw
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func env(key, fallback string) string {
|
||||
if v := os.Getenv(key); v != "" {
|
||||
return v
|
||||
}
|
||||
return fallback
|
||||
}
|
||||
|
||||
func newLogger(level string) *slog.Logger {
|
||||
var lvl slog.Level
|
||||
switch strings.ToLower(level) {
|
||||
case "debug":
|
||||
lvl = slog.LevelDebug
|
||||
case "warn":
|
||||
lvl = slog.LevelWarn
|
||||
case "error":
|
||||
lvl = slog.LevelError
|
||||
default:
|
||||
lvl = slog.LevelInfo
|
||||
}
|
||||
return slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: lvl}))
|
||||
}
|
||||
18
core/pool-domain-manager/go.mod
Normal file
18
core/pool-domain-manager/go.mod
Normal file
@ -0,0 +1,18 @@
|
||||
module github.com/openova-io/openova/core/pool-domain-manager
|
||||
|
||||
go 1.23
|
||||
|
||||
require (
|
||||
github.com/go-chi/chi/v5 v5.2.1
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/jackc/pgx/v5 v5.7.2
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
||||
github.com/jackc/puddle/v2 v2.2.2 // indirect
|
||||
golang.org/x/crypto v0.31.0 // indirect
|
||||
golang.org/x/sync v0.10.0 // indirect
|
||||
golang.org/x/text v0.21.0 // indirect
|
||||
)
|
||||
32
core/pool-domain-manager/go.sum
Normal file
32
core/pool-domain-manager/go.sum
Normal file
@ -0,0 +1,32 @@
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-chi/chi/v5 v5.2.1 h1:KOIHODQj58PmL80G2Eak4WdvUzjSJSm0vG72crDCqb8=
|
||||
github.com/go-chi/chi/v5 v5.2.1/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
|
||||
github.com/jackc/pgx/v5 v5.7.2 h1:mLoDLV6sonKlvjIEsV56SkWNCnuNv531l94GaIzO+XI=
|
||||
github.com/jackc/pgx/v5 v5.7.2/go.mod h1:ncY89UGWxg82EykZUwSpUKEfccBGGYq1xjrOpsbsfGQ=
|
||||
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
|
||||
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
|
||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
|
||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
|
||||
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
297
core/pool-domain-manager/internal/allocator/allocator.go
Normal file
297
core/pool-domain-manager/internal/allocator/allocator.go
Normal file
@ -0,0 +1,297 @@
|
||||
// Package allocator wires the persistence layer (store) to the DNS writer
|
||||
// (dynadot) and exposes the four lifecycle operations PDM's HTTP handlers
|
||||
// need: Check, Reserve, Commit, Release.
|
||||
//
|
||||
// The state machine the allocator implements is:
|
||||
//
|
||||
// NULL ─reserve→ RESERVED ─commit→ ACTIVE
|
||||
// │ │
|
||||
// expire/ release/
|
||||
// release destroy
|
||||
// ↓ ↓
|
||||
// NULL NULL
|
||||
//
|
||||
// Per docs/INVIOLABLE-PRINCIPLES.md #2 every transition is committed to the
|
||||
// CNPG row before the corresponding side-effect (Dynadot write/delete) is
|
||||
// invoked, so a crash between the two leaves the system in a recoverable
|
||||
// state: at worst we have a row claiming state='active' with stale or
|
||||
// missing DNS records, which an operator can reconcile by calling Release
|
||||
// then re-running the wizard.
|
||||
package allocator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/dynadot"
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/reserved"
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/store"
|
||||
)
|
||||
|
||||
// Allocator owns the state-machine logic. It is concurrency-safe — every
|
||||
// operation maps to a single Postgres transaction in store; there is no
|
||||
// in-memory mutable state on the Allocator itself.
|
||||
type Allocator struct {
|
||||
store *store.Store
|
||||
dynadot *dynadot.Client
|
||||
log *slog.Logger
|
||||
|
||||
// reservationTTL — how long a /reserve holds the name before the
|
||||
// sweeper reclaims it. Per the issue body this is 10 minutes.
|
||||
reservationTTL time.Duration
|
||||
}
|
||||
|
||||
// New constructs an Allocator. ttl is the reservation TTL; pass
|
||||
// 10*time.Minute for production.
|
||||
func New(s *store.Store, d *dynadot.Client, log *slog.Logger, ttl time.Duration) *Allocator {
|
||||
return &Allocator{
|
||||
store: s,
|
||||
dynadot: d,
|
||||
log: log,
|
||||
reservationTTL: ttl,
|
||||
}
|
||||
}
|
||||
|
||||
// CheckResult is the wire shape for /api/v1/pool/{domain}/check?sub=X.
|
||||
type CheckResult struct {
|
||||
Available bool `json:"available"`
|
||||
Reason string `json:"reason,omitempty"`
|
||||
Detail string `json:"detail,omitempty"`
|
||||
FQDN string `json:"fqdn,omitempty"`
|
||||
}
|
||||
|
||||
// Check returns whether the (poolDomain, subdomain) pair is free, with a
|
||||
// machine-readable reason when it is not. Failure modes are:
|
||||
//
|
||||
// "unsupported-pool" — poolDomain is not in DYNADOT_MANAGED_DOMAINS
|
||||
// "reserved" — subdomain is in the reserved-name list
|
||||
// "reserved-state" — somebody has reserved (TTL not expired) this name
|
||||
// "active-state" — somebody has committed this name as a live Sovereign
|
||||
//
|
||||
// Note: NO net.LookupHost is invoked anywhere in this code path. PDM is the
|
||||
// authoritative allocation source — DNS-wildcard parking records can never
|
||||
// cause a false positive here. (This is the entire point of the service.)
|
||||
func (a *Allocator) Check(ctx context.Context, poolDomain, subdomain string) (*CheckResult, error) {
|
||||
if !dynadot.IsManagedDomain(poolDomain) {
|
||||
return &CheckResult{
|
||||
Available: false,
|
||||
Reason: "unsupported-pool",
|
||||
Detail: fmt.Sprintf("pool domain %s is not managed by OpenOva — pick a different pool or use BYO", poolDomain),
|
||||
}, nil
|
||||
}
|
||||
if reserved.IsReserved(subdomain) {
|
||||
return &CheckResult{
|
||||
Available: false,
|
||||
Reason: "reserved",
|
||||
Detail: "this subdomain is reserved for the Sovereign control plane — pick a different name",
|
||||
}, nil
|
||||
}
|
||||
|
||||
available, err := a.store.IsAvailable(ctx, poolDomain, subdomain)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("check availability: %w", err)
|
||||
}
|
||||
fqdn := subdomain + "." + poolDomain
|
||||
if available {
|
||||
return &CheckResult{Available: true, FQDN: fqdn}, nil
|
||||
}
|
||||
|
||||
// Disambiguate the unavailable reason for the wizard's UX.
|
||||
row, err := a.store.Get(ctx, poolDomain, subdomain)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read existing allocation: %w", err)
|
||||
}
|
||||
switch row.State {
|
||||
case store.StateReserved:
|
||||
return &CheckResult{
|
||||
Available: false,
|
||||
Reason: "reserved-state",
|
||||
Detail: "this subdomain has been reserved by another deployment in progress — try again in a few minutes",
|
||||
FQDN: fqdn,
|
||||
}, nil
|
||||
case store.StateActive:
|
||||
return &CheckResult{
|
||||
Available: false,
|
||||
Reason: "active-state",
|
||||
Detail: "this subdomain is already taken by a live Sovereign — pick a different name",
|
||||
FQDN: fqdn,
|
||||
}, nil
|
||||
default:
|
||||
return &CheckResult{
|
||||
Available: false,
|
||||
Reason: "unknown-state",
|
||||
Detail: "allocation exists in an unrecognised state — contact platform operators",
|
||||
FQDN: fqdn,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// ReserveInput carries the optional createdBy attribution. Defaults to
|
||||
// "catalyst-api" when empty.
|
||||
type ReserveInput struct {
|
||||
CreatedBy string
|
||||
}
|
||||
|
||||
// Reserve transitions NULL → RESERVED for the (poolDomain, subdomain) pair,
|
||||
// holding the name for the configured TTL. Returns the Allocation, including
|
||||
// the reservation token the caller MUST pass back to Commit.
|
||||
//
|
||||
// Errors:
|
||||
//
|
||||
// store.ErrConflict — the row exists in any non-expired state
|
||||
// dynadot.ErrUnmanagedDomain — pool domain is not managed
|
||||
func (a *Allocator) Reserve(ctx context.Context, poolDomain, subdomain string, in ReserveInput) (*store.Allocation, error) {
|
||||
if !dynadot.IsManagedDomain(poolDomain) {
|
||||
return nil, dynadot.ErrUnmanagedDomain
|
||||
}
|
||||
if reserved.IsReserved(subdomain) {
|
||||
return nil, fmt.Errorf("subdomain %q is reserved", subdomain)
|
||||
}
|
||||
createdBy := in.CreatedBy
|
||||
if createdBy == "" {
|
||||
createdBy = "catalyst-api"
|
||||
}
|
||||
alloc, err := a.store.Reserve(ctx, poolDomain, subdomain, a.reservationTTL, createdBy)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
a.log.Info("pool-domain reserved",
|
||||
"poolDomain", poolDomain,
|
||||
"subdomain", subdomain,
|
||||
"ttl", a.reservationTTL.String(),
|
||||
"createdBy", createdBy,
|
||||
"expiresAt", alloc.ExpiresAt.Format(time.RFC3339),
|
||||
)
|
||||
return alloc, nil
|
||||
}
|
||||
|
||||
// CommitInput carries the data /commit needs to flip RESERVED → ACTIVE.
|
||||
type CommitInput struct {
|
||||
ReservationToken string
|
||||
SovereignFQDN string
|
||||
LoadBalancerIP string
|
||||
}
|
||||
|
||||
// Commit flips a reservation to ACTIVE and writes the Dynadot DNS records
|
||||
// (wildcard + canonical control-plane prefixes) for the new Sovereign.
|
||||
//
|
||||
// Order of operations is deliberate:
|
||||
// 1. Verify the row exists and the reservation token matches (in a single
|
||||
// row-locked Postgres transaction so a concurrent Release can't race).
|
||||
// 2. Update the row to state='active' (still in the same transaction).
|
||||
// 3. Commit the transaction.
|
||||
// 4. Write the Dynadot records. If this fails we LEAVE the row in
|
||||
// state='active' and surface the error to the caller — the operator
|
||||
// decides whether to Release (which will fix DNS) or retry.
|
||||
//
|
||||
// Per the auto-memory `feedback_dynadot_dns.md`: Dynadot writes are
|
||||
// idempotent with add_dns_to_current_setting=yes, so step 4 is safe to
|
||||
// retry from outside (the wizard's retry button calls Commit again with
|
||||
// the same token and the same LB IP).
|
||||
func (a *Allocator) Commit(ctx context.Context, poolDomain, subdomain string, in CommitInput) (*store.Allocation, error) {
|
||||
if !dynadot.IsManagedDomain(poolDomain) {
|
||||
return nil, dynadot.ErrUnmanagedDomain
|
||||
}
|
||||
alloc, err := a.store.Commit(ctx, poolDomain, subdomain, store.CommitInput{
|
||||
ReservationToken: in.ReservationToken,
|
||||
SovereignFQDN: in.SovereignFQDN,
|
||||
LoadBalancerIP: in.LoadBalancerIP,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := a.dynadot.AddSovereignRecords(ctx, poolDomain, subdomain, in.LoadBalancerIP); err != nil {
|
||||
// Row is already state='active'; do not roll it back. The caller can
|
||||
// Release if they want a clean slate. Surface the DNS error so the
|
||||
// wizard's UX shows the partial-failure path.
|
||||
a.log.Error("dynadot write after commit failed",
|
||||
"poolDomain", poolDomain,
|
||||
"subdomain", subdomain,
|
||||
"loadBalancerIP", in.LoadBalancerIP,
|
||||
"err", err,
|
||||
)
|
||||
return alloc, fmt.Errorf("dynadot write: %w", err)
|
||||
}
|
||||
a.log.Info("pool-domain committed",
|
||||
"poolDomain", poolDomain,
|
||||
"subdomain", subdomain,
|
||||
"sovereignFQDN", in.SovereignFQDN,
|
||||
"loadBalancerIP", in.LoadBalancerIP,
|
||||
)
|
||||
return alloc, nil
|
||||
}
|
||||
|
||||
// Release deletes the row regardless of state, then (if the row was active)
|
||||
// removes the Dynadot DNS records. Reserved rows have no DNS side-effect to
|
||||
// clean up.
|
||||
//
|
||||
// Returns the freed Allocation (so the caller can log what was removed) or
|
||||
// store.ErrNotFound when there was nothing to release.
|
||||
func (a *Allocator) Release(ctx context.Context, poolDomain, subdomain string) (*store.Allocation, error) {
|
||||
if !dynadot.IsManagedDomain(poolDomain) {
|
||||
return nil, dynadot.ErrUnmanagedDomain
|
||||
}
|
||||
alloc, err := a.store.Release(ctx, poolDomain, subdomain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if alloc.State == store.StateActive {
|
||||
if dnsErr := a.dynadot.DeleteSubdomainRecords(ctx, poolDomain, subdomain); dnsErr != nil {
|
||||
a.log.Error("dynadot delete after release failed",
|
||||
"poolDomain", poolDomain,
|
||||
"subdomain", subdomain,
|
||||
"err", dnsErr,
|
||||
)
|
||||
return alloc, fmt.Errorf("dynadot delete: %w", dnsErr)
|
||||
}
|
||||
}
|
||||
a.log.Info("pool-domain released",
|
||||
"poolDomain", poolDomain,
|
||||
"subdomain", subdomain,
|
||||
"previousState", string(alloc.State),
|
||||
)
|
||||
return alloc, nil
|
||||
}
|
||||
|
||||
// List returns every allocation under the given pool domain.
|
||||
func (a *Allocator) List(ctx context.Context, poolDomain string) ([]store.Allocation, error) {
|
||||
if !dynadot.IsManagedDomain(poolDomain) {
|
||||
return nil, dynadot.ErrUnmanagedDomain
|
||||
}
|
||||
return a.store.List(ctx, poolDomain)
|
||||
}
|
||||
|
||||
// RunSweeper starts a background loop that periodically deletes expired
|
||||
// reservations. Cancel the parent context to stop the sweeper. Should run as
|
||||
// a goroutine off cmd/pdm/main.
|
||||
func (a *Allocator) RunSweeper(ctx context.Context, interval time.Duration) {
|
||||
if interval <= 0 {
|
||||
interval = 30 * time.Second
|
||||
}
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
a.log.Info("sweeper shutdown")
|
||||
return
|
||||
case <-ticker.C:
|
||||
deleted, err := a.store.ExpireReservations(ctx)
|
||||
if err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return
|
||||
}
|
||||
a.log.Error("sweeper expire failed", "err", err)
|
||||
continue
|
||||
}
|
||||
if deleted > 0 {
|
||||
a.log.Info("sweeper expired reservations", "count", deleted)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
466
core/pool-domain-manager/internal/dynadot/dynadot.go
Normal file
466
core/pool-domain-manager/internal/dynadot/dynadot.go
Normal file
@ -0,0 +1,466 @@
|
||||
// Package dynadot — DNS API client for OpenOva-managed pool domains.
|
||||
//
|
||||
// This package is the SOLE caller of api.dynadot.com in the OpenOva fleet.
|
||||
// catalyst-api, the wizard, and every other product talk to DNS through
|
||||
// pool-domain-manager — they never import this package directly. Centralising
|
||||
// the writer means the auto-memory invariant `feedback_dynadot_dns.md`
|
||||
// (NEVER run exploratory set_dns2 — each call wipes all records) is enforced
|
||||
// architecturally: there's one writer, one commit path.
|
||||
//
|
||||
// Design choices baked in:
|
||||
//
|
||||
// - Every write uses add_dns_to_current_setting=yes so it appends rather
|
||||
// than replaces. The Dynadot API treats set_dns2 as "REPLACE the entire
|
||||
// zone" by default — the auto-memory documents an incident where this
|
||||
// wiped MX records.
|
||||
//
|
||||
// - The managed-domain list comes from runtime configuration
|
||||
// (DYNADOT_MANAGED_DOMAINS env var) per docs/INVIOLABLE-PRINCIPLES.md #4.
|
||||
// Adding a fourth pool domain is purely a secret update — no rebuild.
|
||||
//
|
||||
// - Reads (set_dns2 has no list-records counterpart) are done via the
|
||||
// get_dns command, which returns the current zone we then filter by
|
||||
// subdomain prefix when DeleteSubdomainRecords needs to clean up.
|
||||
package dynadot
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"sync"
	"time"
)
|
||||
|
||||
// Client wraps the Dynadot REST API. Construct once (see New) and reuse —
// the embedded *http.Client pools connections across calls.
type Client struct {
	// APIKey authenticates every request (sent as the "key" query param).
	APIKey string
	// APISecret accompanies the key (sent as the "secret" query param).
	APISecret string
	// HTTP performs the requests. New installs a 30s-timeout client;
	// callers may substitute their own (e.g. for tests).
	HTTP *http.Client
}
|
||||
|
||||
// New returns a Dynadot client. Credentials come from PDM's K8s secret
|
||||
// `dynadot-api-credentials`; passing them in keeps this package free of
|
||||
// direct env-var reads (the cmd/pdm main wires it together).
|
||||
func New(apiKey, apiSecret string) *Client {
|
||||
return &Client{
|
||||
APIKey: apiKey,
|
||||
APISecret: apiSecret,
|
||||
HTTP: &http.Client{Timeout: 30 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
// Record is a single DNS record we want Dynadot to publish.
type Record struct {
	// Subdomain — leave empty (or "@") for apex records. e.g. "console",
	// "*", "*.omantel". Multi-label subdomains ARE supported; Dynadot's
	// set_dns2 allows arbitrary labels in the subdomain column.
	Subdomain string
	// Type — record type as Dynadot spells it: A, AAAA, CNAME, TXT, MX, etc.
	Type string
	// Value — depends on Type. For A: IPv4 string; for CNAME: target FQDN.
	Value string
	// TTL — seconds. Dynadot supports 60, 300, 1800, 3600, 7200, 14400,
	// 28800, 43200, 86400. AddRecord defaults it to 300 when zero.
	TTL int
}
|
||||
|
||||
// AddRecord appends a single record to the domain's existing DNS configuration
|
||||
// using add_dns_to_current_setting=yes. Idempotent across re-runs as long as
|
||||
// the (subdomain, type, value) tuple is identical (Dynadot dedupes).
|
||||
func (c *Client) AddRecord(ctx context.Context, domain string, rec Record) error {
|
||||
if rec.TTL == 0 {
|
||||
rec.TTL = 300
|
||||
}
|
||||
|
||||
params := url.Values{}
|
||||
params.Set("key", c.APIKey)
|
||||
params.Set("secret", c.APISecret)
|
||||
params.Set("command", "set_dns2")
|
||||
params.Set("domain", domain)
|
||||
params.Set("add_dns_to_current_setting", "yes")
|
||||
|
||||
if rec.Subdomain == "" || rec.Subdomain == "@" {
|
||||
params.Set("main_record_type0", rec.Type)
|
||||
params.Set("main_record0", rec.Value)
|
||||
params.Set("main_recordx0", fmt.Sprintf("%d", rec.TTL))
|
||||
} else {
|
||||
params.Set("subdomain0", rec.Subdomain)
|
||||
params.Set("sub_record_type0", rec.Type)
|
||||
params.Set("sub_record0", rec.Value)
|
||||
params.Set("sub_recordx0", fmt.Sprintf("%d", rec.TTL))
|
||||
}
|
||||
|
||||
endpoint := "https://api.dynadot.com/api3.json?" + params.Encode()
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build dynadot request: %w", err)
|
||||
}
|
||||
resp, err := c.HTTP.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dynadot api: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("dynadot api status %d: %s", resp.StatusCode, truncate(string(body), 256))
|
||||
}
|
||||
|
||||
var result struct {
|
||||
SetDNS2Response struct {
|
||||
ResponseHeader struct {
|
||||
ResponseCode string `json:"ResponseCode"`
|
||||
Status string `json:"Status"`
|
||||
Error string `json:"Error"`
|
||||
} `json:"ResponseHeader"`
|
||||
} `json:"SetDns2Response"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return fmt.Errorf("parse dynadot response: %w (body=%s)", err, truncate(string(body), 256))
|
||||
}
|
||||
hdr := result.SetDNS2Response.ResponseHeader
|
||||
if !strings.EqualFold(hdr.Status, "success") && !strings.EqualFold(hdr.ResponseCode, "0") {
|
||||
return fmt.Errorf("dynadot api error: code=%s status=%s err=%s", hdr.ResponseCode, hdr.Status, hdr.Error)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddSovereignRecords writes the canonical record set for a new Sovereign
|
||||
// subdomain: wildcard + canonical control-plane prefixes (console, gitea,
|
||||
// harbor, admin, api). All records point at the load balancer IP.
|
||||
//
|
||||
// Idempotent: re-running with the same (domain, subdomain, ip) is safe.
|
||||
// Re-running with a different IP appends extra records (Dynadot append
|
||||
// semantics) so the caller is responsible for calling DeleteSubdomainRecords
|
||||
// first when re-pointing.
|
||||
func (c *Client) AddSovereignRecords(ctx context.Context, domain, subdomain, lbIP string) error {
|
||||
prefixes := []string{
|
||||
"", // wildcard apex of the subdomain — *.omantel.omani.works
|
||||
"console", // console.omantel.omani.works
|
||||
"gitea", // gitea.omantel.omani.works
|
||||
"harbor", // harbor.omantel.omani.works
|
||||
"admin", // admin.omantel.omani.works
|
||||
"api", // api.omantel.omani.works
|
||||
}
|
||||
|
||||
for _, p := range prefixes {
|
||||
var sub string
|
||||
if p == "" {
|
||||
sub = "*." + subdomain
|
||||
} else {
|
||||
sub = p + "." + subdomain
|
||||
}
|
||||
if err := c.AddRecord(ctx, domain, Record{
|
||||
Subdomain: sub,
|
||||
Type: "A",
|
||||
Value: lbIP,
|
||||
TTL: 300,
|
||||
}); err != nil {
|
||||
return fmt.Errorf("add %s record: %w", sub, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteSubdomainRecords removes every record under "*.<subdomain>" and
// "<prefix>.<subdomain>" for the canonical Sovereign prefixes, by
// re-writing the zone WITHOUT those rows. Dynadot's API has no per-record
// delete; the path is "fetch zone, omit the rows we want gone, write it
// back". The write-back (writeZone) deliberately uses replace semantics —
// but the zone it writes is a copy that lacks only the targeted rows and
// preserves every other row exactly.
//
// To avoid the auto-memory incident (set_dns2 wiping MX/TXT records), the
// implementation reads the full zone first via get_dns, mutates the in-
// memory representation, and writes back the COMPLETE zone minus the
// targeted rows. The result is a no-op for unrelated records.
//
// NOTE(review): the read-modify-write is not atomic — a concurrent Dynadot
// writer between getZone and writeZone could have its records dropped.
// PDM being the sole writer (per the package comment) is what makes this
// safe; confirm that invariant before adding other writers.
//
// Returns nil even when no matching records existed — the delete is
// idempotent.
func (c *Client) DeleteSubdomainRecords(ctx context.Context, domain, subdomain string) error {
	zone, err := c.getZone(ctx, domain)
	if err != nil {
		return fmt.Errorf("read zone: %w", err)
	}

	// Targets to remove: the wildcard + each canonical prefix (must stay
	// in sync with the prefix list in AddSovereignRecords).
	targets := map[string]struct{}{
		"*." + subdomain:       {},
		"console." + subdomain: {},
		"gitea." + subdomain:   {},
		"harbor." + subdomain:  {},
		"admin." + subdomain:   {},
		"api." + subdomain:     {},
	}

	// In-place filter: reuses zone.SubRecords' backing array.
	keep := zone.SubRecords[:0]
	for _, sr := range zone.SubRecords {
		if _, drop := targets[sr.Subdomain]; drop {
			continue
		}
		keep = append(keep, sr)
	}
	zone.SubRecords = keep

	return c.writeZone(ctx, domain, zone)
}
|
||||
|
||||
// zoneSnapshot is the in-memory representation of the records returned by
// Dynadot's get_dns command: the apex (main) record set, the subdomain
// record set, and the zone-wide TTL get_dns reported.
type zoneSnapshot struct {
	MainRecords []mainRecord
	SubRecords  []subRecord
	// TTL is the single zone-wide TTL from get_dns; writeZone reapplies it
	// to every record it writes back (defaulting to 300 when zero).
	TTL int
}
|
||||
|
||||
// mainRecord is one apex-level record (no subdomain label).
type mainRecord struct {
	Type  string
	Value string
}

// subRecord is one record under a subdomain label.
type subRecord struct {
	Subdomain string
	Type      string
	Value     string
}
|
||||
|
||||
// getZone reads the current zone via get_dns. Dynadot's response nests the
// record arrays under GetDnsResponse.Content.NameServerSettings; we only
// care about the main + sub record arrays (and the zone TTL) for the
// delete path.
func (c *Client) getZone(ctx context.Context, domain string) (*zoneSnapshot, error) {
	params := url.Values{}
	params.Set("key", c.APIKey)
	params.Set("secret", c.APISecret)
	params.Set("command", "get_dns")
	params.Set("domain", domain)

	endpoint := "https://api.dynadot.com/api3.json?" + params.Encode()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	resp, err := c.HTTP.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	// NOTE(review): the read error is discarded here; a short read would
	// surface as a JSON parse error below rather than as an I/O error.
	body, _ := io.ReadAll(resp.Body)
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("dynadot get_dns status %d: %s", resp.StatusCode, truncate(string(body), 256))
	}

	// Anonymous struct mirroring just the fields we consume. Note the
	// inconsistent key casing (record_type/value vs RecordType/Value) —
	// that matches Dynadot's actual response shape for the two arrays.
	var raw struct {
		GetDNSResponse struct {
			ResponseHeader struct {
				ResponseCode string `json:"ResponseCode"`
				Status       string `json:"Status"`
				Error        string `json:"Error"`
			} `json:"ResponseHeader"`
			Content struct {
				NameServerSettings struct {
					MainDomains []struct {
						RecordType string `json:"record_type"`
						Value      string `json:"value"`
					} `json:"MainDomains"`
					SubDomains []struct {
						Subhost    string `json:"Subhost"`
						RecordType string `json:"RecordType"`
						Value      string `json:"Value"`
					} `json:"SubDomains"`
					TTL int `json:"TTL"`
				} `json:"NameServerSettings"`
			} `json:"Content"`
		} `json:"GetDnsResponse"`
	}
	if err := json.Unmarshal(body, &raw); err != nil {
		return nil, fmt.Errorf("parse get_dns: %w (body=%s)", err, truncate(string(body), 256))
	}
	// Accept when either Status says success or the numeric code is 0.
	hdr := raw.GetDNSResponse.ResponseHeader
	if !strings.EqualFold(hdr.Status, "success") && !strings.EqualFold(hdr.ResponseCode, "0") {
		return nil, fmt.Errorf("dynadot get_dns: code=%s status=%s err=%s", hdr.ResponseCode, hdr.Status, hdr.Error)
	}

	// Flatten the nested response into the package's snapshot types.
	out := &zoneSnapshot{TTL: raw.GetDNSResponse.Content.NameServerSettings.TTL}
	for _, m := range raw.GetDNSResponse.Content.NameServerSettings.MainDomains {
		out.MainRecords = append(out.MainRecords, mainRecord{Type: m.RecordType, Value: m.Value})
	}
	for _, s := range raw.GetDNSResponse.Content.NameServerSettings.SubDomains {
		out.SubRecords = append(out.SubRecords, subRecord{Subdomain: s.Subhost, Type: s.RecordType, Value: s.Value})
	}
	return out, nil
}
|
||||
|
||||
// writeZone calls set_dns2 WITHOUT add_dns_to_current_setting, i.e. with
// full-replace semantics, serialising the given zone. This is the
// dangerous code path the auto-memory warns about — use it only after the
// caller has read the zone via getZone and wants to write back a
// deliberate mutation of the complete zone.
//
// NOTE(review): get_dns exposes only a single zone-wide TTL, so every
// record is written back with that one TTL (default 300) — per-record TTL
// differences would not survive a delete/rewrite cycle; confirm this is
// acceptable for the managed pool domains.
func (c *Client) writeZone(ctx context.Context, domain string, zone *zoneSnapshot) error {
	params := url.Values{}
	params.Set("key", c.APIKey)
	params.Set("secret", c.APISecret)
	params.Set("command", "set_dns2")
	params.Set("domain", domain)
	// NOTE: deliberately NOT setting add_dns_to_current_setting — we want
	// replace semantics here. The zone we serialise contains every row
	// that was present minus the targeted deletions.

	ttl := zone.TTL
	if ttl == 0 {
		ttl = 300
	}

	// Apex records go into the main_record* columns…
	for i, m := range zone.MainRecords {
		params.Set(fmt.Sprintf("main_record_type%d", i), m.Type)
		params.Set(fmt.Sprintf("main_record%d", i), m.Value)
		params.Set(fmt.Sprintf("main_recordx%d", i), fmt.Sprintf("%d", ttl))
	}
	// …and subdomain records into the sub_record* columns.
	for i, s := range zone.SubRecords {
		params.Set(fmt.Sprintf("subdomain%d", i), s.Subdomain)
		params.Set(fmt.Sprintf("sub_record_type%d", i), s.Type)
		params.Set(fmt.Sprintf("sub_record%d", i), s.Value)
		params.Set(fmt.Sprintf("sub_recordx%d", i), fmt.Sprintf("%d", ttl))
	}

	endpoint := "https://api.dynadot.com/api3.json?" + params.Encode()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return err
	}
	resp, err := c.HTTP.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	// NOTE(review): read error discarded; a short read shows up as a parse
	// error below rather than as an I/O error.
	body, _ := io.ReadAll(resp.Body)
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("dynadot set_dns2 status %d: %s", resp.StatusCode, truncate(string(body), 256))
	}

	// Anonymous struct mirroring just the header fields we check.
	var result struct {
		SetDNS2Response struct {
			ResponseHeader struct {
				ResponseCode string `json:"ResponseCode"`
				Status       string `json:"Status"`
				Error        string `json:"Error"`
			} `json:"ResponseHeader"`
		} `json:"SetDns2Response"`
	}
	if err := json.Unmarshal(body, &result); err != nil {
		return fmt.Errorf("parse set_dns2: %w (body=%s)", err, truncate(string(body), 256))
	}
	// Accept when either Status says success or the numeric code is 0.
	hdr := result.SetDNS2Response.ResponseHeader
	if !strings.EqualFold(hdr.Status, "success") && !strings.EqualFold(hdr.ResponseCode, "0") {
		return fmt.Errorf("dynadot set_dns2 error: code=%s status=%s err=%s", hdr.ResponseCode, hdr.Status, hdr.Error)
	}
	return nil
}
|
||||
|
||||
// managedDomainsState mirrors the catalyst-api dynadot package's runtime
// resolution: env-var first, then legacy single-domain fallback, then a
// minimal built-in default (kept ONLY so unit tests work without an env).
// Guarded by sync.Once; ResetManagedDomains replaces the Once so tests can
// force re-evaluation after mutating env vars.
var managedDomainsState struct {
	once sync.Once
	set  map[string]struct{}
}
|
||||
|
||||
func resolveManagedDomains() map[string]struct{} {
|
||||
managedDomainsState.once.Do(func() {
|
||||
managedDomainsState.set = computeManagedDomains()
|
||||
})
|
||||
return managedDomainsState.set
|
||||
}
|
||||
|
||||
func computeManagedDomains() map[string]struct{} {
|
||||
out := make(map[string]struct{})
|
||||
if raw := os.Getenv("DYNADOT_MANAGED_DOMAINS"); strings.TrimSpace(raw) != "" {
|
||||
for _, tok := range splitDomainsList(raw) {
|
||||
out[tok] = struct{}{}
|
||||
}
|
||||
if len(out) > 0 {
|
||||
return out
|
||||
}
|
||||
}
|
||||
if d := strings.ToLower(strings.TrimSpace(os.Getenv("DYNADOT_DOMAIN"))); d != "" {
|
||||
out[d] = struct{}{}
|
||||
return out
|
||||
}
|
||||
out["openova.io"] = struct{}{}
|
||||
out["omani.works"] = struct{}{}
|
||||
return out
|
||||
}
|
||||
|
||||
// ResetManagedDomains clears the cache so tests can re-evaluate after
|
||||
// mutating env vars.
|
||||
func ResetManagedDomains() {
|
||||
managedDomainsState.once = sync.Once{}
|
||||
managedDomainsState.set = nil
|
||||
}
|
||||
|
||||
// ManagedDomains returns a sorted, deduplicated copy of the configured
|
||||
// managed-domain list. Useful for /healthz exposure and operator logs.
|
||||
func ManagedDomains() []string {
|
||||
set := resolveManagedDomains()
|
||||
out := make([]string, 0, len(set))
|
||||
for d := range set {
|
||||
out = append(out, d)
|
||||
}
|
||||
for i := 1; i < len(out); i++ {
|
||||
for j := i; j > 0 && out[j-1] > out[j]; j-- {
|
||||
out[j-1], out[j] = out[j], out[j-1]
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// IsManagedDomain reports whether the given domain is one whose DNS Dynadot
|
||||
// manages on behalf of OpenOva.
|
||||
func IsManagedDomain(domain string) bool {
|
||||
domain = strings.ToLower(strings.TrimSpace(domain))
|
||||
if domain == "" {
|
||||
return false
|
||||
}
|
||||
_, ok := resolveManagedDomains()[domain]
|
||||
return ok
|
||||
}
|
||||
|
||||
// splitDomainsList parses a `DYNADOT_MANAGED_DOMAINS`-style string —
// comma- or whitespace-separated, lower-cased, deduplicated, with
// first-occurrence order preserved.
func splitDomainsList(raw string) []string {
	normalized := strings.ReplaceAll(strings.ToLower(raw), ",", " ")
	fields := strings.Fields(normalized)

	out := make([]string, 0, len(fields))
	seen := make(map[string]struct{}, len(fields))
	for _, field := range fields {
		if _, dup := seen[field]; dup {
			continue
		}
		seen[field] = struct{}{}
		out = append(out, field)
	}
	return out
}
|
||||
|
||||
// truncate caps s at max bytes, appending "..." when anything was cut.
// Used to keep raw Dynadot response bodies out of full-length log lines.
func truncate(s string, max int) string {
	if len(s) > max {
		return s[:max] + "..."
	}
	return s
}
|
||||
|
||||
// Errors surfaced by the package for callers that want to type-switch.
var (
	// ErrUnmanagedDomain — caller asked for an action against a domain not
	// in DYNADOT_MANAGED_DOMAINS. Hard fail to defend against
	// misconfiguration; match with errors.Is.
	ErrUnmanagedDomain = errors.New("domain is not in the Dynadot managed list")
)
|
||||
62
core/pool-domain-manager/internal/dynadot/dynadot_test.go
Normal file
62
core/pool-domain-manager/internal/dynadot/dynadot_test.go
Normal file
@ -0,0 +1,62 @@
|
||||
package dynadot
|
||||
|
||||
import (
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIsManagedDomainEnvVar(t *testing.T) {
|
||||
t.Setenv("DYNADOT_MANAGED_DOMAINS", "openova.io, omani.works acme.io")
|
||||
t.Setenv("DYNADOT_DOMAIN", "")
|
||||
ResetManagedDomains()
|
||||
|
||||
for _, d := range []string{"openova.io", "omani.works", "acme.io"} {
|
||||
if !IsManagedDomain(d) {
|
||||
t.Errorf("IsManagedDomain(%q) = false, want true", d)
|
||||
}
|
||||
}
|
||||
if IsManagedDomain("not-managed.com") {
|
||||
t.Errorf("IsManagedDomain(not-managed.com) = true, want false")
|
||||
}
|
||||
got := ManagedDomains()
|
||||
sort.Strings(got)
|
||||
want := []string{"acme.io", "omani.works", "openova.io"}
|
||||
if strings.Join(got, ",") != strings.Join(want, ",") {
|
||||
t.Errorf("ManagedDomains() = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsManagedDomainLegacyFallback(t *testing.T) {
|
||||
t.Setenv("DYNADOT_MANAGED_DOMAINS", "")
|
||||
t.Setenv("DYNADOT_DOMAIN", "legacy.io")
|
||||
ResetManagedDomains()
|
||||
|
||||
if !IsManagedDomain("legacy.io") {
|
||||
t.Errorf("IsManagedDomain(legacy.io) = false, want true")
|
||||
}
|
||||
if IsManagedDomain("openova.io") {
|
||||
t.Errorf("legacy fallback should not include built-in defaults")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsManagedDomainBuiltInDefaults(t *testing.T) {
|
||||
os.Unsetenv("DYNADOT_MANAGED_DOMAINS")
|
||||
os.Unsetenv("DYNADOT_DOMAIN")
|
||||
ResetManagedDomains()
|
||||
|
||||
for _, d := range []string{"openova.io", "omani.works"} {
|
||||
if !IsManagedDomain(d) {
|
||||
t.Errorf("built-in default missing %q", d)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitDomainsList(t *testing.T) {
|
||||
got := splitDomainsList("Foo.com, BAR.IO\tbaz.io ,foo.com")
|
||||
want := []string{"foo.com", "bar.io", "baz.io"}
|
||||
if strings.Join(got, ",") != strings.Join(want, ",") {
|
||||
t.Errorf("splitDomainsList = %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
451
core/pool-domain-manager/internal/handler/handler.go
Normal file
451
core/pool-domain-manager/internal/handler/handler.go
Normal file
@ -0,0 +1,451 @@
|
||||
// Package handler — HTTP surface for pool-domain-manager.
|
||||
//
|
||||
// Endpoints (all JSON; per the issue body):
|
||||
//
|
||||
// GET /api/v1/pool/{domain}/check?sub=X Fast read; PDM-DB only.
|
||||
// POST /api/v1/pool/{domain}/reserve Atomic reserve; 10-min TTL.
|
||||
// POST /api/v1/pool/{domain}/commit Promote → ACTIVE + Dynadot.
|
||||
// DELETE /api/v1/pool/{domain}/release Free; remove Dynadot.
|
||||
// GET /api/v1/pool/{domain}/list Operator-facing list.
|
||||
// GET /api/v1/reserved Public reserved-name list.
|
||||
// GET /healthz Liveness probe.
|
||||
//
|
||||
// Per docs/INVIOLABLE-PRINCIPLES.md #4 the handler does not hardcode domain
|
||||
// names — every value comes from the URL path or request body, validated
|
||||
// against the runtime DYNADOT_MANAGED_DOMAINS list.
|
||||
package handler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/allocator"
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/dynadot"
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/reserved"
|
||||
"github.com/openova-io/openova/core/pool-domain-manager/internal/store"
|
||||
)
|
||||
|
||||
// Handler holds the dependencies shared by every endpoint.
type Handler struct {
	// Alloc implements the reserve/commit/release operations behind the
	// pool endpoints.
	Alloc *allocator.Allocator
	Store *store.Store // exposed for /healthz Ping
	// Log receives structured error/request logs for every endpoint.
	Log *slog.Logger
}
|
||||
|
||||
// New constructs a Handler.
|
||||
func New(alloc *allocator.Allocator, s *store.Store, log *slog.Logger) *Handler {
|
||||
return &Handler{Alloc: alloc, Store: s, Log: log}
|
||||
}
|
||||
|
||||
// Routes returns the chi.Router with all PDM routes wired up.
|
||||
func (h *Handler) Routes() *chi.Mux {
|
||||
r := chi.NewRouter()
|
||||
r.Get("/healthz", h.Healthz)
|
||||
r.Route("/api/v1", func(r chi.Router) {
|
||||
r.Get("/reserved", h.ListReserved)
|
||||
r.Route("/pool/{domain}", func(r chi.Router) {
|
||||
r.Get("/check", h.Check)
|
||||
r.Get("/list", h.List)
|
||||
r.Post("/reserve", h.Reserve)
|
||||
r.Post("/commit", h.Commit)
|
||||
r.Delete("/release", h.Release)
|
||||
})
|
||||
})
|
||||
return r
|
||||
}
|
||||
|
||||
// ── Healthz ────────────────────────────────────────────────────────────
|
||||
|
||||
// Healthz returns 200 if Postgres is reachable and the dynadot config is
|
||||
// loaded; otherwise 503. The response includes the runtime managed-domain
|
||||
// list so operators can grep for misconfiguration.
|
||||
func (h *Handler) Healthz(w http.ResponseWriter, r *http.Request) {
|
||||
if err := h.Store.Ping(r.Context()); err != nil {
|
||||
writeJSON(w, http.StatusServiceUnavailable, map[string]any{
|
||||
"status": "unhealthy",
|
||||
"db": err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"status": "ok",
|
||||
"managedDomains": dynadot.ManagedDomains(),
|
||||
})
|
||||
}
|
||||
|
||||
// ── Reserved-list ──────────────────────────────────────────────────────
|
||||
|
||||
// ListReserved exposes the canonical reserved-subdomain list. The wizard can
|
||||
// consume this to render an inline hint instead of waiting for the user to
|
||||
// type a reserved name and seeing an error.
|
||||
func (h *Handler) ListReserved(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"reserved": reserved.All(),
|
||||
})
|
||||
}
|
||||
|
||||
// ── /pool/{domain}/check ───────────────────────────────────────────────
|
||||
|
||||
// Check is the read-only availability query. Always returns 200 OK with a
|
||||
// JSON body — clients use the body's `available` field, not the HTTP
|
||||
// status, to decide.
|
||||
func (h *Handler) Check(w http.ResponseWriter, r *http.Request) {
|
||||
domain := normaliseLabel(chi.URLParam(r, "domain"))
|
||||
sub := normaliseLabel(r.URL.Query().Get("sub"))
|
||||
|
||||
if !isValidDNSLabel(sub) {
|
||||
writeJSON(w, http.StatusOK, allocator.CheckResult{
|
||||
Available: false,
|
||||
Reason: "invalid-format",
|
||||
Detail: "subdomain must be a-z, 0-9 and hyphens, start with a letter, max 63 characters",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
res, err := h.Alloc.Check(r.Context(), domain, sub)
|
||||
if err != nil {
|
||||
h.Log.Error("check failed", "domain", domain, "sub", sub, "err", err)
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, res)
|
||||
}
|
||||
|
||||
// ── /pool/{domain}/reserve ─────────────────────────────────────────────
|
||||
|
||||
// ReserveRequest is the body shape POSTed to /reserve.
type ReserveRequest struct {
	// Subdomain is the label the caller wants under the pool domain.
	Subdomain string `json:"subdomain"`
	// CreatedBy optionally identifies the requesting party; echoed back in
	// the response.
	CreatedBy string `json:"createdBy,omitempty"`
}
|
||||
|
||||
// ReserveResponse is the wire shape returned to the caller.
type ReserveResponse struct {
	PoolDomain string `json:"poolDomain"`
	Subdomain  string `json:"subdomain"`
	// State echoes the store's allocation state as a string.
	State string `json:"state"`
	// ReservedAt / ExpiresAt are RFC 3339 timestamps; ExpiresAt is empty
	// when the store did not set an expiry.
	ReservedAt string `json:"reservedAt"`
	ExpiresAt  string `json:"expiresAt"`
	// ReservationToken is the token /commit expects back (see
	// CommitRequest); a mismatch there yields 403.
	ReservationToken string `json:"reservationToken"`
	CreatedBy        string `json:"createdBy"`
}
|
||||
|
||||
// Reserve atomically reserves the (domain, subdomain) pair for the
// configured TTL. Status codes:
//
//	201 Created              — reserved; body is a ReserveResponse
//	400 Bad Request          — malformed JSON body
//	409 Conflict             — the name is already reserved or active
//	422 Unprocessable Entity — invalid label format, or the pool domain is
//	                           not in the managed list
//	500                      — any other store error
func (h *Handler) Reserve(w http.ResponseWriter, r *http.Request) {
	domain := normaliseLabel(chi.URLParam(r, "domain"))

	var req ReserveRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid JSON body"})
		return
	}
	// Normalise before validating so case/whitespace alone never reject.
	sub := normaliseLabel(req.Subdomain)
	if !isValidDNSLabel(sub) {
		writeJSON(w, http.StatusUnprocessableEntity, map[string]string{
			"error": "invalid-format",
			"detail": "subdomain must be a-z, 0-9 and hyphens, start with a letter, max 63 characters",
		})
		return
	}

	alloc, err := h.Alloc.Reserve(r.Context(), domain, sub, allocator.ReserveInput{CreatedBy: req.CreatedBy})
	if err != nil {
		// Map allocator errors onto the endpoint's status contract.
		switch {
		case errors.Is(err, store.ErrConflict):
			writeJSON(w, http.StatusConflict, map[string]string{
				"error": "conflict",
				"detail": "this subdomain is already reserved or active",
			})
		case errors.Is(err, dynadot.ErrUnmanagedDomain):
			writeJSON(w, http.StatusUnprocessableEntity, map[string]string{
				"error": "unsupported-pool",
				"detail": "pool domain " + domain + " is not managed by OpenOva",
			})
		default:
			h.Log.Error("reserve failed", "domain", domain, "sub", sub, "err", err)
			writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
		}
		return
	}

	// Timestamps use the RFC 3339 layout.
	resp := ReserveResponse{
		PoolDomain: alloc.PoolDomain,
		Subdomain: alloc.Subdomain,
		State: string(alloc.State),
		ReservedAt: alloc.ReservedAt.Format("2006-01-02T15:04:05Z07:00"),
		ReservationToken: alloc.ReservationToken,
		CreatedBy: alloc.CreatedBy,
	}
	// ExpiresAt is a pointer; only formatted when the store set one.
	if alloc.ExpiresAt != nil {
		resp.ExpiresAt = alloc.ExpiresAt.Format("2006-01-02T15:04:05Z07:00")
	}
	writeJSON(w, http.StatusCreated, resp)
}
|
||||
|
||||
// ── /pool/{domain}/commit ──────────────────────────────────────────────
|
||||
|
||||
// CommitRequest carries the data needed to flip RESERVED → ACTIVE.
|
||||
type CommitRequest struct {
|
||||
Subdomain string `json:"subdomain"`
|
||||
ReservationToken string `json:"reservationToken"`
|
||||
SovereignFQDN string `json:"sovereignFQDN"`
|
||||
LoadBalancerIP string `json:"loadBalancerIP"`
|
||||
}
|
||||
|
||||
// Commit promotes a reservation to active and writes Dynadot records.
// Status codes:
//
//	200 OK        — committed; row is active and DNS records exist
//	202 Accepted  — committed in DB but Dynadot write failed (caller
//	                can retry Commit with same body; idempotent)
//	404 Not Found — no row exists for this (domain, subdomain)
//	409 Conflict  — row is already active (re-commit attempt)
//	410 Gone      — reservation TTL expired before commit; caller must
//	                Reserve again
//	403 Forbidden — reservation token mismatch
func (h *Handler) Commit(w http.ResponseWriter, r *http.Request) {
	domain := normaliseLabel(chi.URLParam(r, "domain"))

	var req CommitRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid JSON body"})
		return
	}
	// Validate the label shape before touching the allocator or the DB.
	sub := normaliseLabel(req.Subdomain)
	if !isValidDNSLabel(sub) {
		writeJSON(w, http.StatusUnprocessableEntity, map[string]string{
			"error":  "invalid-format",
			"detail": "subdomain must be a-z, 0-9 and hyphens, start with a letter, max 63 characters",
		})
		return
	}
	if strings.TrimSpace(req.LoadBalancerIP) == "" {
		writeJSON(w, http.StatusUnprocessableEntity, map[string]string{
			"error":  "missing-lb-ip",
			"detail": "loadBalancerIP is required for commit",
		})
		return
	}

	alloc, err := h.Alloc.Commit(r.Context(), domain, sub, allocator.CommitInput{
		ReservationToken: req.ReservationToken,
		SovereignFQDN:    req.SovereignFQDN,
		LoadBalancerIP:   req.LoadBalancerIP,
	})
	if err != nil {
		// Allocator returns a wrapped "dynadot write" error AFTER the row was
		// flipped to active. Surface 202 in that case so the caller knows the
		// row is committed but DNS is pending.
		// NOTE(review): this is a substring match on the error text — brittle
		// if the allocator ever rewords its wrap. A sentinel error plus
		// errors.Is would be sturdier; confirm with the allocator package
		// before changing.
		if alloc != nil && strings.Contains(err.Error(), "dynadot write") {
			writeJSON(w, http.StatusAccepted, map[string]any{
				"warning":        "row committed but Dynadot write failed; retry commit to publish DNS",
				"detail":         err.Error(),
				"poolDomain":     alloc.PoolDomain,
				"subdomain":      alloc.Subdomain,
				"state":          string(alloc.State),
				"sovereignFQDN":  alloc.SovereignFQDN,
				"loadBalancerIP": alloc.LoadBalancerIP,
			})
			return
		}
		// Map store/dynadot sentinels onto the HTTP codes documented above.
		switch {
		case errors.Is(err, store.ErrNotFound):
			writeJSON(w, http.StatusNotFound, map[string]string{
				"error":  "not-found",
				"detail": "no reservation exists for this (poolDomain, subdomain) — call /reserve first",
			})
		case errors.Is(err, store.ErrConflict):
			writeJSON(w, http.StatusConflict, map[string]string{
				"error":  "already-active",
				"detail": "this allocation is already active",
			})
		case errors.Is(err, store.ErrTokenMismatch):
			writeJSON(w, http.StatusForbidden, map[string]string{
				"error":  "token-mismatch",
				"detail": "reservation token does not match the held reservation",
			})
		case errors.Is(err, store.ErrExpired):
			writeJSON(w, http.StatusGone, map[string]string{
				"error":  "reservation-expired",
				"detail": "the reservation TTL elapsed before commit; reserve again",
			})
		case errors.Is(err, dynadot.ErrUnmanagedDomain):
			writeJSON(w, http.StatusUnprocessableEntity, map[string]string{
				"error":  "unsupported-pool",
				"detail": "pool domain " + domain + " is not managed by OpenOva",
			})
		default:
			h.Log.Error("commit failed", "domain", domain, "sub", sub, "err", err)
			writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
		}
		return
	}

	writeJSON(w, http.StatusOK, allocationResponse(alloc))
}
|
||||
|
||||
// ── /pool/{domain}/release ─────────────────────────────────────────────
|
||||
|
||||
// ReleaseRequest is the body shape DELETEd to /release. We accept a body
// rather than a query param so the wire shape matches reserve/commit.
type ReleaseRequest struct {
	// Subdomain names the allocation to free; normalised server-side.
	Subdomain string `json:"subdomain"`
}
|
||||
|
||||
// Release deletes the row and removes Dynadot records (when state was
// active). Returns 200 OK on success, 404 if no row, 422 on validation.
//
// We do NOT require the reservation token here — Release is operator-side
// (also invoked by catalyst-api on tofu destroy) and the catalyst-api may
// not still hold the original token by the time the destroy fires.
func (h *Handler) Release(w http.ResponseWriter, r *http.Request) {
	domain := normaliseLabel(chi.URLParam(r, "domain"))

	var req ReleaseRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		// Allow ?sub= query fallback so curl -X DELETE without a body works.
		// NOTE(review): the fallback fires only when the body fails to decode;
		// a well-formed but empty body ({}) plus ?sub= still 422s. Confirm
		// whether that's intended before widening the fallback.
		req.Subdomain = r.URL.Query().Get("sub")
	}
	sub := normaliseLabel(req.Subdomain)
	if !isValidDNSLabel(sub) {
		writeJSON(w, http.StatusUnprocessableEntity, map[string]string{
			"error":  "invalid-format",
			"detail": "subdomain must be a-z, 0-9 and hyphens, start with a letter, max 63 characters",
		})
		return
	}

	alloc, err := h.Alloc.Release(r.Context(), domain, sub)
	if err != nil {
		// Partial: row deleted but Dynadot delete failed. 202 tells the
		// operator the DB is clean but DNS needs manual cleanup.
		if alloc != nil && strings.Contains(err.Error(), "dynadot delete") {
			writeJSON(w, http.StatusAccepted, map[string]any{
				"warning": "row deleted but Dynadot delete failed; operator must clean up DNS manually",
				"detail":  err.Error(),
				"freed":   allocationResponse(alloc),
			})
			return
		}
		switch {
		case errors.Is(err, store.ErrNotFound):
			writeJSON(w, http.StatusNotFound, map[string]string{
				"error":  "not-found",
				"detail": "no allocation exists for this (poolDomain, subdomain)",
			})
		case errors.Is(err, dynadot.ErrUnmanagedDomain):
			writeJSON(w, http.StatusUnprocessableEntity, map[string]string{
				"error":  "unsupported-pool",
				"detail": "pool domain " + domain + " is not managed by OpenOva",
			})
		default:
			h.Log.Error("release failed", "domain", domain, "sub", sub, "err", err)
			writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
		}
		return
	}

	writeJSON(w, http.StatusOK, map[string]any{
		"freed": allocationResponse(alloc),
	})
}
|
||||
|
||||
// ── /pool/{domain}/list ────────────────────────────────────────────────
|
||||
|
||||
// List returns every allocation for the given pool domain. Operator-only;
|
||||
// the manifest gates the path behind a Traefik auth middleware.
|
||||
func (h *Handler) List(w http.ResponseWriter, r *http.Request) {
|
||||
domain := normaliseLabel(chi.URLParam(r, "domain"))
|
||||
allocs, err := h.Alloc.List(r.Context(), domain)
|
||||
if err != nil {
|
||||
if errors.Is(err, dynadot.ErrUnmanagedDomain) {
|
||||
writeJSON(w, http.StatusUnprocessableEntity, map[string]string{
|
||||
"error": "unsupported-pool",
|
||||
"detail": "pool domain " + domain + " is not managed by OpenOva",
|
||||
})
|
||||
return
|
||||
}
|
||||
h.Log.Error("list failed", "domain", domain, "err", err)
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
out := make([]map[string]any, 0, len(allocs))
|
||||
for i := range allocs {
|
||||
out = append(out, allocationResponse(&allocs[i]))
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"poolDomain": domain,
|
||||
"allocations": out,
|
||||
})
|
||||
}
|
||||
|
||||
// ── helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
func writeJSON(w http.ResponseWriter, code int, v any) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(code)
|
||||
_ = json.NewEncoder(w).Encode(v)
|
||||
}
|
||||
|
||||
// normaliseLabel canonicalises user input: surrounding whitespace stripped,
// then lower-cased, so validation and DB lookups see one spelling.
func normaliseLabel(s string) string {
	trimmed := strings.TrimSpace(s)
	return strings.ToLower(trimmed)
}
|
||||
|
||||
// isValidDNSLabel validates an RFC 1035 label (lower-case-only, since we
|
||||
// normalise upstream).
|
||||
func isValidDNSLabel(s string) bool {
|
||||
if s == "" || len(s) > 63 {
|
||||
return false
|
||||
}
|
||||
// Allow domain labels with embedded dots ONLY for the {domain} URL
|
||||
// param — those are validated separately via dynadot.IsManagedDomain.
|
||||
// For subdomain inputs we require a single label.
|
||||
for i, r := range s {
|
||||
switch {
|
||||
case r >= 'a' && r <= 'z':
|
||||
continue
|
||||
case r >= '0' && r <= '9':
|
||||
if i == 0 {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
case r == '-':
|
||||
if i == 0 || i == len(s)-1 {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func allocationResponse(a *store.Allocation) map[string]any {
|
||||
out := map[string]any{
|
||||
"poolDomain": a.PoolDomain,
|
||||
"subdomain": a.Subdomain,
|
||||
"state": string(a.State),
|
||||
"reservedAt": a.ReservedAt.Format("2006-01-02T15:04:05Z07:00"),
|
||||
"createdBy": a.CreatedBy,
|
||||
}
|
||||
if a.ExpiresAt != nil {
|
||||
out["expiresAt"] = a.ExpiresAt.Format("2006-01-02T15:04:05Z07:00")
|
||||
}
|
||||
if a.SovereignFQDN != "" {
|
||||
out["sovereignFQDN"] = a.SovereignFQDN
|
||||
}
|
||||
if a.LoadBalancerIP != "" {
|
||||
out["loadBalancerIP"] = a.LoadBalancerIP
|
||||
}
|
||||
if a.ReservationToken != "" {
|
||||
out["reservationToken"] = a.ReservationToken
|
||||
}
|
||||
return out
|
||||
}
|
||||
77
core/pool-domain-manager/internal/reserved/reserved.go
Normal file
77
core/pool-domain-manager/internal/reserved/reserved.go
Normal file
@ -0,0 +1,77 @@
|
||||
// Package reserved holds the canonical list of subdomain names that no tenant
|
||||
// may claim under any OpenOva pool domain. It used to live in
|
||||
// products/catalyst/bootstrap/api/internal/handler/subdomains.go as a private
|
||||
// var — duplicated knowledge with no clear owner.
|
||||
//
|
||||
// Per docs/INVIOLABLE-PRINCIPLES.md #4 the list lives in ONE place: PDM.
|
||||
// catalyst-api consults PDM via /check; the wizard consults catalyst-api via
|
||||
// /api/v1/subdomains/check. There is no second copy of this list anywhere
|
||||
// else in the fleet.
|
||||
//
|
||||
// The list is the union of:
|
||||
// - control-plane prefixes the OpenTofu module will materialise as DNS
|
||||
// records on every Sovereign (api, admin, console, gitea, harbor — see
|
||||
// dynadot.AddSovereignRecords)
|
||||
// - infrastructure prefixes that map to specific OpenOva services
|
||||
// (openova, catalyst, openbao, vault, flux, k8s, system)
|
||||
// - operational prefixes that look enough like a Sovereign to be
|
||||
// dangerous if a tenant grabbed them (www, mail, smtp, imap, vpn, app,
|
||||
// status, docs)
|
||||
//
|
||||
// The IsReserved() function is the only exported surface — callers don't
|
||||
// see (and can't mutate) the underlying map.
|
||||
package reserved
|
||||
|
||||
import (
	"sort"
	"strings"
)
|
||||
|
||||
// reservedSubdomains — names we never let a tenant claim as their Sovereign
|
||||
// root subdomain. Tenants get *.omantel.omani.works style records
|
||||
// automatically; allowing a tenant to claim "console" would create
|
||||
// "console.console.omani.works" which is meaningless and confusing.
|
||||
var reservedSubdomains = map[string]struct{}{
|
||||
"api": {},
|
||||
"admin": {},
|
||||
"console": {},
|
||||
"gitea": {},
|
||||
"harbor": {},
|
||||
"keycloak": {},
|
||||
"www": {},
|
||||
"mail": {},
|
||||
"smtp": {},
|
||||
"imap": {},
|
||||
"vpn": {},
|
||||
"openova": {},
|
||||
"catalyst": {},
|
||||
"docs": {},
|
||||
"status": {},
|
||||
"app": {},
|
||||
"system": {},
|
||||
"openbao": {},
|
||||
"vault": {},
|
||||
"flux": {},
|
||||
"k8s": {},
|
||||
}
|
||||
|
||||
// IsReserved reports whether the given subdomain (lower-cased, trimmed) is
|
||||
// in the reserved set. Caller is responsible for validating the input is a
|
||||
// well-formed DNS label first; this function only checks set membership.
|
||||
func IsReserved(subdomain string) bool {
|
||||
_, ok := reservedSubdomains[strings.ToLower(strings.TrimSpace(subdomain))]
|
||||
return ok
|
||||
}
|
||||
|
||||
// All returns a sorted copy of the reserved list. Used by /api/v1/reserved
|
||||
// for clients (e.g. the wizard) that want to render the list inline as a
|
||||
// hint to the user.
|
||||
func All() []string {
|
||||
out := make([]string, 0, len(reservedSubdomains))
|
||||
for k := range reservedSubdomains {
|
||||
out = append(out, k)
|
||||
}
|
||||
for i := 1; i < len(out); i++ {
|
||||
for j := i; j > 0 && out[j-1] > out[j]; j-- {
|
||||
out[j-1], out[j] = out[j], out[j-1]
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
45
core/pool-domain-manager/internal/reserved/reserved_test.go
Normal file
45
core/pool-domain-manager/internal/reserved/reserved_test.go
Normal file
@ -0,0 +1,45 @@
|
||||
package reserved
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestIsReserved(t *testing.T) {
|
||||
cases := map[string]bool{
|
||||
"api": true,
|
||||
"console": true,
|
||||
"openbao": true,
|
||||
"k8s": true,
|
||||
"omantel": false,
|
||||
"acme": false,
|
||||
"": false,
|
||||
"API": true, // case-insensitive
|
||||
" api ": true, // trims whitespace
|
||||
"foo-bar": false,
|
||||
"my-corp": false,
|
||||
}
|
||||
for k, want := range cases {
|
||||
if got := IsReserved(k); got != want {
|
||||
t.Errorf("IsReserved(%q) = %v, want %v", k, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllSorted(t *testing.T) {
|
||||
names := All()
|
||||
if len(names) == 0 {
|
||||
t.Fatal("expected non-empty reserved list")
|
||||
}
|
||||
for i := 1; i < len(names); i++ {
|
||||
if names[i-1] >= names[i] {
|
||||
t.Errorf("All() not sorted at index %d: %q >= %q", i, names[i-1], names[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllNoReachableNames(t *testing.T) {
|
||||
// Sanity-check: every name in All() must be IsReserved.
|
||||
for _, n := range All() {
|
||||
if !IsReserved(n) {
|
||||
t.Errorf("%q is in All() but IsReserved returned false", n)
|
||||
}
|
||||
}
|
||||
}
|
||||
31
core/pool-domain-manager/internal/store/migrations.sql
Normal file
31
core/pool-domain-manager/internal/store/migrations.sql
Normal file
@ -0,0 +1,31 @@
|
||||
-- pool-domain-manager schema, applied idempotently at process start.
-- Per docs/INVIOLABLE-PRINCIPLES.md #3 the database is CloudNativePG and
-- the schema lives in this single file (no Atlas / Goose / Flyway needed
-- for two tables); running the same SQL repeatedly is safe.
--
-- The CHECK constraint on state and the PRIMARY KEY on (pool_domain,
-- subdomain) together guarantee that no two callers can hold a name in
-- conflicting states. The expires_at index speeds up the sweeper.

CREATE TABLE IF NOT EXISTS pool_allocations (
    pool_domain        TEXT NOT NULL,
    subdomain          TEXT NOT NULL,
    -- 'reserved' rows carry a TTL (expires_at) and a reservation_token;
    -- 'active' rows carry sovereign_fqdn / load_balancer_ip instead.
    state              TEXT NOT NULL CHECK (state IN ('reserved', 'active')),
    reserved_at        TIMESTAMPTZ NOT NULL,
    expires_at         TIMESTAMPTZ,
    sovereign_fqdn     TEXT,
    load_balancer_ip   TEXT,
    reservation_token  UUID,
    created_by         TEXT NOT NULL,
    PRIMARY KEY (pool_domain, subdomain)
);

-- Sweeper-friendly partial index — only the (small) set of reserved rows
-- need to be scanned for TTL expiry.
CREATE INDEX IF NOT EXISTS pool_allocations_expires_idx
    ON pool_allocations (expires_at)
    WHERE state = 'reserved';

-- Operator-facing index — list all active rows for a pool fast.
CREATE INDEX IF NOT EXISTS pool_allocations_state_idx
    ON pool_allocations (pool_domain, state);
|
||||
463
core/pool-domain-manager/internal/store/store.go
Normal file
463
core/pool-domain-manager/internal/store/store.go
Normal file
@ -0,0 +1,463 @@
|
||||
// Package store — CloudNativePG / Postgres persistence for pool-domain-manager.
|
||||
//
|
||||
// The PDM owns a single table — pool_allocations — that holds the canonical
|
||||
// allocation state for every (pool_domain, subdomain) pair the OpenOva fleet
|
||||
// has ever reserved or activated. The table is intentionally simple: PDM is
|
||||
// a small, stateless HTTP service backed by a single-writer Postgres database
|
||||
// (CloudNativePG running in the openova-system namespace). Concurrency is
|
||||
// resolved by Postgres row-level locks + UPSERT semantics rather than any
|
||||
// application-level mutex.
|
||||
//
|
||||
// Schema (also encoded as a migration in migrations.sql):
|
||||
//
|
||||
// CREATE TABLE pool_allocations (
|
||||
// pool_domain TEXT NOT NULL,
|
||||
// subdomain TEXT NOT NULL,
|
||||
// state TEXT NOT NULL CHECK (state IN ('reserved','active')),
|
||||
// reserved_at TIMESTAMPTZ NOT NULL,
|
||||
// expires_at TIMESTAMPTZ, -- NULL when state='active'
|
||||
// sovereign_fqdn TEXT, -- set when state='active'
|
||||
// load_balancer_ip TEXT, -- set when state='active'
|
||||
// reservation_token UUID, -- set when state='reserved'
|
||||
// created_by TEXT NOT NULL,
|
||||
// PRIMARY KEY (pool_domain, subdomain)
|
||||
// );
|
||||
// CREATE INDEX pool_allocations_expires_idx ON pool_allocations (expires_at)
|
||||
// WHERE state = 'reserved';
|
||||
//
|
||||
// Per docs/INVIOLABLE-PRINCIPLES.md #4 the connection string is read from the
|
||||
// PDM_DATABASE_URL env var — never hardcoded. The K8s ExternalSecret pulls
|
||||
// the credentials out of CNPG's auto-generated app secret and projects them
|
||||
// here.
|
||||
package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
_ "embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgconn"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
)
|
||||
|
||||
// State enumerates the lifecycle states a (pool, subdomain) row can hold.
// A row is either reserved or active; the absence of any row is the
// implicit third state, surfaced to callers as ErrNotFound.
type State string

const (
	// StateReserved — the name is held with a TTL. expires_at is non-NULL.
	// On TTL expiry the row is deleted by the sweeper goroutine.
	StateReserved State = "reserved"
	// StateActive — the name has been committed and Dynadot DNS records have
	// been written. expires_at is NULL; the row stays until Release.
	StateActive State = "active"
)

// ErrConflict — somebody else holds this (pool_domain, subdomain) — used by
// the allocator to map to HTTP 409 Conflict.
var ErrConflict = errors.New("pool allocation conflict — name already reserved or active")

// ErrNotFound — no row exists for the (pool_domain, subdomain) pair. The
// handlers map this to HTTP 404.
var ErrNotFound = errors.New("pool allocation not found")
|
||||
|
||||
// Allocation is the persistent shape of a row in pool_allocations. Optional
// fields mirror nullable columns: they are zero/nil when the column is NULL
// and omitted from JSON via omitempty.
type Allocation struct {
	PoolDomain string    `json:"poolDomain"`
	Subdomain  string    `json:"subdomain"`
	State      State     `json:"state"`
	ReservedAt time.Time `json:"reservedAt"`
	// ExpiresAt is non-nil only while State == StateReserved; Commit sets
	// the column to NULL.
	ExpiresAt *time.Time `json:"expiresAt,omitempty"`
	// SovereignFQDN and LoadBalancerIP are written by Commit.
	SovereignFQDN  string `json:"sovereignFQDN,omitempty"`
	LoadBalancerIP string `json:"loadBalancerIP,omitempty"`
	// ReservationToken is issued by Reserve and cleared by Commit.
	ReservationToken string `json:"reservationToken,omitempty"`
	CreatedBy        string `json:"createdBy"`
}
|
||||
|
||||
// Store wraps the pgxpool.Pool with the SQL operations PDM needs. All
// methods are safe for concurrent use; the pool handles connection reuse.
type Store struct {
	pool *pgxpool.Pool
}
|
||||
|
||||
// New connects to Postgres using the DSN, applies migrations, and returns a
// ready-to-use Store. Caller is responsible for calling Close on shutdown.
// On any failure after the pool is created, the pool is closed before the
// error is returned so no connections leak.
func New(ctx context.Context, dsn string) (*Store, error) {
	cfg, err := pgxpool.ParseConfig(dsn)
	if err != nil {
		return nil, fmt.Errorf("parse PDM_DATABASE_URL: %w", err)
	}
	// Modest pool — PDM handles low QPS (a wizard click rate of order 0.1/s
	// fleet-wide) and we'd rather queue than monopolise CNPG connections.
	cfg.MaxConns = 8
	cfg.MinConns = 1
	cfg.MaxConnLifetime = 30 * time.Minute
	cfg.MaxConnIdleTime = 5 * time.Minute

	pool, err := pgxpool.NewWithConfig(ctx, cfg)
	if err != nil {
		return nil, fmt.Errorf("connect postgres: %w", err)
	}
	// Fail fast if the database is unreachable rather than on first query.
	if err := pool.Ping(ctx); err != nil {
		pool.Close()
		return nil, fmt.Errorf("ping postgres: %w", err)
	}

	s := &Store{pool: pool}
	if err := s.migrate(ctx); err != nil {
		pool.Close()
		return nil, fmt.Errorf("apply migrations: %w", err)
	}
	return s, nil
}
|
||||
|
||||
// Close releases the underlying connection pool. Call exactly once on
// shutdown; the Store is unusable afterwards.
func (s *Store) Close() {
	s.pool.Close()
}

// Ping verifies the database is reachable. Used by /healthz.
func (s *Store) Ping(ctx context.Context) error {
	return s.pool.Ping(ctx)
}
|
||||
|
||||
//go:embed migrations.sql
var migrationsSQL string

// migrate applies the embedded migrations.sql idempotently. We deliberately
// avoid a full migration framework — the schema is tiny and idempotent SQL
// keeps PDM's startup path one TCP connect + one CREATE TABLE IF NOT EXISTS.
func (s *Store) migrate(ctx context.Context) error {
	_, err := s.pool.Exec(ctx, migrationsSQL)
	return err
}
|
||||
|
||||
// Reserve atomically inserts a row in state='reserved' with the given TTL.
// Returns ErrConflict if a row already exists for the (pool, subdomain) pair
// in any state — including an expired reservation that the sweeper has not
// yet collected (we treat sweeper lag conservatively and rely on the caller
// running ExpireReservations periodically; the SQL also prunes expired rows
// for the specific key it touches as part of the same transaction).
func (s *Store) Reserve(ctx context.Context, poolDomain, subdomain string, ttl time.Duration, createdBy string) (*Allocation, error) {
	if ttl <= 0 {
		return nil, fmt.Errorf("Reserve: ttl must be positive, got %s", ttl)
	}
	tx, err := s.pool.Begin(ctx)
	if err != nil {
		return nil, fmt.Errorf("begin tx: %w", err)
	}
	// Rollback is a no-op once Commit has succeeded; it only undoes the
	// transaction on an early error return.
	defer tx.Rollback(ctx) //nolint:errcheck

	// Step 1: opportunistic prune — if a row exists for this exact key with
	// state='reserved' and expires_at in the past, delete it. This keeps the
	// reservation path responsive even if the background sweeper is slow.
	if _, err := tx.Exec(ctx, `
		DELETE FROM pool_allocations
		WHERE pool_domain = $1
		  AND subdomain = $2
		  AND state = 'reserved'
		  AND expires_at < NOW()
	`, poolDomain, subdomain); err != nil {
		return nil, fmt.Errorf("prune expired: %w", err)
	}

	now := time.Now().UTC()
	expires := now.Add(ttl)
	token := uuid.New() // opaque proof-of-reservation, required later by Commit

	// Step 2: insert. If the row already exists (active OR not-yet-expired
	// reservation) the unique constraint fires and we return ErrConflict.
	_, err = tx.Exec(ctx, `
		INSERT INTO pool_allocations
			(pool_domain, subdomain, state, reserved_at, expires_at, reservation_token, created_by)
		VALUES
			($1, $2, 'reserved', $3, $4, $5, $6)
	`, poolDomain, subdomain, now, expires, token, createdBy)
	if err != nil {
		var pgErr *pgconn.PgError
		if errors.As(err, &pgErr) && pgErr.Code == "23505" /* unique_violation */ {
			return nil, ErrConflict
		}
		return nil, fmt.Errorf("insert reservation: %w", err)
	}

	if err := tx.Commit(ctx); err != nil {
		return nil, fmt.Errorf("commit reservation: %w", err)
	}

	// Return the allocation from local values — no re-read needed, the
	// transaction just wrote exactly these.
	exp := expires
	return &Allocation{
		PoolDomain:       poolDomain,
		Subdomain:        subdomain,
		State:            StateReserved,
		ReservedAt:       now,
		ExpiresAt:        &exp,
		ReservationToken: token.String(),
		CreatedBy:        createdBy,
	}, nil
}
|
||||
|
||||
// Get returns the current allocation for the (pool, subdomain) pair, or
// ErrNotFound. Get does NOT prune expired reservations on read — callers
// who want fresh results should run after ExpireReservations or accept that
// /check may briefly return state='reserved' for a row that has just expired.
func (s *Store) Get(ctx context.Context, poolDomain, subdomain string) (*Allocation, error) {
	row := s.pool.QueryRow(ctx, `
		SELECT pool_domain, subdomain, state, reserved_at, expires_at,
		       sovereign_fqdn, load_balancer_ip, reservation_token, created_by
		FROM pool_allocations
		WHERE pool_domain = $1 AND subdomain = $2
	`, poolDomain, subdomain)

	var a Allocation
	// Nullable columns scan into pointers; NULL arrives as nil rather than
	// a zero value, and is translated to the struct's zero value below.
	var (
		expiresAt        *time.Time
		sovereignFQDN    *string
		loadBalancerIP   *string
		reservationToken *uuid.UUID
	)
	err := row.Scan(
		&a.PoolDomain,
		&a.Subdomain,
		&a.State,
		&a.ReservedAt,
		&expiresAt,
		&sovereignFQDN,
		&loadBalancerIP,
		&reservationToken,
		&a.CreatedBy,
	)
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, ErrNotFound
	}
	if err != nil {
		return nil, fmt.Errorf("scan allocation: %w", err)
	}
	a.ExpiresAt = expiresAt
	if sovereignFQDN != nil {
		a.SovereignFQDN = *sovereignFQDN
	}
	if loadBalancerIP != nil {
		a.LoadBalancerIP = *loadBalancerIP
	}
	if reservationToken != nil {
		a.ReservationToken = reservationToken.String()
	}
	return &a, nil
}
|
||||
|
||||
// IsAvailable reports whether the (pool, subdomain) pair is free for a fresh
|
||||
// reservation. A row in state='reserved' that has expired is treated as free
|
||||
// (we transparently prune it on next Reserve). All other rows = taken.
|
||||
func (s *Store) IsAvailable(ctx context.Context, poolDomain, subdomain string) (bool, error) {
|
||||
a, err := s.Get(ctx, poolDomain, subdomain)
|
||||
if errors.Is(err, ErrNotFound) {
|
||||
return true, nil
|
||||
}
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if a.State == StateReserved && a.ExpiresAt != nil && a.ExpiresAt.Before(time.Now().UTC()) {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// CommitInput is what the /commit endpoint provides.
type CommitInput struct {
	// ReservationToken must match the row's stored token (see Commit).
	ReservationToken string
	// SovereignFQDN is persisted into sovereign_fqdn on commit.
	SovereignFQDN string
	// LoadBalancerIP is persisted into load_balancer_ip on commit.
	LoadBalancerIP string
}
|
||||
|
||||
// Commit promotes an existing reservation to state='active'. Verifies the
// reservation_token matches (so a stale wizard tab can't commit somebody
// else's reservation) and that the reservation has not yet expired.
//
// Returns ErrNotFound if the row is gone, ErrConflict if it is already
// active, ErrTokenMismatch if the token doesn't match, and ErrExpired if the
// reservation TTL elapsed.
func (s *Store) Commit(ctx context.Context, poolDomain, subdomain string, in CommitInput) (*Allocation, error) {
	tx, err := s.pool.Begin(ctx)
	if err != nil {
		return nil, fmt.Errorf("begin tx: %w", err)
	}
	// Every early sentinel return below relies on this rollback to release
	// the FOR UPDATE row lock.
	defer tx.Rollback(ctx) //nolint:errcheck

	// Lock the row for the duration of this transaction.
	row := tx.QueryRow(ctx, `
		SELECT state, expires_at, reservation_token
		FROM pool_allocations
		WHERE pool_domain = $1 AND subdomain = $2
		FOR UPDATE
	`, poolDomain, subdomain)
	var (
		state            State
		expiresAt        *time.Time
		reservationToken *uuid.UUID
	)
	if err := row.Scan(&state, &expiresAt, &reservationToken); err != nil {
		if errors.Is(err, pgx.ErrNoRows) {
			return nil, ErrNotFound
		}
		return nil, fmt.Errorf("lock row: %w", err)
	}

	if state == StateActive {
		return nil, ErrConflict
	}
	if expiresAt != nil && expiresAt.Before(time.Now().UTC()) {
		return nil, ErrExpired
	}
	// An unparseable caller token can never match a stored UUID, so treat
	// it the same as a mismatch rather than a distinct error.
	wantToken, err := uuid.Parse(in.ReservationToken)
	if err != nil {
		return nil, ErrTokenMismatch
	}
	if reservationToken == nil || *reservationToken != wantToken {
		return nil, ErrTokenMismatch
	}

	// Flip to active: clear the TTL and token, record the commit metadata.
	if _, err := tx.Exec(ctx, `
		UPDATE pool_allocations
		SET state = 'active',
		    expires_at = NULL,
		    reservation_token = NULL,
		    sovereign_fqdn = $3,
		    load_balancer_ip = $4
		WHERE pool_domain = $1 AND subdomain = $2
	`, poolDomain, subdomain, in.SovereignFQDN, in.LoadBalancerIP); err != nil {
		return nil, fmt.Errorf("commit update: %w", err)
	}

	if err := tx.Commit(ctx); err != nil {
		return nil, fmt.Errorf("commit tx: %w", err)
	}

	// Re-read outside the transaction to return the canonical row shape.
	return s.Get(ctx, poolDomain, subdomain)
}
|
||||
|
||||
// Release deletes the row for (pool, subdomain) regardless of state. Returns
// the released row's previous state so the handler can decide whether to
// fire Dynadot delete calls (only state='active' rows have DNS records).
// DELETE ... RETURNING makes the delete-and-read a single atomic statement.
func (s *Store) Release(ctx context.Context, poolDomain, subdomain string) (*Allocation, error) {
	row := s.pool.QueryRow(ctx, `
		DELETE FROM pool_allocations
		WHERE pool_domain = $1 AND subdomain = $2
		RETURNING pool_domain, subdomain, state, reserved_at, expires_at,
		          sovereign_fqdn, load_balancer_ip, reservation_token, created_by
	`, poolDomain, subdomain)

	var a Allocation
	// Nullable columns scan through pointers; NULL → nil → struct zero value.
	var (
		expiresAt        *time.Time
		sovereignFQDN    *string
		loadBalancerIP   *string
		reservationToken *uuid.UUID
	)
	err := row.Scan(
		&a.PoolDomain,
		&a.Subdomain,
		&a.State,
		&a.ReservedAt,
		&expiresAt,
		&sovereignFQDN,
		&loadBalancerIP,
		&reservationToken,
		&a.CreatedBy,
	)
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, ErrNotFound
	}
	if err != nil {
		return nil, fmt.Errorf("delete allocation: %w", err)
	}
	a.ExpiresAt = expiresAt
	if sovereignFQDN != nil {
		a.SovereignFQDN = *sovereignFQDN
	}
	if loadBalancerIP != nil {
		a.LoadBalancerIP = *loadBalancerIP
	}
	if reservationToken != nil {
		a.ReservationToken = reservationToken.String()
	}
	return &a, nil
}
|
||||
|
||||
// List returns every allocation for the given pool domain. Used by the
// operator-facing /list endpoint. Rows are ordered by subdomain for stable
// output.
func (s *Store) List(ctx context.Context, poolDomain string) ([]Allocation, error) {
	rows, err := s.pool.Query(ctx, `
		SELECT pool_domain, subdomain, state, reserved_at, expires_at,
		       sovereign_fqdn, load_balancer_ip, reservation_token, created_by
		FROM pool_allocations
		WHERE pool_domain = $1
		ORDER BY subdomain
	`, poolDomain)
	if err != nil {
		return nil, fmt.Errorf("list allocations: %w", err)
	}
	defer rows.Close()

	var out []Allocation
	for rows.Next() {
		var a Allocation
		// Same nullable-column handling as Get/Release: NULL → nil pointer.
		var (
			expiresAt        *time.Time
			sovereignFQDN    *string
			loadBalancerIP   *string
			reservationToken *uuid.UUID
		)
		if err := rows.Scan(
			&a.PoolDomain,
			&a.Subdomain,
			&a.State,
			&a.ReservedAt,
			&expiresAt,
			&sovereignFQDN,
			&loadBalancerIP,
			&reservationToken,
			&a.CreatedBy,
		); err != nil {
			return nil, fmt.Errorf("scan list row: %w", err)
		}
		a.ExpiresAt = expiresAt
		if sovereignFQDN != nil {
			a.SovereignFQDN = *sovereignFQDN
		}
		if loadBalancerIP != nil {
			a.LoadBalancerIP = *loadBalancerIP
		}
		if reservationToken != nil {
			a.ReservationToken = reservationToken.String()
		}
		out = append(out, a)
	}
	// rows.Err surfaces any iteration error the loop itself did not see.
	return out, rows.Err()
}
|
||||
|
||||
// ExpireReservations deletes every state='reserved' row whose expires_at is
|
||||
// in the past. Returns the count of rows deleted. Called periodically by the
|
||||
// sweeper goroutine.
|
||||
func (s *Store) ExpireReservations(ctx context.Context) (int64, error) {
|
||||
tag, err := s.pool.Exec(ctx, `
|
||||
DELETE FROM pool_allocations
|
||||
WHERE state = 'reserved'
|
||||
AND expires_at < NOW()
|
||||
`)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("expire reservations: %w", err)
|
||||
}
|
||||
return tag.RowsAffected(), nil
|
||||
}
|
||||
|
||||
// Sentinel errors returned by the commit path; callers compare with
// errors.Is.
var (
	// ErrTokenMismatch — the reservation_token in the request did not
	// match the row's stored token (the caller is likely a stale tab).
	ErrTokenMismatch = errors.New("reservation token does not match")

	// ErrExpired — the reservation TTL elapsed before commit; the caller
	// must Reserve again before committing.
	ErrExpired = errors.New("reservation expired before commit")
)
|
||||
180
core/pool-domain-manager/internal/store/store_test.go
Normal file
180
core/pool-domain-manager/internal/store/store_test.go
Normal file
@ -0,0 +1,180 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// integrationDSN reads PDM_TEST_DSN and skips the calling test when it is
// unset. The integration suite expects a writable Postgres — CI reuses the
// Postgres service container other suites already run — so no
// testcontainers dependency is pulled into the build path.
func integrationDSN(t *testing.T) string {
	t.Helper()
	if dsn := os.Getenv("PDM_TEST_DSN"); dsn != "" {
		return dsn
	}
	t.Skip("PDM_TEST_DSN not set — skipping integration test")
	return ""
}
|
||||
|
||||
func newTestStore(t *testing.T) *Store {
|
||||
t.Helper()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
s, err := New(ctx, integrationDSN(t))
|
||||
if err != nil {
|
||||
t.Fatalf("connect: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
// Truncate after each test so subsequent runs start clean.
|
||||
_, _ = s.pool.Exec(context.Background(), `TRUNCATE pool_allocations`)
|
||||
s.Close()
|
||||
})
|
||||
return s
|
||||
}
|
||||
|
||||
func TestReserveHappyPath(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
ctx := context.Background()
|
||||
|
||||
a, err := s.Reserve(ctx, "omani.works", "tenant1", 10*time.Minute, "test")
|
||||
if err != nil {
|
||||
t.Fatalf("reserve: %v", err)
|
||||
}
|
||||
if a.State != StateReserved {
|
||||
t.Errorf("state=%s want reserved", a.State)
|
||||
}
|
||||
if _, err := uuid.Parse(a.ReservationToken); err != nil {
|
||||
t.Errorf("reservation token not a UUID: %v", err)
|
||||
}
|
||||
if a.ExpiresAt == nil || a.ExpiresAt.Before(time.Now().UTC()) {
|
||||
t.Errorf("expiresAt must be in the future, got %v", a.ExpiresAt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReserveConflict(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
ctx := context.Background()
|
||||
|
||||
if _, err := s.Reserve(ctx, "omani.works", "tenant1", 10*time.Minute, "test"); err != nil {
|
||||
t.Fatalf("first reserve: %v", err)
|
||||
}
|
||||
_, err := s.Reserve(ctx, "omani.works", "tenant1", 10*time.Minute, "test")
|
||||
if !errors.Is(err, ErrConflict) {
|
||||
t.Fatalf("second reserve: want ErrConflict, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpiryFreesName(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
ctx := context.Background()
|
||||
|
||||
if _, err := s.Reserve(ctx, "omani.works", "tenant1", 1*time.Millisecond, "test"); err != nil {
|
||||
t.Fatalf("reserve: %v", err)
|
||||
}
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Reserve again — should succeed because the previous reservation has
|
||||
// expired (the Reserve path prunes the expired row in the same tx).
|
||||
a, err := s.Reserve(ctx, "omani.works", "tenant1", 10*time.Minute, "test")
|
||||
if err != nil {
|
||||
t.Fatalf("re-reserve after expiry: %v", err)
|
||||
}
|
||||
if a.State != StateReserved {
|
||||
t.Errorf("state=%s want reserved", a.State)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommitFlipsState(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
ctx := context.Background()
|
||||
|
||||
r, err := s.Reserve(ctx, "omani.works", "tenant1", 10*time.Minute, "test")
|
||||
if err != nil {
|
||||
t.Fatalf("reserve: %v", err)
|
||||
}
|
||||
committed, err := s.Commit(ctx, "omani.works", "tenant1", CommitInput{
|
||||
ReservationToken: r.ReservationToken,
|
||||
SovereignFQDN: "tenant1.omani.works",
|
||||
LoadBalancerIP: "1.2.3.4",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("commit: %v", err)
|
||||
}
|
||||
if committed.State != StateActive {
|
||||
t.Errorf("state=%s want active", committed.State)
|
||||
}
|
||||
if committed.LoadBalancerIP != "1.2.3.4" {
|
||||
t.Errorf("lbIP=%s", committed.LoadBalancerIP)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommitTokenMismatch(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
ctx := context.Background()
|
||||
|
||||
if _, err := s.Reserve(ctx, "omani.works", "tenant1", 10*time.Minute, "test"); err != nil {
|
||||
t.Fatalf("reserve: %v", err)
|
||||
}
|
||||
_, err := s.Commit(ctx, "omani.works", "tenant1", CommitInput{
|
||||
ReservationToken: uuid.NewString(),
|
||||
SovereignFQDN: "tenant1.omani.works",
|
||||
LoadBalancerIP: "1.2.3.4",
|
||||
})
|
||||
if !errors.Is(err, ErrTokenMismatch) {
|
||||
t.Fatalf("commit: want ErrTokenMismatch, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReleaseRemovesRow(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
ctx := context.Background()
|
||||
|
||||
r, err := s.Reserve(ctx, "omani.works", "tenant1", 10*time.Minute, "test")
|
||||
if err != nil {
|
||||
t.Fatalf("reserve: %v", err)
|
||||
}
|
||||
if _, err := s.Commit(ctx, "omani.works", "tenant1", CommitInput{
|
||||
ReservationToken: r.ReservationToken,
|
||||
SovereignFQDN: "tenant1.omani.works",
|
||||
LoadBalancerIP: "1.2.3.4",
|
||||
}); err != nil {
|
||||
t.Fatalf("commit: %v", err)
|
||||
}
|
||||
freed, err := s.Release(ctx, "omani.works", "tenant1")
|
||||
if err != nil {
|
||||
t.Fatalf("release: %v", err)
|
||||
}
|
||||
if freed.State != StateActive {
|
||||
t.Errorf("freed.State=%s want active (the previous state)", freed.State)
|
||||
}
|
||||
// Now the row is gone — Get must return ErrNotFound.
|
||||
if _, err := s.Get(ctx, "omani.works", "tenant1"); !errors.Is(err, ErrNotFound) {
|
||||
t.Errorf("after release: want ErrNotFound, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpireReservationsSweeper(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
ctx := context.Background()
|
||||
|
||||
if _, err := s.Reserve(ctx, "omani.works", "x", 1*time.Millisecond, "test"); err != nil {
|
||||
t.Fatalf("reserve: %v", err)
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
deleted, err := s.ExpireReservations(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("expire: %v", err)
|
||||
}
|
||||
if deleted != 1 {
|
||||
t.Errorf("deleted=%d want 1", deleted)
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,158 @@
|
||||
# Composition: dynadot-pool-allocation.compose.openova.io — the default
# realization of XDynadotPoolAllocation.
#
# Renders the XR into provider-http Request MRs that mirror the wizard's
# imperative lifecycle against pool-domain-manager (PDM):
#
#   1. POST   .../api/v1/pool/{poolDomain}/reserve   { subdomain, createdBy }
#      → response carries reservationToken
#   2. POST   .../api/v1/pool/{poolDomain}/commit    { subdomain, reservationToken,
#                                                      sovereignFQDN, loadBalancerIP }
#   3. DELETE .../api/v1/pool/{poolDomain}/release   { subdomain }   (on XR delete)
#
# Per docs/INVIOLABLE-PRINCIPLES.md #3 the whole lifecycle stays
# declarative: the operator commits a DynadotPoolAllocation claim to Git
# and Flux + Crossplane converge — no bespoke Go, no shell scripts.
#
# provider-http reference:
#   https://github.com/crossplane-contrib/provider-http
# The cluster is expected to carry a ProviderConfig named
# 'pool-domain-manager' pointing at the in-cluster service URL; Phase-0
# cluster bootstrap ships it in the openova-system manifests.

apiVersion: apiextensions.crossplane.io/v1
kind: Composition
metadata:
  name: dynadot-pool-allocation.compose.openova.io
  labels:
    catalyst.openova.io/component: crossplane
    catalyst.openova.io/composition-family: pool-domain-manager
spec:
  compositeTypeRef:
    apiVersion: compose.openova.io/v1alpha1
    kind: XDynadotPoolAllocation

  writeConnectionSecretsToNamespace: crossplane-system

  resources:
    # ── 1. Reserve ────────────────────────────────────────────────────────
    # Calls PDM /reserve on CREATE and /release on REMOVE. provider-http
    # stores the response body on the MR status; step 2 reads the
    # reservationToken surfaced onto the XR status below.
    - name: reserve
      base:
        apiVersion: http.crossplane.io/v1alpha2
        kind: Request
        spec:
          forProvider:
            url: "" # filled by patch
            method: POST
            headers:
              Content-Type:
                - application/json
            payload:
              baseUrl: "" # patched
              body: "" # patched (subdomain + createdBy)
            mappings:
              - method: POST
                action: CREATE
                url: "" # patched
                body: "" # patched
              - method: DELETE
                action: REMOVE
                url: "" # patched (release endpoint)
                body: "" # patched
          providerConfigRef:
            name: pool-domain-manager
      patches:
        # Endpoint URLs: '<base>/api/v1/pool/<poolDomain>/reserve' on CREATE
        # and '.../release' on REMOVE. The base defaults to the in-cluster
        # ClusterIP service so a stock Catalyst Sovereign bootstrap needs no
        # per-cluster override.
        - fromFieldPath: spec.parameters.poolDomain
          toFieldPath: spec.forProvider.mappings[0].url
          transforms:
            - type: string
              string:
                fmt: "http://pool-domain-manager.openova-system.svc.cluster.local:8080/api/v1/pool/%s/reserve"
        - fromFieldPath: spec.parameters.poolDomain
          toFieldPath: spec.forProvider.mappings[1].url
          transforms:
            - type: string
              string:
                fmt: "http://pool-domain-manager.openova-system.svc.cluster.local:8080/api/v1/pool/%s/release"
        # CREATE body: { "subdomain": "<sub>", "createdBy": "crossplane" }
        - fromFieldPath: spec.parameters.subdomain
          toFieldPath: spec.forProvider.mappings[0].body
          transforms:
            - type: string
              string:
                fmt: '{"subdomain":"%s","createdBy":"crossplane"}'
        # REMOVE body: { "subdomain": "<sub>" }
        - fromFieldPath: spec.parameters.subdomain
          toFieldPath: spec.forProvider.mappings[1].body
          transforms:
            - type: string
              string:
                fmt: '{"subdomain":"%s"}'
        # Surface PDM's response onto the XR status so step 2 (and
        # debugging operators) can read it.
        - type: ToCompositeFieldPath
          fromFieldPath: status.response.body.reservationToken
          toFieldPath: status.reservationToken
        - type: ToCompositeFieldPath
          fromFieldPath: status.response.body.state
          toFieldPath: status.state
        - type: ToCompositeFieldPath
          fromFieldPath: status.response.body.expiresAt
          toFieldPath: status.expiresAt

    # ── 2. Commit ─────────────────────────────────────────────────────────
    # Calls PDM /commit using the token step 1 surfaced. provider-http
    # evaluates resources in order, so status.reservationToken is
    # populated by the time this MR renders.
    - name: commit
      base:
        apiVersion: http.crossplane.io/v1alpha2
        kind: Request
        spec:
          forProvider:
            mappings:
              - method: POST
                action: CREATE
                url: "" # patched
                body: "" # patched
            headers:
              Content-Type:
                - application/json
          providerConfigRef:
            name: pool-domain-manager
      patches:
        - fromFieldPath: spec.parameters.poolDomain
          toFieldPath: spec.forProvider.mappings[0].url
          transforms:
            - type: string
              string:
                fmt: "http://pool-domain-manager.openova-system.svc.cluster.local:8080/api/v1/pool/%s/commit"
        # Commit body needs four values; single-source string transforms
        # cannot interpolate them all, so a CombineFromComposite does it.
        - type: CombineFromComposite
          combine:
            variables:
              - fromFieldPath: spec.parameters.subdomain
              - fromFieldPath: status.reservationToken
              - fromFieldPath: spec.parameters.sovereignFQDN
              - fromFieldPath: spec.parameters.loadBalancerIP
            strategy: string
            string:
              fmt: '{"subdomain":"%s","reservationToken":"%s","sovereignFQDN":"%s","loadBalancerIP":"%s"}'
          toFieldPath: spec.forProvider.mappings[0].body
        # A successful commit flips the row to active.
        - type: ToCompositeFieldPath
          fromFieldPath: status.response.body.state
          toFieldPath: status.state
|
||||
128
platform/crossplane/compositions/xrd-pool-allocation.yaml
Normal file
128
platform/crossplane/compositions/xrd-pool-allocation.yaml
Normal file
@ -0,0 +1,128 @@
|
||||
# XRD: XDynadotPoolAllocation — Crossplane wrapper over pool-domain-manager.
#
# The PDM HTTP API (introduced in #163) is the SOLE caller of
# api.dynadot.com in the OpenOva fleet. Most traffic reaches it via
# catalyst-api during the wizard lifecycle; this XR covers the
# operator-driven path — pre-allocating a name (e.g. reserving
# `omantel.omani.works` for a Sovereign not yet provisioned) by committing
# YAML to Git and letting Crossplane converge PDM to match.
#
# Per docs/BLUEPRINT-AUTHORING.md §8 the canonical XRD group is
# compose.openova.io/v1alpha1 — NOT catalyst.openova.io.
#
# Per docs/INVIOLABLE-PRINCIPLES.md #3 this Composition is the ONLY
# declarative surface for pool allocations; the wizard's imperative path
# (catalyst-api → PDM HTTP) writes to the same PDM database, so there is
# one source of truth.
#
# Per docs/INVIOLABLE-PRINCIPLES.md #4 every value (poolDomain, subdomain,
# sovereignFQDN, loadBalancerIP) is a schema field — nothing hardcoded.

apiVersion: apiextensions.crossplane.io/v1
kind: CompositeResourceDefinition
metadata:
  name: xdynadotpoolallocations.compose.openova.io
  labels:
    catalyst.openova.io/component: crossplane
    catalyst.openova.io/composition-family: pool-domain-manager
spec:
  group: compose.openova.io
  names:
    kind: XDynadotPoolAllocation
    plural: xdynadotpoolallocations
  claimNames:
    kind: DynadotPoolAllocation
    plural: dynadotpoolallocations
  defaultCompositionRef:
    name: dynadot-pool-allocation.compose.openova.io
  versions:
    - name: v1alpha1
      served: true
      referenceable: true
      schema:
        openAPIV3Schema:
          type: object
          required: [spec]
          properties:
            spec:
              type: object
              required: [parameters]
              properties:
                parameters:
                  type: object
                  required: [poolDomain, subdomain, sovereignFQDN, loadBalancerIP]
                  properties:
                    poolDomain:
                      type: string
                      description: |
                        OpenOva-managed pool domain (e.g. omani.works,
                        openova.io). Must appear in PDM's runtime
                        DYNADOT_MANAGED_DOMAINS env var or the Composition
                        will fail to reserve.
                      pattern: '^[a-z0-9][a-z0-9.-]+[a-z0-9]$'
                    subdomain:
                      type: string
                      description: |
                        Single RFC 1035 label (a-z, 0-9, hyphens). The
                        Composition reserves '<subdomain>.<poolDomain>' in
                        PDM and writes the canonical 6-record set to
                        Dynadot at commit time.
                      pattern: '^[a-z][a-z0-9-]{0,61}[a-z0-9]$'
                    sovereignFQDN:
                      type: string
                      description: |
                        Final FQDN the Sovereign answers on. Usually
                        '<subdomain>.<poolDomain>' but can differ for
                        operator-staged migrations.
                    loadBalancerIP:
                      type: string
                      description: |
                        IPv4 of the Hetzner Load Balancer that fronts the
                        Sovereign. Operator obtains this from the prior
                        XHetznerLoadBalancer status.loadBalancerIP.
                      pattern: '^([0-9]{1,3}\.){3}[0-9]{1,3}$'
                    createdBy:
                      type: string
                      description: |
                        Audit-trail string written into pool_allocations.
                        Defaults to 'crossplane' when omitted.
                  description: |
                    PDM allocation parameters. The full lifecycle (reserve →
                    commit) is encoded in the Composition; the operator only
                    needs to supply these four fields plus the optional
                    audit-trail attribution.
            status:
              type: object
              properties:
                state:
                  type: string
                  description: PDM-reported state (reserved | active).
                  enum: [reserved, active]
                expiresAt:
                  type: string
                  format: date-time
                  description: TTL deadline when state=reserved.
                reservationToken:
                  type: string
                  format: uuid
                  description: |
                    Returned by /reserve; consumed by /commit. Operators
                    don't typically read this — Crossplane chains the calls
                    automatically — but it is exposed for debugging.
  # NOTE(review): printer columns are declared at spec level here; k8s CRDs
  # scope additionalPrinterColumns per version — confirm Crossplane's XRD
  # schema accepts this placement in the pinned Crossplane release.
  additionalPrinterColumns:
    - name: POOL
      type: string
      jsonPath: .spec.parameters.poolDomain
    - name: SUBDOMAIN
      type: string
      jsonPath: .spec.parameters.subdomain
    - name: STATE
      type: string
      jsonPath: .status.state
    - name: LB-IP
      type: string
      jsonPath: .spec.parameters.loadBalancerIP
    - name: AGE
      type: date
      jsonPath: .metadata.creationTimestamp
|
||||
@ -16,6 +16,7 @@ import (
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync"
|
||||
@ -23,6 +24,7 @@ import (
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/openova-io/openova/products/catalyst/bootstrap/api/internal/pdm"
|
||||
"github.com/openova-io/openova/products/catalyst/bootstrap/api/internal/provisioner"
|
||||
)
|
||||
|
||||
@ -37,6 +39,17 @@ type Deployment struct {
|
||||
FinishedAt time.Time
|
||||
Events chan provisioner.Event
|
||||
mu sync.Mutex
|
||||
|
||||
// PDM reservation captured before `tofu apply` for managed-pool
|
||||
// deployments. The reservationToken is held until `tofu apply`
|
||||
// returns the LB IP, at which point we POST it to PDM /commit. On
|
||||
// `tofu destroy` (or a phase-0 retry that decides to abandon) we
|
||||
// DELETE /release.
|
||||
//
|
||||
// Empty for BYO deployments — those keep their own DNS off-platform.
|
||||
pdmReservationToken string
|
||||
pdmPoolDomain string
|
||||
pdmSubdomain string
|
||||
}
|
||||
|
||||
// State returns a JSON-safe snapshot for the GET endpoint.
|
||||
@ -92,6 +105,47 @@ func (h *Handler) CreateDeployment(w http.ResponseWriter, r *http.Request) {
|
||||
StartedAt: time.Now(),
|
||||
Events: make(chan provisioner.Event, 256),
|
||||
}
|
||||
|
||||
// Reserve the pool subdomain via PDM BEFORE we kick off `tofu apply`.
|
||||
// PDM holds the name with a TTL — if `tofu apply` fails or this catalyst-
|
||||
// api Pod crashes, the TTL expires and the name is freed automatically.
|
||||
// On the success path the runProvisioning goroutine calls /commit with
|
||||
// the LB IP, which flips the reservation to ACTIVE and writes the
|
||||
// Dynadot DNS records.
|
||||
//
|
||||
// For BYO deployments (the customer owns the DNS zone) we skip PDM
|
||||
// entirely — the customer points their own CNAME at the LB IP shown
|
||||
// on the success screen.
|
||||
if req.SovereignDomainMode == "pool" && pdm.IsManagedDomain(req.SovereignPoolDomain) {
|
||||
if h.pdm == nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{
|
||||
"error": "pool-domain-manager client is not configured (POOL_DOMAIN_MANAGER_URL)",
|
||||
})
|
||||
return
|
||||
}
|
||||
reserveCtx, reserveCancel := context.WithTimeout(r.Context(), 10*time.Second)
|
||||
reservation, reserveErr := h.pdm.Reserve(reserveCtx, req.SovereignPoolDomain, req.SovereignSubdomain, "catalyst-api/deployment-"+id)
|
||||
reserveCancel()
|
||||
if reserveErr != nil {
|
||||
if errors.Is(reserveErr, pdm.ErrConflict) {
|
||||
writeJSON(w, http.StatusConflict, map[string]string{
|
||||
"error": "subdomain-conflict",
|
||||
"detail": "this subdomain has been reserved or activated for the chosen pool — pick a different name",
|
||||
})
|
||||
return
|
||||
}
|
||||
h.log.Error("pdm reserve failed", "id", id, "err", reserveErr)
|
||||
writeJSON(w, http.StatusServiceUnavailable, map[string]string{
|
||||
"error": "pdm-unavailable",
|
||||
"detail": "pool-domain-manager is temporarily unreachable: " + reserveErr.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
dep.pdmReservationToken = reservation.ReservationToken
|
||||
dep.pdmPoolDomain = reservation.PoolDomain
|
||||
dep.pdmSubdomain = reservation.Subdomain
|
||||
}
|
||||
|
||||
h.deployments.Store(id, dep)
|
||||
|
||||
// Capture status before launching the goroutine — runProvisioning races
|
||||
@ -181,6 +235,48 @@ func (h *Handler) runProvisioning(dep *Deployment) {
|
||||
)
|
||||
}
|
||||
dep.mu.Unlock()
|
||||
|
||||
// PDM lifecycle: on success, /commit with the LB IP; on failure, /release
|
||||
// so the reservation TTL doesn't have to expire to free the name. PDM is
|
||||
// the single owner of the Dynadot side-effect (it is also responsible for
|
||||
// AddSovereignRecords on commit; catalyst-api never writes DNS itself).
|
||||
if dep.pdmReservationToken != "" && h.pdm != nil {
|
||||
pdmCtx, pdmCancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer pdmCancel()
|
||||
if err == nil && result != nil {
|
||||
commitErr := h.pdm.Commit(pdmCtx, dep.pdmPoolDomain, pdm.CommitInput{
|
||||
Subdomain: dep.pdmSubdomain,
|
||||
ReservationToken: dep.pdmReservationToken,
|
||||
SovereignFQDN: result.SovereignFQDN,
|
||||
LoadBalancerIP: result.LoadBalancerIP,
|
||||
})
|
||||
if commitErr != nil {
|
||||
h.log.Error("pdm commit failed; sovereign is live but DNS records may be stale",
|
||||
"id", dep.ID,
|
||||
"poolDomain", dep.pdmPoolDomain,
|
||||
"subdomain", dep.pdmSubdomain,
|
||||
"err", commitErr,
|
||||
)
|
||||
} else {
|
||||
h.log.Info("pdm commit complete",
|
||||
"id", dep.ID,
|
||||
"poolDomain", dep.pdmPoolDomain,
|
||||
"subdomain", dep.pdmSubdomain,
|
||||
"loadBalancerIP", result.LoadBalancerIP,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
releaseErr := h.pdm.Release(pdmCtx, dep.pdmPoolDomain, dep.pdmSubdomain)
|
||||
if releaseErr != nil && !errors.Is(releaseErr, pdm.ErrNotFound) {
|
||||
h.log.Error("pdm release failed; reservation will expire on TTL",
|
||||
"id", dep.ID,
|
||||
"poolDomain", dep.pdmPoolDomain,
|
||||
"subdomain", dep.pdmSubdomain,
|
||||
"err", releaseErr,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newID() string {
|
||||
|
||||
@ -0,0 +1,122 @@
|
||||
// Tests for the catalyst-api → PDM lifecycle: reserve before tofu apply,
|
||||
// commit on success, release on failure. These cover the deployment-level
|
||||
// path #163 introduced — the wizard creates a deployment, PDM holds the
|
||||
// reservation while tofu runs, and PDM owns the eventual DNS write.
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/openova-io/openova/products/catalyst/bootstrap/api/internal/pdm"
|
||||
)
|
||||
|
||||
func TestCreateDeployment_ManagedPoolReservesViaPDM(t *testing.T) {
|
||||
t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works")
|
||||
pdm.ResetManagedDomains()
|
||||
|
||||
fake := &fakePDM{}
|
||||
h := NewWithPDM(slog.Default(), fake)
|
||||
|
||||
body, _ := json.Marshal(map[string]any{
|
||||
"sovereignFQDN": "omantel.omani.works",
|
||||
"sovereignDomainMode": "pool",
|
||||
"sovereignPoolDomain": "omani.works",
|
||||
"sovereignSubdomain": "omantel",
|
||||
"hetznerToken": "tok",
|
||||
"hetznerProjectID": "proj",
|
||||
"region": "fsn1",
|
||||
"orgName": "Omantel",
|
||||
"orgEmail": "ops@omantel.om",
|
||||
"sshPublicKey": "ssh-ed25519 AAAA test",
|
||||
})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
r := httptest.NewRequest(http.MethodPost, "/api/v1/deployments", bytes.NewReader(body))
|
||||
h.CreateDeployment(w, r)
|
||||
|
||||
// 201 — deployment row created. The runProvisioning goroutine is
|
||||
// launched in a background goroutine; in this unit test the goroutine
|
||||
// will fail at tofu exec (not installed) but for this test we only
|
||||
// care that CreateDeployment reserved before launching it.
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if len(fake.reserves) != 1 {
|
||||
t.Fatalf("expected 1 PDM reserve, got %d", len(fake.reserves))
|
||||
}
|
||||
if fake.reserves[0].pool != "omani.works" || fake.reserves[0].sub != "omantel" {
|
||||
t.Errorf("reserve called with wrong args: %+v", fake.reserves[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateDeployment_PDMConflictBlocksDeployment(t *testing.T) {
|
||||
t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works")
|
||||
pdm.ResetManagedDomains()
|
||||
|
||||
fake := &fakePDM{
|
||||
reserve: func(ctx context.Context, pool, sub, by string) (*pdm.Reservation, error) {
|
||||
return nil, pdm.ErrConflict
|
||||
},
|
||||
}
|
||||
h := NewWithPDM(slog.Default(), fake)
|
||||
|
||||
body, _ := json.Marshal(map[string]any{
|
||||
"sovereignFQDN": "omantel.omani.works",
|
||||
"sovereignDomainMode": "pool",
|
||||
"sovereignPoolDomain": "omani.works",
|
||||
"sovereignSubdomain": "omantel",
|
||||
"hetznerToken": "tok",
|
||||
"hetznerProjectID": "proj",
|
||||
"region": "fsn1",
|
||||
"orgName": "Omantel",
|
||||
"orgEmail": "ops@omantel.om",
|
||||
"sshPublicKey": "ssh-ed25519 AAAA test",
|
||||
})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
r := httptest.NewRequest(http.MethodPost, "/api/v1/deployments", bytes.NewReader(body))
|
||||
h.CreateDeployment(w, r)
|
||||
|
||||
if w.Code != http.StatusConflict {
|
||||
t.Fatalf("status=%d want 409 (subdomain-conflict), body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateDeployment_BYODoesNotReserve(t *testing.T) {
|
||||
t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works")
|
||||
pdm.ResetManagedDomains()
|
||||
|
||||
fake := &fakePDM{}
|
||||
h := NewWithPDM(slog.Default(), fake)
|
||||
|
||||
body, _ := json.Marshal(map[string]any{
|
||||
"sovereignFQDN": "k8s.acme.io",
|
||||
"sovereignDomainMode": "byo",
|
||||
"sovereignPoolDomain": "acme.io",
|
||||
"sovereignSubdomain": "k8s",
|
||||
"hetznerToken": "tok",
|
||||
"hetznerProjectID": "proj",
|
||||
"region": "fsn1",
|
||||
"orgName": "Acme",
|
||||
"orgEmail": "ops@acme.io",
|
||||
"sshPublicKey": "ssh-ed25519 AAAA test",
|
||||
})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
r := httptest.NewRequest(http.MethodPost, "/api/v1/deployments", bytes.NewReader(body))
|
||||
h.CreateDeployment(w, r)
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
// BYO must NOT consult PDM — the customer owns DNS.
|
||||
if len(fake.reserves) != 0 {
|
||||
t.Errorf("BYO reserved via PDM unexpectedly: %+v", fake.reserves)
|
||||
}
|
||||
}
|
||||
@ -1,3 +1,4 @@
|
||||
// Package handler holds shared state for all HTTP handlers.
|
||||
package handler
|
||||
|
||||
import (
|
||||
@ -6,28 +7,62 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/openova-io/openova/products/catalyst/bootstrap/api/internal/pdm"
|
||||
)
|
||||
|
||||
// Handler holds shared state for all HTTP handlers.
|
||||
//
|
||||
// dynadotAPIKey + dynadotAPISecret are read from environment variables that
|
||||
// are mounted from the dynadot-api-credentials K8s secret in the
|
||||
// openova-system namespace via ESO at deploy time. They are injected into
|
||||
// pool-domain ProvisionRequests so the provisioner can write DNS records
|
||||
// for *.{subdomain}.{pool-domain}.
|
||||
// dynadotAPIKey + dynadotAPISecret remain on the Handler so the OpenTofu
|
||||
// module's `dynadot_*` variables can still receive credentials for the
|
||||
// Phase-0 DNS bootstrap that runs at first `tofu apply` time. After #163
|
||||
// Phase 4 lands the Crossplane Composition that wraps PDM as a declarative
|
||||
// MR, even those fields go away (PDM holds the credentials; catalyst-api
|
||||
// merely calls PDM via the in-cluster service FQDN).
|
||||
//
|
||||
// pdm is the central authority for OpenOva-pool subdomain allocation
|
||||
// (introduced by #163). catalyst-api never calls api.dynadot.com directly
|
||||
// for the availability check / reservation lifecycle after this lands —
|
||||
// every interaction with the Dynadot zone flows through PDM.
|
||||
type Handler struct {
|
||||
log *slog.Logger
|
||||
deployments sync.Map // map[string]*Deployment
|
||||
dynadotAPIKey string
|
||||
dynadotAPISecret string
|
||||
|
||||
// pdm — pool-domain-manager client. Required in production; tests can
|
||||
// inject a fake via NewWithPDM. The default URL points at the in-cluster
|
||||
// service FQDN so a stock Catalyst-Zero deployment "just works" without
|
||||
// per-pod configuration.
|
||||
pdm pdmClient
|
||||
}
|
||||
|
||||
// New creates a Handler.
|
||||
// New creates a Handler with the runtime configuration loaded from env.
|
||||
//
|
||||
// POOL_DOMAIN_MANAGER_URL — defaults to the in-cluster service FQDN. Per
|
||||
// docs/INVIOLABLE-PRINCIPLES.md #4 the URL is configuration, not code; an
|
||||
// air-gapped install can override it to point at the operator's own
|
||||
// PDM endpoint.
|
||||
func New(log *slog.Logger) *Handler {
|
||||
pdmURL := os.Getenv("POOL_DOMAIN_MANAGER_URL")
|
||||
if pdmURL == "" {
|
||||
pdmURL = "http://pool-domain-manager.openova-system.svc.cluster.local:8080"
|
||||
}
|
||||
return &Handler{
|
||||
log: log,
|
||||
dynadotAPIKey: os.Getenv("DYNADOT_API_KEY"),
|
||||
dynadotAPISecret: os.Getenv("DYNADOT_API_SECRET"),
|
||||
pdm: pdm.New(pdmURL),
|
||||
}
|
||||
}
|
||||
|
||||
// NewWithPDM is exposed for tests; production code uses New.
|
||||
func NewWithPDM(log *slog.Logger, client pdmClient) *Handler {
|
||||
return &Handler{
|
||||
log: log,
|
||||
dynadotAPIKey: os.Getenv("DYNADOT_API_KEY"),
|
||||
dynadotAPISecret: os.Getenv("DYNADOT_API_SECRET"),
|
||||
pdm: client,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,39 +1,34 @@
|
||||
// Package handler — subdomains.go: pre-submit availability check.
//
// Closes the DNS-wildcard regression in #163 by routing every check for an
// OpenOva-managed pool domain through pool-domain-manager (PDM). PDM is the
// authoritative allocation source — it does not consult DNS at all, so the
// Dynadot wildcard parking record at the apex of omani.works (which made
// EVERY subdomain resolve to 185.53.179.128 and broke the previous
// LookupHost-based check) is now architecturally irrelevant for managed
// pools.
//
// Decision tree per request:
//
//  1. Validate the subdomain as an RFC 1035 label (cheap, local).
//  2. If poolDomain is in the runtime DYNADOT_MANAGED_DOMAINS list →
//     delegate to PDM via Client.Check. PDM owns the reserved-name list
//     and the allocation table; we just surface its response verbatim.
//  3. Otherwise the caller is asking about a BYO domain (a customer's own
//     DNS zone) — fall back to a DNS-based check via net.LookupHost. PDM
//     doesn't manage BYO zones; the customer's nameserver IS the source
//     of truth there.
//
// Per docs/INVIOLABLE-PRINCIPLES.md #4: PDM's URL is read from the
// POOL_DOMAIN_MANAGER_URL env var (default = in-cluster service FQDN). The
// reserved-name list lives ONLY in PDM after this commit — catalyst-api no
// longer maintains a copy.
//
// Per Lesson #24 in docs/INVIOLABLE-PRINCIPLES.md: this is a STRUCTURAL fix,
// not a bandaid. The previous DNS-based path is REMOVED for managed pools,
// not augmented. The only remaining net.LookupHost call lives in the BYO
// branch — and it is the right tool there because BYO zones are owned by
// the customer, not by OpenOva.
||||
package handler
|
||||
|
||||
import (
|
||||
@ -45,37 +40,9 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openova-io/openova/products/catalyst/bootstrap/api/internal/dynadot"
|
||||
"github.com/openova-io/openova/products/catalyst/bootstrap/api/internal/pdm"
|
||||
)
|
||||
|
||||
// reservedSubdomains — names we never let a tenant claim as their
// Sovereign root subdomain. Tenants get *.omantel.omani.works style
// records automatically; the wizard prevents claiming any name that
// would collide with the canonical control-plane sub-records.
//
// NOTE(review): the file header states the reserved-name list now lives
// ONLY in PDM and catalyst-api no longer maintains a copy — confirm this
// local set has no remaining callers before relying on it.
var reservedSubdomains = map[string]struct{}{
	"api":      {},
	"admin":    {},
	"console":  {},
	"gitea":    {},
	"harbor":   {},
	"keycloak": {},
	"www":      {},
	"mail":     {},
	"smtp":     {},
	"imap":     {},
	"vpn":      {},
	"openova":  {},
	"catalyst": {},
	"docs":     {},
	"status":   {},
	"app":      {},
	"system":   {},
	"openbao":  {},
	"vault":    {},
	"flux":     {},
	"k8s":      {},
}
|
||||
|
||||
type subdomainCheckRequest struct {
|
||||
Subdomain string `json:"subdomain"`
|
||||
// PoolDomain — the apex pool domain (e.g. "omani.works"), NOT the
|
||||
@ -88,14 +55,19 @@ type subdomainCheckRequest struct {
|
||||
//
|
||||
// available=true → subdomain is free, user can submit.
|
||||
// available=false → subdomain is taken; reason explains why.
|
||||
// (no error field) → backend reached resolver / pool list cleanly.
|
||||
//
|
||||
// reason values (managed pools mirror PDM verbatim, BYO uses local strings):
//	"invalid-format"   subdomain is not a valid RFC 1035 label
//	"unsupported-pool" poolDomain is not an OpenOva-managed pool (PDM)
//	                   — only surfaced for the BYO path's sanity check;
//	                   managed-pool requests delegate to PDM which owns
//	                   this verdict.
//	"reserved"         subdomain is in PDM's reserved list (managed)
//	"reserved-state"   PDM holds a non-expired reservation (managed)
//	"active-state"     PDM has an active allocation (managed)
//	"exists"           BYO DNS resolver returned at least one record
//	"lookup-error"     BYO DNS lookup itself failed (transient)
//	"pdm-unavailable"  PDM call failed — wizard treats as transient
||||
type SubdomainCheckResponse struct {
|
||||
Available bool `json:"available"`
|
||||
Reason string `json:"reason,omitempty"`
|
||||
@ -137,36 +109,71 @@ func (h *Handler) CheckSubdomain(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
if !dynadot.IsManagedDomain(pool) {
|
||||
// Managed pools — PDM is the authoritative source of truth.
|
||||
if pdm.IsManagedDomain(pool) {
|
||||
h.checkManagedPool(w, r.Context(), pool, sub)
|
||||
return
|
||||
}
|
||||
|
||||
// BYO domain — fall back to the legacy DNS-based check. The customer
|
||||
// owns the zone; resolving the name is the only signal we have.
|
||||
h.checkBYO(w, r.Context(), pool, sub)
|
||||
}
|
||||
|
||||
// checkManagedPool delegates to PDM. We surface PDM's response verbatim
|
||||
// (available, reason, detail, fqdn) so the wizard can render PDM's
|
||||
// authoritative messages without an extra mapping layer.
|
||||
func (h *Handler) checkManagedPool(w http.ResponseWriter, ctx context.Context, pool, sub string) {
|
||||
if h.pdm == nil {
|
||||
// Defence-in-depth: if the deployment forgot POOL_DOMAIN_MANAGER_URL,
|
||||
// surface a transient error rather than silently falling back to DNS
|
||||
// (which would resurrect the wildcard-parking bug this file exists
|
||||
// to fix).
|
||||
writeJSON(w, http.StatusOK, SubdomainCheckResponse{
|
||||
Available: false,
|
||||
Reason: "unsupported-pool",
|
||||
Detail: "pool domain " + pool + " is not managed by OpenOva — pick a different pool or use BYO",
|
||||
Reason: "pdm-unavailable",
|
||||
Detail: "pool-domain-manager client is not configured — operator must set POOL_DOMAIN_MANAGER_URL",
|
||||
FQDN: sub + "." + pool,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
if _, taken := reservedSubdomains[sub]; taken {
|
||||
pdmCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
res, err := h.pdm.Check(pdmCtx, pool, sub)
|
||||
if err != nil {
|
||||
h.log.Error("pdm check failed", "pool", pool, "sub", sub, "err", err)
|
||||
writeJSON(w, http.StatusOK, SubdomainCheckResponse{
|
||||
Available: false,
|
||||
Reason: "reserved",
|
||||
Detail: "this subdomain is reserved for the Sovereign control plane — pick a different name",
|
||||
Reason: "pdm-unavailable",
|
||||
Detail: "pool-domain-manager is temporarily unreachable — try again",
|
||||
FQDN: sub + "." + pool,
|
||||
})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, SubdomainCheckResponse{
|
||||
Available: res.Available,
|
||||
Reason: res.Reason,
|
||||
Detail: res.Detail,
|
||||
FQDN: res.FQDN,
|
||||
})
|
||||
}
|
||||
|
||||
// checkBYO performs the DNS-based availability check for customer-owned
|
||||
// (Bring-Your-Own) domains. PDM doesn't manage BYO zones — the customer's
|
||||
// nameserver is the source of truth — so net.LookupHost is the right
|
||||
// primitive here.
|
||||
func (h *Handler) checkBYO(w http.ResponseWriter, ctx context.Context, pool, sub string) {
|
||||
fqdn := sub + "." + pool
|
||||
|
||||
// Two-second timeout — long enough for global DNS but short enough
|
||||
// that the wizard's debounced keystroke loop stays responsive.
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 2*time.Second)
|
||||
dnsCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
|
||||
defer cancel()
|
||||
|
||||
addrs, err := net.DefaultResolver.LookupHost(ctx, fqdn)
|
||||
addrs, err := net.DefaultResolver.LookupHost(dnsCtx, fqdn)
|
||||
if err != nil {
|
||||
// NXDOMAIN is "not taken" — the most common, success case. Any
|
||||
// other error class (timeout, server-fail) is a transient lookup
|
||||
// problem the wizard surfaces but doesn't treat as taken.
|
||||
if isNXDomain(err) {
|
||||
writeJSON(w, http.StatusOK, SubdomainCheckResponse{
|
||||
Available: true,
|
||||
@ -182,7 +189,6 @@ func (h *Handler) CheckSubdomain(w http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
if len(addrs) == 0 {
|
||||
writeJSON(w, http.StatusOK, SubdomainCheckResponse{
|
||||
Available: true,
|
||||
@ -190,7 +196,6 @@ func (h *Handler) CheckSubdomain(w http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
writeJSON(w, http.StatusOK, SubdomainCheckResponse{
|
||||
Available: false,
|
||||
Reason: "exists",
|
||||
@ -235,3 +240,12 @@ func isNXDomain(err error) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// pdmClient is implemented by *pdm.Client. The interface lets us pass a
// fake in tests without wiring a real HTTP server.
//
// It mirrors the four PDM lifecycle calls: availability check, reservation
// before provisioning, commit once the LB IP is known, release on teardown.
type pdmClient interface {
	Check(ctx context.Context, poolDomain, subdomain string) (*pdm.CheckResult, error)
	Reserve(ctx context.Context, poolDomain, subdomain, createdBy string) (*pdm.Reservation, error)
	Commit(ctx context.Context, poolDomain string, in pdm.CommitInput) error
	Release(ctx context.Context, poolDomain, subdomain string) error
}
|
||||
|
||||
@ -0,0 +1,260 @@
|
||||
// Tests for subdomains.go — the catalyst-api side of the PDM contract.
|
||||
// These cover three architectural invariants:
|
||||
//
|
||||
// 1. Managed pools NEVER call net.LookupHost. The DNS-wildcard parking
|
||||
// record at omani.works (which previously made every subdomain
|
||||
// resolve to 185.53.179.128) cannot cause a false positive when
|
||||
// the pool is in DYNADOT_MANAGED_DOMAINS — PDM is the single source
|
||||
// of truth.
|
||||
// 2. BYO domains use net.LookupHost — the customer's nameserver is
|
||||
// authoritative; PDM doesn't manage their zone.
|
||||
// 3. The PDM client is consulted exactly once per managed-pool request,
|
||||
// with the response surfaced verbatim.
|
||||
//
|
||||
// These guarantees are what prevent the regression #163 was opened for.
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/openova-io/openova/products/catalyst/bootstrap/api/internal/pdm"
|
||||
)
|
||||
|
||||
// fakePDM is a stub pdmClient that records every call. We assert against the
// recorded calls to prove the behaviour the architecture requires.
//
// Each method appends to its recording slice, then delegates to the optional
// func field (if set) so individual tests can script responses; otherwise a
// benign default is returned.
type fakePDM struct {
	checks   []checkCall
	check    func(ctx context.Context, pool, sub string) (*pdm.CheckResult, error)
	reserves []reserveCall
	reserve  func(ctx context.Context, pool, sub, by string) (*pdm.Reservation, error)
	commits  []pdm.CommitInput
	commit   func(ctx context.Context, pool string, in pdm.CommitInput) error
	releases []releaseCall
	release  func(ctx context.Context, pool, sub string) error
}

// Call-record value types, one per pdmClient method that takes arguments.
type checkCall struct{ pool, sub string }
type reserveCall struct{ pool, sub, by string }
type releaseCall struct{ pool, sub string }
|
||||
|
||||
func (f *fakePDM) Check(ctx context.Context, pool, sub string) (*pdm.CheckResult, error) {
|
||||
f.checks = append(f.checks, checkCall{pool, sub})
|
||||
if f.check != nil {
|
||||
return f.check(ctx, pool, sub)
|
||||
}
|
||||
return &pdm.CheckResult{Available: true, FQDN: sub + "." + pool}, nil
|
||||
}
|
||||
|
||||
func (f *fakePDM) Reserve(ctx context.Context, pool, sub, by string) (*pdm.Reservation, error) {
|
||||
f.reserves = append(f.reserves, reserveCall{pool, sub, by})
|
||||
if f.reserve != nil {
|
||||
return f.reserve(ctx, pool, sub, by)
|
||||
}
|
||||
return &pdm.Reservation{
|
||||
PoolDomain: pool, Subdomain: sub, State: "reserved",
|
||||
ReservationToken: "00000000-0000-0000-0000-000000000000",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (f *fakePDM) Commit(ctx context.Context, pool string, in pdm.CommitInput) error {
|
||||
f.commits = append(f.commits, in)
|
||||
if f.commit != nil {
|
||||
return f.commit(ctx, pool, in)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *fakePDM) Release(ctx context.Context, pool, sub string) error {
|
||||
f.releases = append(f.releases, releaseCall{pool, sub})
|
||||
if f.release != nil {
|
||||
return f.release(ctx, pool, sub)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func decodeResp(t *testing.T, body io.Reader) SubdomainCheckResponse {
|
||||
t.Helper()
|
||||
var got SubdomainCheckResponse
|
||||
raw, _ := io.ReadAll(body)
|
||||
if err := json.Unmarshal(raw, &got); err != nil {
|
||||
t.Fatalf("decode response: %v (body=%s)", err, string(raw))
|
||||
}
|
||||
return got
|
||||
}
|
||||
|
||||
// makeRequest builds the JSON POST the wizard sends to the check endpoint.
func makeRequest(body string) *http.Request {
	req := httptest.NewRequest(
		http.MethodPost,
		"/api/v1/subdomains/check",
		strings.NewReader(body),
	)
	req.Header.Set("Content-Type", "application/json")
	return req
}
|
||||
|
||||
// TestCheckSubdomain_ManagedPoolDelegatesToPDM proves invariant 3: a
// managed-pool request consults the PDM client exactly once and surfaces
// its verdict.
func TestCheckSubdomain_ManagedPoolDelegatesToPDM(t *testing.T) {
	t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works,openova.io")
	// Drop the sync.Once cache so the Setenv above is re-read.
	pdm.ResetManagedDomains()

	fake := &fakePDM{
		check: func(ctx context.Context, pool, sub string) (*pdm.CheckResult, error) {
			return &pdm.CheckResult{Available: true, FQDN: sub + "." + pool}, nil
		},
	}
	h := NewWithPDM(slog.Default(), fake)

	w := httptest.NewRecorder()
	h.CheckSubdomain(w, makeRequest(`{"subdomain":"dadasg4543sdfs","poolDomain":"omani.works"}`))

	if w.Code != http.StatusOK {
		t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
	}
	got := decodeResp(t, w.Body)
	if !got.Available {
		t.Errorf("Available=false body=%+v", got)
	}
	// Exactly one delegation, with the request's pool/sub passed through.
	if len(fake.checks) != 1 || fake.checks[0].pool != "omani.works" || fake.checks[0].sub != "dadasg4543sdfs" {
		t.Errorf("expected single PDM check call, got %+v", fake.checks)
	}
}
|
||||
|
||||
// The architectural invariant: even if the customer happens to type a
// random string that "would" resolve via the omani.works wildcard, PDM
// (which has no DNS dependency) returns Available=true — i.e. the
// wildcard parking record is NEVER consulted on the managed-pool path.
func TestCheckSubdomain_WildcardParkingIsIgnored(t *testing.T) {
	t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works")
	// Drop the sync.Once cache so the Setenv above is re-read.
	pdm.ResetManagedDomains()

	fake := &fakePDM{
		check: func(ctx context.Context, pool, sub string) (*pdm.CheckResult, error) {
			// PDM has nothing in its allocation table — it returns
			// Available=true regardless of what DNS says.
			return &pdm.CheckResult{Available: true, FQDN: sub + "." + pool}, nil
		},
	}
	h := NewWithPDM(slog.Default(), fake)

	for _, sub := range []string{"foo", "dadasg4543sdfs", "totally-random-name"} {
		w := httptest.NewRecorder()
		h.CheckSubdomain(w, makeRequest(`{"subdomain":"`+sub+`","poolDomain":"omani.works"}`))
		got := decodeResp(t, w.Body)
		if !got.Available {
			t.Errorf("sub=%s: Available=false (wildcard regression!): %+v", sub, got)
		}
	}
	// Exactly one check per call — PDM is consulted, DNS is not.
	if len(fake.checks) != 3 {
		t.Errorf("expected 3 PDM checks, got %d", len(fake.checks))
	}
}
|
||||
|
||||
// TestCheckSubdomain_ManagedPoolPDMConflict verifies that a PDM "taken"
// verdict (reason + detail) is relayed to the wizard verbatim.
func TestCheckSubdomain_ManagedPoolPDMConflict(t *testing.T) {
	t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works")
	// Drop the sync.Once cache so the Setenv above is re-read.
	pdm.ResetManagedDomains()

	fake := &fakePDM{
		check: func(ctx context.Context, pool, sub string) (*pdm.CheckResult, error) {
			return &pdm.CheckResult{
				Available: false,
				Reason:    "active-state",
				Detail:    "this subdomain is already taken by a live Sovereign — pick a different name",
				FQDN:      sub + "." + pool,
			}, nil
		},
	}
	h := NewWithPDM(slog.Default(), fake)

	w := httptest.NewRecorder()
	h.CheckSubdomain(w, makeRequest(`{"subdomain":"omantel","poolDomain":"omani.works"}`))
	got := decodeResp(t, w.Body)
	if got.Available {
		t.Fatalf("expected unavailable, got %+v", got)
	}
	if got.Reason != "active-state" {
		t.Errorf("Reason=%s want active-state", got.Reason)
	}
}
|
||||
|
||||
// TestCheckSubdomain_BYOFallsBackToDNS proves invariant 2: a non-managed
// (BYO) pool bypasses PDM entirely and relies on the DNS resolver.
//
// NOTE(review): this performs a real DNS lookup of www.example.com, so it
// requires network access and will fail or flake in an offline CI sandbox —
// consider injecting a resolver seam into the handler.
func TestCheckSubdomain_BYOFallsBackToDNS(t *testing.T) {
	t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works")
	// Drop the sync.Once cache so the Setenv above is re-read.
	pdm.ResetManagedDomains()

	fake := &fakePDM{}
	h := NewWithPDM(slog.Default(), fake)

	// Pick a domain that is guaranteed to be in DNS — example.com always
	// resolves. The handler should call LookupHost and surface "exists".
	w := httptest.NewRecorder()
	h.CheckSubdomain(w, makeRequest(`{"subdomain":"www","poolDomain":"example.com"}`))
	got := decodeResp(t, w.Body)
	if got.Available {
		t.Errorf("www.example.com should resolve and be unavailable: %+v", got)
	}
	if len(fake.checks) != 0 {
		t.Errorf("BYO path must NOT consult PDM; got %d checks", len(fake.checks))
	}
}
|
||||
|
||||
// TestCheckSubdomain_BYONXDomainAvailable: a BYO name that does not resolve
// (NXDOMAIN) is reported as available.
//
// NOTE(review): like the test above, this hits the real DNS resolver and
// needs network access — it will flake offline.
func TestCheckSubdomain_BYONXDomainAvailable(t *testing.T) {
	t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works")
	// Drop the sync.Once cache so the Setenv above is re-read.
	pdm.ResetManagedDomains()

	fake := &fakePDM{}
	h := NewWithPDM(slog.Default(), fake)

	// A guaranteed-NXDOMAIN under example.com (RFC 6761).
	w := httptest.NewRecorder()
	h.CheckSubdomain(w, makeRequest(`{"subdomain":"this-name-must-not-resolve-1234567","poolDomain":"example.com"}`))
	got := decodeResp(t, w.Body)
	if !got.Available {
		t.Errorf("BYO NXDOMAIN should be available, got %+v", got)
	}
}
|
||||
|
||||
// TestCheckSubdomain_InvalidLabel: a malformed RFC 1035 label is rejected
// locally, before either PDM or DNS is consulted.
func TestCheckSubdomain_InvalidLabel(t *testing.T) {
	t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works")
	// Drop the sync.Once cache so the Setenv above is re-read.
	pdm.ResetManagedDomains()

	fake := &fakePDM{}
	h := NewWithPDM(slog.Default(), fake)

	w := httptest.NewRecorder()
	h.CheckSubdomain(w, makeRequest(`{"subdomain":"-bad-","poolDomain":"omani.works"}`))
	got := decodeResp(t, w.Body)
	if got.Available {
		t.Errorf("invalid label should be unavailable")
	}
	if got.Reason != "invalid-format" {
		t.Errorf("Reason=%s want invalid-format", got.Reason)
	}
	if len(fake.checks) != 0 {
		t.Errorf("invalid label must short-circuit before PDM is called")
	}
}
|
||||
|
||||
// TestCheckSubdomain_PDMUnavailable: a failing PDM call fails closed —
// Available=false with the transient "pdm-unavailable" reason, never a
// silent fallback to DNS.
func TestCheckSubdomain_PDMUnavailable(t *testing.T) {
	t.Setenv("DYNADOT_MANAGED_DOMAINS", "omani.works")
	// Drop the sync.Once cache so the Setenv above is re-read.
	pdm.ResetManagedDomains()

	fake := &fakePDM{
		check: func(ctx context.Context, pool, sub string) (*pdm.CheckResult, error) {
			return nil, errors.New("connection refused")
		},
	}
	h := NewWithPDM(slog.Default(), fake)

	w := httptest.NewRecorder()
	h.CheckSubdomain(w, makeRequest(`{"subdomain":"omantel","poolDomain":"omani.works"}`))
	got := decodeResp(t, w.Body)
	if got.Available {
		t.Errorf("PDM unavailable must NOT be reported as Available=true")
	}
	if got.Reason != "pdm-unavailable" {
		t.Errorf("Reason=%s want pdm-unavailable", got.Reason)
	}
}
|
||||
295
products/catalyst/bootstrap/api/internal/pdm/client.go
Normal file
295
products/catalyst/bootstrap/api/internal/pdm/client.go
Normal file
@ -0,0 +1,295 @@
|
||||
// Package pdm — HTTP client for pool-domain-manager.
|
||||
//
|
||||
// This package is the catalyst-api side of the contract introduced by #163.
|
||||
// PDM owns every Dynadot write in the OpenOva fleet; catalyst-api never calls
|
||||
// api.dynadot.com directly anymore. The wizard's pre-submit check, the
|
||||
// reservation taken before `tofu apply`, the commit after the LB IP is known,
|
||||
// and the release on `tofu destroy` all flow through this client.
|
||||
//
|
||||
// Per docs/INVIOLABLE-PRINCIPLES.md #4 the base URL is read from the
|
||||
// POOL_DOMAIN_MANAGER_URL env var — defaulting to the in-cluster service
|
||||
// FQDN so a stock catalyst-api deployment "just works" against the PDM
|
||||
// running in openova-system. Tests/dev override the env var.
|
||||
package pdm
|
||||
|
||||
import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"sync"
	"time"
)
|
||||
|
||||
// Client is the catalyst-api → PDM HTTP client.
//
// BaseURL carries no trailing slash (New trims it). HTTP is the underlying
// http.Client; exported so tests can substitute transports/timeouts.
type Client struct {
	BaseURL string
	HTTP    *http.Client
}
|
||||
|
||||
// New constructs a Client. baseURL must NOT have a trailing slash.
|
||||
func New(baseURL string) *Client {
|
||||
return &Client{
|
||||
BaseURL: strings.TrimRight(baseURL, "/"),
|
||||
HTTP: &http.Client{Timeout: 15 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
// CheckResult mirrors PDM's response shape — kept loose so the wizard can
// surface PDM's reason/detail strings verbatim without an extra mapping.
type CheckResult struct {
	Available bool   `json:"available"`
	Reason    string `json:"reason,omitempty"` // machine-readable verdict, e.g. "active-state"
	Detail    string `json:"detail,omitempty"` // human-readable explanation for the wizard
	FQDN      string `json:"fqdn,omitempty"`
}
|
||||
|
||||
// Check calls GET /api/v1/pool/{domain}/check?sub=X.
//
// 5xx responses become errors (PDM down or broken); any other status —
// including 4xx — is expected to carry a CheckResult JSON body, which is
// decoded and returned verbatim.
func (c *Client) Check(ctx context.Context, poolDomain, subdomain string) (*CheckResult, error) {
	u := fmt.Sprintf("%s/api/v1/pool/%s/check?sub=%s",
		c.BaseURL, url.PathEscape(poolDomain), url.QueryEscape(subdomain))
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
	if err != nil {
		return nil, fmt.Errorf("build request: %w", err)
	}
	resp, err := c.HTTP.Do(req)
	if err != nil {
		return nil, fmt.Errorf("pdm check: %w", err)
	}
	defer resp.Body.Close()
	// Read error deliberately ignored: a truncated body will fail the JSON
	// decode below, with the partial payload included in the message.
	body, _ := io.ReadAll(resp.Body)
	if resp.StatusCode >= 500 {
		return nil, fmt.Errorf("pdm /check status %d: %s", resp.StatusCode, truncate(string(body), 256))
	}
	var out CheckResult
	if err := json.Unmarshal(body, &out); err != nil {
		return nil, fmt.Errorf("decode pdm check: %w (body=%s)", err, truncate(string(body), 256))
	}
	return &out, nil
}
|
||||
|
||||
// Reservation is the wire response of POST /reserve.
//
// ReservedAt/ExpiresAt are passed through as strings exactly as PDM sent
// them; this client does no time parsing.
type Reservation struct {
	PoolDomain       string `json:"poolDomain"`
	Subdomain        string `json:"subdomain"`
	State            string `json:"state"`
	ReservedAt       string `json:"reservedAt"`
	ExpiresAt        string `json:"expiresAt"`
	ReservationToken string `json:"reservationToken"` // required later by Commit
	CreatedBy        string `json:"createdBy"`
}
|
||||
|
||||
// ErrConflict — PDM returned 409 Conflict (subdomain already taken).
// Compare with errors.Is.
var ErrConflict = errors.New("pool allocation conflict")

// ErrNotFound — PDM returned 404 (no row to commit/release).
// Compare with errors.Is.
var ErrNotFound = errors.New("pool allocation not found")
|
||||
|
||||
// Reserve calls POST /api/v1/pool/{domain}/reserve. Returns ErrConflict on
// 409 so callers can distinguish "name taken" from "PDM down".
//
// Only 201 Created yields a Reservation; any other non-409 status becomes an
// opaque error carrying a truncated body snippet.
func (c *Client) Reserve(ctx context.Context, poolDomain, subdomain, createdBy string) (*Reservation, error) {
	body := map[string]string{
		"subdomain": subdomain,
		"createdBy": createdBy,
	}
	raw, err := json.Marshal(body)
	if err != nil {
		return nil, err
	}
	u := fmt.Sprintf("%s/api/v1/pool/%s/reserve", c.BaseURL, url.PathEscape(poolDomain))
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, u, bytes.NewReader(raw))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := c.HTTP.Do(req)
	if err != nil {
		return nil, fmt.Errorf("pdm reserve: %w", err)
	}
	defer resp.Body.Close()
	// Read error deliberately ignored — the decode/error paths below include
	// whatever partial body was read.
	respBody, _ := io.ReadAll(resp.Body)
	switch resp.StatusCode {
	case http.StatusCreated:
		var out Reservation
		if err := json.Unmarshal(respBody, &out); err != nil {
			return nil, fmt.Errorf("decode reserve: %w (body=%s)", err, truncate(string(respBody), 256))
		}
		return &out, nil
	case http.StatusConflict:
		return nil, ErrConflict
	default:
		return nil, fmt.Errorf("pdm reserve status %d: %s", resp.StatusCode, truncate(string(respBody), 256))
	}
}
|
||||
|
||||
// CommitInput maps to PDM's commit body shape.
type CommitInput struct {
	Subdomain        string // the reserved label being committed
	ReservationToken string // token returned by Reserve
	SovereignFQDN    string
	LoadBalancerIP   string
}
|
||||
|
||||
// Commit calls POST /api/v1/pool/{domain}/commit.
//
// 200 and 202 both count as success; 404 maps to ErrNotFound so callers can
// tell "no reservation row" apart from transport failures.
func (c *Client) Commit(ctx context.Context, poolDomain string, in CommitInput) error {
	body := map[string]string{
		"subdomain":        in.Subdomain,
		"reservationToken": in.ReservationToken,
		"sovereignFQDN":    in.SovereignFQDN,
		"loadBalancerIP":   in.LoadBalancerIP,
	}
	raw, err := json.Marshal(body)
	if err != nil {
		return err
	}
	u := fmt.Sprintf("%s/api/v1/pool/%s/commit", c.BaseURL, url.PathEscape(poolDomain))
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, u, bytes.NewReader(raw))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := c.HTTP.Do(req)
	if err != nil {
		return fmt.Errorf("pdm commit: %w", err)
	}
	defer resp.Body.Close()
	// Read error deliberately ignored — body is only used in the error message.
	respBody, _ := io.ReadAll(resp.Body)
	switch resp.StatusCode {
	case http.StatusOK, http.StatusAccepted:
		return nil
	case http.StatusNotFound:
		return ErrNotFound
	default:
		return fmt.Errorf("pdm commit status %d: %s", resp.StatusCode, truncate(string(respBody), 256))
	}
}
|
||||
|
||||
// Release calls DELETE /api/v1/pool/{domain}/release.
//
// 200/202 are success; 404 maps to ErrNotFound.
//
// NOTE(review): this sends a JSON body on a DELETE request — some proxies
// and middleboxes drop DELETE bodies; confirm the deployment path preserves
// it (or that PDM also accepts the subdomain via query/path).
func (c *Client) Release(ctx context.Context, poolDomain, subdomain string) error {
	body := map[string]string{"subdomain": subdomain}
	raw, err := json.Marshal(body)
	if err != nil {
		return err
	}
	u := fmt.Sprintf("%s/api/v1/pool/%s/release", c.BaseURL, url.PathEscape(poolDomain))
	req, err := http.NewRequestWithContext(ctx, http.MethodDelete, u, bytes.NewReader(raw))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := c.HTTP.Do(req)
	if err != nil {
		return fmt.Errorf("pdm release: %w", err)
	}
	defer resp.Body.Close()
	// Read error deliberately ignored — body is only used in the error message.
	respBody, _ := io.ReadAll(resp.Body)
	switch resp.StatusCode {
	case http.StatusOK, http.StatusAccepted:
		return nil
	case http.StatusNotFound:
		return ErrNotFound
	default:
		return fmt.Errorf("pdm release status %d: %s", resp.StatusCode, truncate(string(respBody), 256))
	}
}
|
||||
|
||||
// truncate caps s at max bytes, appending "..." when anything was cut.
// Byte-based: a multi-byte rune straddling the cut is split, which is
// acceptable for the log/error snippets this helper serves.
func truncate(s string, max int) string {
	if len(s) > max {
		return s[:max] + "..."
	}
	return s
}
|
||||
|
||||
// ── Managed-pool resolution ─────────────────────────────────────────────
|
||||
//
|
||||
// catalyst-api needs to know which pool domains PDM owns (so it knows when
|
||||
// to delegate to PDM vs. fall back to the BYO/DNS path). PDM exposes the
|
||||
// list at /healthz, but fetching that on every wizard keystroke is wasteful.
|
||||
// Instead — per docs/INVIOLABLE-PRINCIPLES.md #4 — we read the same
|
||||
// DYNADOT_MANAGED_DOMAINS env var that the K8s ExternalSecret projects into
|
||||
// the PDM Pod, and that the same secret can project into the catalyst-api
|
||||
// Pod for this purpose. The env var value is the contract; PDM is the writer.
|
||||
|
||||
// managedDomainsState lazily caches the managed-domain set: once guards the
// one-time computation; set holds the lowercase domain set. Tests clear both
// via ResetManagedDomains.
var managedDomainsState struct {
	once sync.Once
	set  map[string]struct{}
}
|
||||
|
||||
// IsManagedDomain reports whether the given domain is in the runtime
|
||||
// DYNADOT_MANAGED_DOMAINS list. catalyst-api uses this to route /check
|
||||
// requests: managed → PDM, BYO → DNS lookup.
|
||||
//
|
||||
// Resolution order mirrors the legacy dynadot package's so a deployment
|
||||
// migrating to PDM keeps working without secret edits:
|
||||
// 1. DYNADOT_MANAGED_DOMAINS env var (canonical)
|
||||
// 2. DYNADOT_DOMAIN single-value fallback
|
||||
// 3. Built-in defaults: openova.io, omani.works
|
||||
func IsManagedDomain(domain string) bool {
|
||||
d := strings.ToLower(strings.TrimSpace(domain))
|
||||
if d == "" {
|
||||
return false
|
||||
}
|
||||
managedDomainsState.once.Do(func() {
|
||||
managedDomainsState.set = computeManagedDomains()
|
||||
})
|
||||
_, ok := managedDomainsState.set[d]
|
||||
return ok
|
||||
}
|
||||
|
||||
// ResetManagedDomains clears the cache so tests can re-evaluate after
// mutating env vars.
//
// NOTE(review): replacing a sync.Once is not safe under concurrent use of
// IsManagedDomain/ManagedDomains — fine for sequential tests; do not call
// from production code paths.
func ResetManagedDomains() {
	managedDomainsState.once = sync.Once{}
	managedDomainsState.set = nil
}
|
||||
|
||||
// ManagedDomains returns a sorted, deduplicated copy of the configured
|
||||
// managed-domain list.
|
||||
func ManagedDomains() []string {
|
||||
managedDomainsState.once.Do(func() {
|
||||
managedDomainsState.set = computeManagedDomains()
|
||||
})
|
||||
out := make([]string, 0, len(managedDomainsState.set))
|
||||
for d := range managedDomainsState.set {
|
||||
out = append(out, d)
|
||||
}
|
||||
for i := 1; i < len(out); i++ {
|
||||
for j := i; j > 0 && out[j-1] > out[j]; j-- {
|
||||
out[j-1], out[j] = out[j], out[j-1]
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func computeManagedDomains() map[string]struct{} {
|
||||
out := make(map[string]struct{})
|
||||
if raw := os.Getenv("DYNADOT_MANAGED_DOMAINS"); strings.TrimSpace(raw) != "" {
|
||||
out = splitDomainsList(raw)
|
||||
if len(out) > 0 {
|
||||
return out
|
||||
}
|
||||
}
|
||||
if d := strings.ToLower(strings.TrimSpace(os.Getenv("DYNADOT_DOMAIN"))); d != "" {
|
||||
out[d] = struct{}{}
|
||||
return out
|
||||
}
|
||||
out["openova.io"] = struct{}{}
|
||||
out["omani.works"] = struct{}{}
|
||||
return out
|
||||
}
|
||||
|
||||
// splitDomainsList parses a comma- and/or whitespace-separated domain list
// into a lowercase set. Empty entries are dropped.
func splitDomainsList(raw string) map[string]struct{} {
	normalized := strings.ReplaceAll(strings.ToLower(raw), ",", " ")
	set := make(map[string]struct{})
	for _, domain := range strings.Fields(normalized) {
		set[domain] = struct{}{}
	}
	return set
}
|
||||
Loading…
Reference in New Issue
Block a user