feat(catalyst-api): mother→child cutover data transfer at handover (#977)

The data half of the mother→child contract that PR #976 set up the
URL routing for. At handover the mother POSTs the full deployment
record (events, jobs history, HRs, cloud topology, kubeconfig meta)
to the child's POST /api/v1/internal/deployments/import — the child
persists it locally so its /api/v1/deployments/{id}/* endpoints
answer with byte-byte-identical data the operator sees on the mother
view at /sovereign/provision/<id>/<page>.

Result: on the child cluster, clean URLs (/dashboard, /apps, /jobs,
/cloud) render with REAL data (events, exec logs, job statuses,
treemap utilisation) instead of empty arrays.

- New endpoint: POST /api/v1/internal/deployments/import (child)
  Validates by FQDN match against CATALYST_OTECH_FQDN. Idempotent.
- Mother fireHandover() now posts the record to the child after the
  JWT mint as a fire-and-forget goroutine. Failure logs loudly per
  INVIOLABLE-PRINCIPLES #3 but does not block SSE emit.

Bumped: bp-catalyst-platform 1.4.27 → 1.4.28.

Co-authored-by: hatiyildiz <hatice.yildiz@openova.io>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
e3mrah 2026-05-05 20:51:03 +04:00 committed by GitHub
parent c4bc7cac89
commit ed8872a15b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 216 additions and 3 deletions

View File

@ -226,7 +226,7 @@ spec:
# every trigger call returned 502 "token-review-failed" on
# otech113 (chart 0.1.18 fixed the readiness loop but exposed
# this missing-RBAC bug as the next failure). 2026-05-05.
version: 1.4.27
version: 1.4.28
sourceRef:
kind: HelmRepository
name: bp-catalyst-platform

View File

@ -250,6 +250,20 @@ func main() {
// silently falls back to URL params there. See sovereign_self.go.
r.Get("/api/v1/sovereign/self", h.HandleSovereignSelf)
// /api/v1/internal/deployments/import — Sovereign-side receiver for
// the full deployment record POSTed by the contabo mother at
// handover time. Mother's fireHandover() ships the record here after
// the JWT mint completes; child persists it locally so the child's
// own /api/v1/deployments/{id}/* endpoints answer byte-byte-identical
// to the mother's view. Closes the data half of the mother→child
// contract (PR #976 closed the URL routing half).
//
// Outside RequireSession: cross-cluster ingress at handover happens
// before any operator session exists on the child. Validation is
// instead done by FQDN match against CATALYST_OTECH_FQDN env — a
// record claiming a different FQDN is rejected.
r.Post("/api/v1/internal/deployments/import", h.HandleDeploymentImport)
// Wire the tenant registry — flat-file store at
// CATALYST_DEPLOYMENTS_DIR/-tenant-registry.json. Per ADR-0001 §6
// the catalyst-api is the host process for the unified-rbac slice

View File

@ -0,0 +1,88 @@
// deployment_handover_export.go — mother-side cutover data transfer.
//
// At handover (fireHandover), the mother POSTs the full deployment
// record (events, jobs history, HRs, cloud topology, kubeconfig
// metadata) to the freshly-provisioned child's catalyst-api at
//
// POST https://api.<sovereign-fqdn>/api/v1/internal/deployments/import
//
// The receiving Sovereign persists it to its local store (see
// deployment_handover_import.go) so its operator-facing endpoints
// answer with byte-byte-identical data. Closes the data half of the
// mother→child contract.
package handler
import (
"bytes"
"crypto/tls"
"encoding/json"
"net/http"
"time"
)
// exportDeploymentToChild ships the deployment record to the child's
// catalyst-api. Called as a goroutine from fireHandover so it never
// blocks the SSE emit.
func (h *Handler) exportDeploymentToChild(dep *Deployment, fqdn string) {
if h.store == nil {
h.log.Warn("deployment-export: no store; cannot export record",
"id", dep.ID,
)
return
}
dep.mu.Lock()
rec := dep.toRecord()
depID := dep.ID
dep.mu.Unlock()
body, err := json.Marshal(rec)
if err != nil {
h.log.Error("deployment-export: marshal failed",
"id", depID,
"err", err,
)
return
}
url := "https://api." + fqdn + "/api/v1/internal/deployments/import"
req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(body))
if err != nil {
h.log.Error("deployment-export: NewRequest failed",
"id", depID,
"url", url,
"err", err,
)
return
}
req.Header.Set("Content-Type", "application/json")
client := &http.Client{
Timeout: 30 * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec // child's LE cert may be seconds behind handover; operator browsers always see the validated cert
},
}
resp, err := client.Do(req)
if err != nil {
h.log.Error("deployment-export: POST failed",
"id", depID,
"url", url,
"err", err,
)
return
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
h.log.Error("deployment-export: child rejected import",
"id", depID,
"url", url,
"status", resp.StatusCode,
)
return
}
h.log.Info("deployment-export: shipped to child",
"id", depID,
"url", url,
"events", len(rec.Events),
)
}

View File

@ -0,0 +1,100 @@
// deployment_handover_import.go — POST /api/v1/internal/deployments/import
//
// Sovereign-side endpoint that receives the full deployment record from
// the contabo mother at handover time. Mother's fireHandover() POSTs the
// record here AFTER mint+persist on its side; the receiving Sovereign
// stores it locally so its `/api/v1/deployments/{id}/...` endpoints
// answer with byte-byte-identical data.
//
// This closes the data half of the mother→child contract. Combined with
// PR #976's URL routing (clean /dashboard, /apps, /jobs etc on child),
// the operator's Sovereign Console renders pixel-byte-byte identical to
// the mother view at console.openova.io/sovereign/provision/<id>/<page>.
//
// Auth: NOT session-gated. The endpoint validates the request body by
// checking that the deployment record's SovereignFQDN matches the
// receiving cluster's own CATALYST_OTECH_FQDN env. A request to import
// a record claiming a different FQDN is silently rejected — protects
// against cross-cluster data poisoning.
//
// Idempotent: if a record with the same id already exists in the local
// store, the import OVERWRITES it (intentional — mother is the source
// of truth for events / job history during provisioning, and a re-fire
// of handover is a legitimate retry).
package handler
import (
"encoding/json"
"net/http"
"os"
"strings"
"github.com/openova-io/openova/products/catalyst/bootstrap/api/internal/store"
)
// HandleDeploymentImport receives the full deployment record from the
// mother and persists it to this cluster's local catalyst-api store.
func (h *Handler) HandleDeploymentImport(w http.ResponseWriter, r *http.Request) {
if h.store == nil {
writeJSON(w, http.StatusServiceUnavailable, map[string]string{
"error": "store-unavailable",
"detail": "catalyst-api has no persistence layer; cannot import deployment record",
})
return
}
var rec store.Record
if err := json.NewDecoder(r.Body).Decode(&rec); err != nil {
writeJSON(w, http.StatusBadRequest, map[string]string{
"error": "bad-record",
"detail": err.Error(),
})
return
}
// FQDN check — protect against cross-cluster data poisoning.
expectedFQDN := strings.TrimSpace(os.Getenv("CATALYST_OTECH_FQDN"))
if expectedFQDN == "" {
writeJSON(w, http.StatusServiceUnavailable, map[string]string{
"error": "not-a-sovereign",
"detail": "this catalyst-api is not running on a Sovereign cluster (CATALYST_OTECH_FQDN unset) — refusing to accept deployment-record import",
})
return
}
if !strings.EqualFold(rec.Request.SovereignFQDN, expectedFQDN) {
writeJSON(w, http.StatusForbidden, map[string]string{
"error": "fqdn-mismatch",
"detail": "deployment record's SovereignFQDN does not match this cluster's CATALYST_OTECH_FQDN — refusing to import a foreign record",
})
return
}
if strings.TrimSpace(rec.ID) == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{
"error": "missing-id",
"detail": "deployment record carries no id",
})
return
}
if err := h.store.Save(rec); err != nil {
h.log.Error("deployment-import: store.Save failed",
"id", rec.ID,
"err", err,
)
writeJSON(w, http.StatusInternalServerError, map[string]string{
"error": "save-failed",
"detail": err.Error(),
})
return
}
h.log.Info("deployment-import: persisted",
"id", rec.ID,
"fqdn", rec.Request.SovereignFQDN,
"events", len(rec.Events),
)
writeJSON(w, http.StatusOK, map[string]any{
"ok": true,
"deploymentId": rec.ID,
})
}

View File

@ -674,6 +674,17 @@ func (h *Handler) fireHandover(dep *Deployment) {
dep.mu.Unlock()
h.persistDeployment(dep)
// Mother → child cutover data transfer. POST the full deployment
// record to the child's catalyst-api so its `/api/v1/deployments/{id}/*`
// endpoints answer with byte-byte-identical data the operator sees on
// the mother view. Fire-and-forget: a transient network blip during
// the POST does not block the JWT mint or SSE emit; mother stays the
// source of truth (operators can re-fire handover via /mint-handover-token
// to retry the import). Per docs/INVIOLABLE-PRINCIPLES.md #3 — no
// silent fallback, the failure is logged loudly so it surfaces in the
// catalyst-api journal.
go h.exportDeploymentToChild(dep, fqdn)
// Emit the typed SSE event. The Message field IS the data payload
// (see writeSSEEvent in deployments.go) — a JSON object the
// wizard parses verbatim. Per #768's contract the payload is

View File

@ -124,8 +124,8 @@ name: bp-catalyst-platform
# otech113 2026-05-05 — chart 0.1.18 fixed the readiness-probe loop
# but every trigger immediately got 502 in <10ms (synchronous
# apiserver permission rejection). 2026-05-05.
version: 1.4.27
appVersion: 1.4.27
version: 1.4.28
appVersion: 1.4.28
description: |
Catalyst Platform — the unified Catalyst control plane umbrella chart for Catalyst-Zero.
Composes the catalyst-{ui,api}, console, admin, marketplace UI modules and the marketplace-api backend.