# bp-catalyst-platform — Catalyst Blueprint #13 of 13. The umbrella
# Blueprint that brings up the Catalyst control plane: console, marketplace,
# admin, catalog-svc, projector, provisioning, environment-controller,
# blueprint-controller, billing.
#
# Per docs/ARCHITECTURE.md §11 (Catalyst-on-Catalyst): once this is Ready,
# the Sovereign is fully self-sufficient — sovereign-admin can log into
# console.${SOVEREIGN_FQDN} and proceed with Phase 2 day-1 setup.
#
# Wrapper chart: products/catalyst/chart/

---
# Namespace the catalyst-platform release is installed into (the HelmRelease
# in this file sets spec.targetNamespace: catalyst-system). The label records
# which Sovereign this control plane belongs to.
apiVersion: v1
kind: Namespace
metadata:
  name: catalyst-system
  labels:
    catalyst.openova.io/sovereign: ${SOVEREIGN_FQDN}
---
# OCI Helm repository that hosts the bp-catalyst-platform chart. Pulls are
# authenticated with the `ghcr-pull` Secret referenced below.
#
# source.toolkit.fluxcd.io/v1 is the GA HelmRepository API. It is served by
# the same Flux release line that serves the helm.toolkit.fluxcd.io/v2
# HelmRelease API used in this file; v1beta2 is deprecated upstream.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: bp-catalyst-platform
  namespace: flux-system
spec:
  type: oci
  interval: 15m
  url: oci://ghcr.io/openova-io
  secretRef:
    name: ghcr-pull
---
# Installs the bp-catalyst-platform umbrella chart as Helm release
# `catalyst-platform` into the catalyst-system namespace, once every
# HelmRelease listed under spec.dependsOn is Ready.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: bp-catalyst-platform
  namespace: flux-system
spec:
  interval: 15m
  releaseName: catalyst-platform
  targetNamespace: catalyst-system
  dependsOn:
    - name: bp-gitea
    # bp-gateway-api (issue #503): umbrella chart ships catalyst-ui +
    # catalyst-api HTTPRoute templates; gateway.networking.k8s.io/v1
    # CRDs must be registered first.
    - name: bp-gateway-api
    # bp-keycloak + bp-cnpg (issue #512): the catalyst-platform umbrella
    # post-install Jobs bootstrap OIDC clients in Keycloak and seed
    # PostgreSQL schemas for catalog-svc / projector / billing /
    # provisioning. Both Keycloak and cnpg take 5+ minutes to reach Ready
    # on a fresh Sovereign — without an explicit dep, the umbrella's
    # hook starts before they're warm and times out at 15m.
    # Phase-8a-preflight otech16 (2026-05-02): adding bp-keycloak +
    # bp-cnpg here makes Flux wait for both Ready=True before starting
    # the umbrella install, eliminating the race.
    - name: bp-keycloak
    - name: bp-cnpg
  chart:
    spec:
      chart: bp-catalyst-platform
      # ─── Chart changelog ───────────────────────────────────────────
      # 1.4.0 (issue #827): adds per-zone wildcard Certificate template.
      #   When `parentZones` is populated the chart renders one
      #   cert-manager.io/v1.Certificate per zone in kube-system; the
      #   Cilium Gateway listeners reference the per-zone Secrets. When
      #   `parentZones` is empty (legacy single-zone Sovereign) the chart
      #   falls back to a single Certificate covering `*.<sovereignFQDN>`
      #   so existing provisioning paths keep working.
      # 1.4.1 (PR #839): RBAC dual-mode render fix (Helm + Kustomize).
      # 1.4.2 (PR #841): POWERDNS env literal (no envsubst-mid-render).
      # 1.4.3 (issue #859): auto-provision sme-pg CNPG Cluster +
      #   sme-secrets when ingress.marketplace.enabled=true so SME
      #   services land Ready on a fresh Sovereign without hand-rolled
      #   SealedSecrets. Catalyst-Zero (contabo) keeps its pre-existing
      #   clusters/contabo-mkt/apps/sme/data/* manifests — those are
      #   outside templates/kustomization.yaml's resource list so the
      #   contabo Kustomize-mode build is unaffected.
      # 1.4.4 (issue #861): deploy FerretDB in `sme` ns + cross-ns
      #   CiliumNetworkPolicy from sme → valkey. Unblocks the 4 SME
      #   services (catalog, tenant, domain, provisioning) that pin to
      #   ferretdb.sme.svc.cluster.local for the MongoDB wire and the 2
      #   services (auth, gateway) that pin to valkey for session/state.
      #   cnpg-cluster.yaml extended to bootstrap sme_documents (FerretDB
      #   backing DB) alongside sme_billing.
      # 1.4.5 (issue #863): mirror bp-valkey's auto-generated auth
      #   password from `valkey/valkey` Secret into `sme/sme-valkey-auth`
      #   via Helm lookup, and wire VALKEY_PASSWORD into auth + gateway
      #   Deployments. Clears the NOAUTH HELLO crashloop that started
      #   appearing after 1.4.4 made cross-ns Valkey reachable.
      # 1.4.6 (issue #863 follow-up): rebuild chart artifact to bundle
      #   the rebuilt services-auth + services-gateway image (SHA fa4395f)
      #   that contains the ConnectValkeyWithAuth Go change. 1.4.5 shipped
      #   with the OLD image SHA baked in due to a race between the
      #   blueprint-release pipeline and the services-build deploy step.
      # 1.4.7 (issue #866): mirror the gitea-admin password into
      #   `sme/provisioning-github-token` so the last 1/13 SME pod
      #   (provisioning) reaches Running 1/1 on a fresh Sovereign,
      #   completing the SME stack 12/13 → 13/13. Same lookup-and-mirror
      #   pattern as valkey-cross-ns-secret.yaml (#863).
      # 1.4.8 (issue #868): fix marketplace UI PIN-signin — /api/*
      #   HTTPRoute now backendRefs sme/gateway:8080 (cross-namespace,
      #   authorised by ReferenceGrant). The previous catalyst-system/
      #   marketplace-api Service had zero backing Pods, so every signin
      #   POST 503'd at the gateway. Pairs with services-auth route alias
      #   /auth/send-pin → SendMagicLink (and /auth/verify-pin →
      #   VerifyMagicLink) so the UI's PIN-naming reaches the existing
      #   backend handler.
      # 1.4.13 (issue #882): NEW templates/sme-services/sme-tenants-
      #   kustomization.yaml renders a Flux Kustomization in flux-system
      #   that watches ./clusters/<sov-fqdn>/sme-tenants — the path the
      #   catalyst-api SME-tenant orchestrator (sme_tenant_gitops.go)
      #   commits per-tenant overlays to. Without this, POST
      #   /api/v1/sme/tenants reached state=done optimistically but no
      #   K8s resources materialised because nothing reconciled the
      #   orchestrator's write target. Gated on
      #   ingress.marketplace.enabled — non-marketplace Sovereigns don't
      #   run the SME tenant pipeline.
      # 1.4.14 (issue #879 follow-up): chart-version-only republish to
      #   bake catalyst-api image SHA 7bfd6df (the #879 fix commit) into
      #   values.yaml. 1.4.13 OCI bytes still reference the OLD image SHA
      #   because the deploy-bot updated values.yaml AFTER the chart was
      #   published. Same deploy-step race documented in 1.4.6 / 1.4.9 /
      #   1.4.12 changelog.
      # 1.4.15 (issue #887): auto-provision marketplace-api-secrets
      #   Secret on Sovereign install. templates/marketplace-api/
      #   deployment.yaml referenced a secretKeyRef on
      #   `marketplace-api-secrets` but the chart never rendered the
      #   Secret — caught live on otech103, marketplace-api in
      #   CreateContainerConfigError. Fix mirrors sme-secrets/
      #   valkey-cross-ns-secret/provisioning-github-token Helm-lookup
      #   persistence pattern. helm.sh/resource-policy: keep.
      # 1.4.16 (#893/#889 follow-up): chart-version-only republish to
      #   bake catalyst-api image SHA 727fb2f (containing the parent-
      #   kustomization.yaml index + helmrepositories.yaml emit + correct
      #   per-blueprint sourceRef.name in tenant overlay templates) into
      #   values.yaml. Without this bump the OCI artifact still references
      #   the old image and the Sovereign's tenant orchestrator emits
      #   tenant overlays with stale openova-blueprints sourceRef.
      # 1.4.17 (issue #901): unblock Sovereign Console login on every
      #   fresh provision. 3-bug chain:
      #   1. NEW templates/catalyst-openova-kc-credentials-secret.yaml
      #      auto-mirrors the canonical KC SA Secret (`keycloak/
      #      catalyst-kc-sa-credentials`) into catalyst-system as
      #      `catalyst-openova-kc-credentials` with the keys
      #      api-deployment.yaml's PIN-auth env block expects. Gated on
      #      `lookup "v1" "Secret" "keycloak" "catalyst-kc-sa-credentials"`
      #      returning non-nil — renders only on Sovereign, skips on
      #      contabo (which has its own hand-rolled Secret). Same Helm-
      #      `lookup` persistence + `helm.sh/resource-policy: keep`
      #      pattern as templates/marketplace-api/secret.yaml (#887).
      #   2. SMTP host/port/from defaults flow through .Values.sovereign.
      #      smtp.* (mail.openova.io:587 / noreply@openova.io). SMTP
      #      user/pass mirrored from `catalyst-system/sovereign-smtp-
      #      credentials` (#883) when present.
      #   3. CATALYST_POST_AUTH_REDIRECT default flips from
      #      /sovereign/wizard (mothership-only) to /sovereign/components
      #      (post-handover Sovereign homepage). Per-Sovereign overlays
      #      override via catalystApi.env additional-env patch.
      # 1.4.18 (issue #910): NEW templates/sme-services/sme-namespace.yaml
      #   creates the `sme` namespace on Sovereigns where the marketplace
      #   is enabled. Without this, chart 1.4.17 install failed 23 times
      #   with `failed to create resource: namespaces "sme" not found` on
      #   every fresh franchised Sovereign with marketplace.enabled=true —
      #   caught live on otech105 (2026-05-05). Same dual-mode contract as
      #   the rest of templates/sme-services/* (gated on
      #   ingress.marketplace.enabled, excluded from kustomization.yaml's
      #   resources: list).
      # 1.4.19 (issue #910 — Bugs 2 + 3): unblock Sovereign Console PIN-
      #   login on a freshly franchised cluster.
      #   Bug 2: CATALYST_SESSION_COOKIE_DOMAIN literal flips from
      #     `console.openova.io` to `""` (empty). On a Sovereign the
      #     request host is console.<sov-fqdn>, so the previous hardcoded
      #     value made the browser reject Set-Cookie (RFC 6265 §5.3 step 6
      #     Domain mismatch) and every /api/* request landed without a
      #     session, redirecting to /login forever. Empty value contract
      #     (Domain attribute omitted → cookie binds to request host) is
      #     correct on BOTH Sovereign (console.<sov-fqdn>) AND contabo
      #     (console.openova.io — wizard + magic-link served from the
      #     same host). Per-Sovereign overlays MAY override via
      #     catalystApi.env additional-env patch for unusual topologies.
      #
      #   Bug 3: catalyst-openova-kc-credentials-secret.yaml's smtp-
      #     user/smtp-pass lookup precedence inverts: SOURCE
      #     (sovereign-smtp-credentials, seeded by A5's provisioner #883)
      #     wins over the persisted target. Pre-1.4.19 target-wins meant
      #     first-install rendered empty SMTP creds, persisted them, and
      #     NEVER picked up A5's seeded bytes — POST /api/v1/auth/pin/
      #     issue 502'd `email-send-failed` for the life of the cluster.
      #     Source-wins makes every Flux 1m reconcile re-read the source.
      #     KC fields keep "existing target wins" because bp-keycloak
      #     auto-rotates the client-secret on every Helm upgrade and we
      #     want that rotation to require explicit operator action
      #     (delete the target) rather than auto-roll the catalyst-api
      #     Pod.
      # 1.4.20 (#924): Phase-2 SMTP cutover. SOURCE-wins precedence
      #   extended to (a) non-secret fields smtp-host/smtp-port/smtp-from
      #   so the per-Sovereign Stalwart relay (`mail.<sovereignFQDN>`)
      #   takes over from the mothership default (`mail.openova.io`) on
      #   the next reconcile after slot 95 (bp-stalwart-sovereign) lands,
      #   and (b) canonical key shape `smtp-user`/`smtp-pass` in addition
      #   to the legacy `user`/`password` source key shape — the new
      #   chart writes both shapes, this chart reads either.
      # 1.4.22 (#915 SME blockers): six chart + orchestrator fixes
      #   unblocking alice signup gates 2-6 on franchised Sovereigns —
      #   issues #934 (auth SMTP empty), #940 (provisioning placeholder
      #   GITHUB_TOKEN + hardcoded upstream github.com), #941 (catalog
      #   migrateAppDeployable missing openclaw + stalwart-mail), #942
      #   (REDPANDA_BROKERS hardcoded to talentmesh — switched to NATS
      #   JetStream on Sovereigns per ADR-0001), #943 (bp-newapi
      #   silently skipped Deployment — paired bp-newapi 1.4.0 auto-
      #   provisions CNPG cluster + credentials Secret), #944 (CRITICAL
      #   cross-cluster pollution — GIT_BASE_PATH was hardcoded to
      #   contabo-mkt; chart values now template per-Sovereign with
      #   provisioning-binary Go-side validation guard refusing commits
      #   to foreign cluster trees). 2026-05-05.
      # 1.4.23: deploy-bot auto-bump (services-auth image SHA roll).
      # 1.4.24 (#934 follow-up): smeSecrets.smtp.{host,port,from,user}
      #   defaults populated with mothership relay (mail.openova.io:587)
      #   so SME auth Pod's PIN delivery (gate 2) works on Sovereigns
      #   whose A5-seeded sovereign-smtp-credentials Secret only carries
      #   smtp-user + smtp-pass without host/port/from. 2026-05-05.
      # 1.4.25: deploy-bot auto-bump (sme-services 94ffe01 image roll).
      # 1.4.26 (#957 follow-up): catalyst-api-cutover-driver
      #   ClusterRole gains `create tokenreviews.authentication.k8s.io`
      #   so /api/v1/internal/cutover/trigger can validate the
      #   auto-trigger Job's SA token via TokenReview. Without this rule
      #   every trigger call returned 502 "token-review-failed" on
      #   otech113 (chart 0.1.18 fixed the readiness loop but exposed
      #   this missing-RBAC bug as the next failure). 2026-05-05.
      # 1.4.29 (#983 follow-up): Sovereign Console URL contract — clean
      #   root URLs (/dashboard /jobs /cloud …), sovereign_self.go store
      #   fallback (data renders the moment cutover-import lands without
      #   waiting for the orchestrator's chart-values overlay write).
      #   2026-05-05.
      # NOTE(review): the changelog above stops at 1.4.29 while the pin
      # below is 1.4.32 — entries for 1.4.30–1.4.32 are missing; add them.
      version: 1.4.32
      sourceRef:
        kind: HelmRepository
        name: bp-catalyst-platform
        namespace: flux-system
  # Event-driven install: umbrella chart deploys ~10 Catalyst services
  # (console, marketplace, admin, catalog-svc, projector, provisioning,
  # environment-controller, blueprint-controller, billing). Inter-service
  # readiness via OTel/NATS subjects is multi-minute and not Helm's
  # concern. Replaces PR #221 spec.timeout: 15m.
  #
  # Issue #910 (otech105 incident, 2026-05-05): 15m was too tight for
  # bp-catalyst-platform on a fresh franchised Sovereign with the full
  # SME service stack (sme-services + tenant-orchestration + post-install
  # secret mirror Jobs). The chart genuinely needs ~20 minutes worst
  # case before remediation.retries kicks in. Bumped to 25m
  # specifically for this umbrella chart — every other bp-* chart
  # remains at its previous (or default) timeout because they install
  # in well under 5 minutes empirically.
  install:
    disableWait: true
    timeout: 25m
    remediation:
      retries: 3
  upgrade:
    disableWait: true
    timeout: 25m
    remediation:
      retries: 3
  # Per-Sovereign overrides for the umbrella — sovereign-FQDN-derived hostnames
  # for console/admin/api. All chart-level Catalyst service config (image refs,
  # OTel endpoints, NATS subjects) lives in products/catalyst/chart/values.yaml.
  values:
    global:
      sovereignFQDN: ${SOVEREIGN_FQDN}
      # sovereignLBIP — Sovereign's load-balancer public IPv4. Issue #900:
      # the Day-2 multi-domain add-domain flow uses this to pre-register
      # glue records at the customer's registrar before flipping NS.
      # Resolved via envsubst from `SOVEREIGN_LB_IP` set in the Sovereign
      # cloud-init env (rendered into bootstrap-kit by infra/hetzner from
      # hcloud_load_balancer.main.ipv4 — see infra/hetzner/main.tf:274).
      # When the Sovereign cloud-init pre-dates #900 the env stays empty
      # and the chart renders an empty `lbIP` ConfigMap key — catalyst-api
      # then short-circuits the glue registration and falls back to plain
      # set_ns (legacy behaviour).
      #
      # Quoted on purpose: an empty expansion must stay the empty string.
      # Unquoted it would parse as YAML null, which Helm treats as
      # "remove this key" when merging values.
      sovereignLBIP: "${SOVEREIGN_LB_IP}"
    ingress:
      hosts:
        console:
          host: console.${SOVEREIGN_FQDN}
        admin:
          host: admin.${SOVEREIGN_FQDN}
        marketplace:
          host: marketplace.${SOVEREIGN_FQDN}
        api:
          host: api.${SOVEREIGN_FQDN}
      # Marketplace mode (issue #710). Toggle to true via envsubst
      # MARKETPLACE_ENABLED in the per-Sovereign overlay (catalyst-api
      # writes this when the wizard's "Enable Marketplace" component is
      # checked). When true, bp-catalyst-platform 1.3.0+ renders the
      # marketplace + tenant-wildcard HTTPRoutes and the cross-namespace
      # ReferenceGrant.
      marketplace:
        enabled: ${MARKETPLACE_ENABLED:-false}
    # ─── Multi-zone parent domains (issue #827, parent epic #825) ──────
    # One wildcard Certificate per parent zone, rendered by chart 1.4.0+
    # into kube-system. Each cert renews independently; a stalled
    # DNS-01 challenge on one zone never blocks another zone's renewal.
    # Source of truth is the same ${PARENT_DOMAINS_YAML} variable used
    # by bootstrap-kit slot 11 (bp-powerdns) so the two slots stay in
    # lockstep on what the Sovereign considers a parent zone.
    # When the operator brings only one parent domain (default
    # zero-touch flow), cloud-init pre-renders this variable to a
    # single-entry array derived from ${sovereign_fqdn}.
    #
    # Left unquoted on purpose: the variable expands to a YAML array.
    # NOTE(review): nesting reconstructed as a top-level chart value
    # (`.Values.parentZones`) — confirm against the chart's values.yaml.
    parentZones: ${PARENT_DOMAINS_YAML}