openova/infra/hetzner/cloudinit-control-plane.tftpl

#cloud-config
# Catalyst Sovereign control-plane bootstrap.
# Sovereign: ${sovereign_fqdn}
# Provisioned by: catalyst-provisioner (https://console.openova.io/sovereign)
#
# This script:
#   1. Installs OS hardening (SSH password-auth off, fail2ban, unattended-upgrades).
#   2. Installs k3s with --flannel-backend=none (Cilium replaces it).
#   3. Installs Flux + bootstraps the GitRepository pointing at the shared
#      clusters/_template/ tree in the public OpenOva monorepo. The
#      Sovereign's FQDN is interpolated into the template manifests via
#      Flux postBuild.substitute ($${SOVEREIGN_FQDN}) at apply time, so
#      no per-Sovereign directory needs to be committed before
#      provisioning. From this point Flux is the GitOps reconciler and
#      installs the 11-component bootstrap kit (Cilium → cert-manager →
#      Crossplane → ... → bp-catalyst-platform) in dependency order via
#      Kustomizations the _template directory ships.
#   4. Touches /var/lib/catalyst/cloud-init-complete so the catalyst-api
#      provisioner can detect cloud-init has finished.

package_update: true
package_upgrade: false
packages:
  - curl
  - iptables
  - jq
  - ca-certificates
  - git
%{ if enable_fail2ban ~}
  - fail2ban
%{ endif ~}
%{ if enable_unattended_upgrades ~}
  - unattended-upgrades
  - apt-listchanges
%{ endif ~}

write_files:
  - path: /var/lib/catalyst/sovereign.json
    permissions: '0644'
    content: |
      {
        "sovereignFQDN": "${sovereign_fqdn}",
        "sovereignSubdomain": "${sovereign_subdomain}",
        "orgName": ${jsonencode(org_name)},
        "orgEmail": ${jsonencode(org_email)},
        "region": "${region}",
        "haEnabled": ${ha_enabled},
        "workerCount": ${worker_count},
        "k3sVersion": "${k3s_version}",
        "gitopsRepoUrl": "${gitops_repo_url}",
        "gitopsBranch": "${gitops_branch}"
      }

  # ── Kernel inotify limits — k3s + Flux + CNPG + bao + Helm exhaust Ubuntu defaults ──
  # Default Hetzner Ubuntu 24.04 ships fs.inotify.max_user_instances=128
  # and fs.inotify.max_user_watches=524288 — but every Helm controller,
  # CNPG operator, k3s kubelet, file-watching admin tool grabs an
  # instance slot. On a 35-component bootstrap-kit the slots run out
  # mid-install and the next process to ask gets:
  #   failed to create fsnotify watcher: too many open files
  # Diagnosed live during otech35 — bp-openbao's `bao operator init`
  # crash-looped 4× with that exact error, which Flux escalated to
  # InstallFailed/RetriesExceeded — masking the real OS-level root cause.
  #
  # Bump well above k8s/k3s production guidance so future blueprint
  # additions don't tickle the same wall.
  - path: /etc/sysctl.d/99-catalyst-inotify.conf
    permissions: '0644'
    content: |
      fs.inotify.max_user_instances = 8192
      fs.inotify.max_user_watches = 1048576
      fs.inotify.max_queued_events = 16384

  # ── OS hardening: SSH daemon ──────────────────────────────────────────
  # Drop-in overrides /etc/ssh/sshd_config defaults. Per Catalyst's threat
  # model the operator's only valid path in is the Hetzner-project SSH key
  # injected via cloud-init authorized_keys. Password auth, KbdInteractive,
  # and root password login are all off.
  - path: /etc/ssh/sshd_config.d/99-catalyst-hardening.conf
    permissions: '0644'
    content: |
      # Managed by Catalyst Sovereign cloud-init — do not edit by hand.
      PasswordAuthentication no
      KbdInteractiveAuthentication no
      ChallengeResponseAuthentication no
      PermitRootLogin prohibit-password
      PermitEmptyPasswords no
      UsePAM yes
      X11Forwarding no
      AllowAgentForwarding no
      AllowTcpForwarding no
      ClientAliveInterval 300
      ClientAliveCountMax 2
      MaxAuthTries 3
      LoginGraceTime 30

%{ if enable_unattended_upgrades ~}
  # ── Unattended security upgrades ──────────────────────────────────────
  # Ubuntu's stock unattended-upgrades, restricted to the security pocket.
  # Runs daily, reboots automatically at 02:30 if a kernel upgrade requires
  # it (k3s tolerates single-node restarts on a solo Sovereign within the
  # ~60s window the Hetzner LB health-check covers).
  - path: /etc/apt/apt.conf.d/20auto-upgrades
    permissions: '0644'
    content: |
      APT::Periodic::Update-Package-Lists "1";
      APT::Periodic::Unattended-Upgrade "1";
      APT::Periodic::AutocleanInterval "7";
  - path: /etc/apt/apt.conf.d/52unattended-upgrades-catalyst
    permissions: '0644'
    content: |
      Unattended-Upgrade::Allowed-Origins {
        "$${distro_id}:$${distro_codename}-security";
        "$${distro_id}ESMApps:$${distro_codename}-apps-security";
        "$${distro_id}ESM:$${distro_codename}-infra-security";
      };
      Unattended-Upgrade::Automatic-Reboot "true";
      Unattended-Upgrade::Automatic-Reboot-Time "02:30";
      Unattended-Upgrade::Remove-Unused-Kernel-Packages "true";
      Unattended-Upgrade::Remove-Unused-Dependencies "true";
%{ endif ~}

%{ if enable_fail2ban ~}
  # ── fail2ban: sshd jail ───────────────────────────────────────────────
  # Even though SSH is firewalled to ssh_allowed_cidrs (or fully closed at
  # the firewall), fail2ban remains a defence-in-depth layer for the case
  # where the firewall rule is widened by an operator post-bootstrap.
  - path: /etc/fail2ban/jail.d/catalyst-sshd.local
    permissions: '0644'
    content: |
      [sshd]
      enabled = true
      port = ssh
      filter = sshd
      maxretry = 5
      findtime = 10m
      bantime = 1h
      backend = systemd
%{ endif ~}

  # ── flux-system/ghcr-pull Secret ─────────────────────────────────────
  #
  # Every HelmRepository CR in clusters/_template/bootstrap-kit/
  # references `secretRef: name: ghcr-pull` because the bp-* OCI artifacts
  # at `ghcr.io/openova-io/` are PRIVATE. Without this Secret, the
  # source-controller logs:
  #
  #   failed to get authentication secret 'flux-system/ghcr-pull':
  #     secrets "ghcr-pull" not found
  #
  # …and Phase 1 stalls at bp-cilium. The operator workaround (kubectl
  # apply the Secret by hand after Flux installs) is not durable across
  # re-provisioning of the same Sovereign — every fresh control-plane
  # boots without the Secret.
  #
  # We write the Secret into flux-system at cloud-init time, BEFORE
  # /var/lib/catalyst/flux-bootstrap.yaml is applied, so the GitRepository +
  # Kustomization land into a cluster that already has working GHCR creds.
  # The apply step is in runcmd: below; the manifest itself lives here.
  #
  # Token rotation policy: yearly, stored in 1Password under
  # "Catalyst — GHCR pull token (catalyst-ghcr-pull-token)". See
  # docs/SECRET-ROTATION.md. The token NEVER lives in git.
  - path: /var/lib/catalyst/ghcr-pull-secret.yaml
    permissions: '0600'
    content: |
      apiVersion: v1
      kind: Secret
      metadata:
        name: ghcr-pull
        namespace: flux-system
        annotations:
          # bp-reflector (slot 05a) mirrors this secret to every namespace
          # so all workloads can pull from ghcr.io/openova-io without
          # per-namespace manual creation. reflection-auto-enabled means
          # Reflector creates the copy in new namespaces as they appear.
          reflector.v1.k8s.emberstack.com/reflection-allowed: "true"
          reflector.v1.k8s.emberstack.com/reflection-allowed-namespaces: ""
          reflector.v1.k8s.emberstack.com/reflection-auto-enabled: "true"
          reflector.v1.k8s.emberstack.com/reflection-auto-namespaces: ""
      type: kubernetes.io/dockerconfigjson
      data:
        .dockerconfigjson: ${base64encode(jsonencode({
          auths = {
            "ghcr.io" = {
              username = ghcr_pull_username
              password = ghcr_pull_token
              auth     = ghcr_pull_auth_b64
            }
          }
        }))}

  # ── flux-system/harbor-robot-token Secret (issue #557 follow-up) ─────
  #
  # The catalyst-api Pod template (products/catalyst/chart/templates/
  # api-deployment.yaml) references a Secret named `harbor-robot-token`
  # via a REQUIRED (non-optional) secretKeyRef on every Sovereign. The
  # token authenticates pulls from the central harbor.openova.io
  # proxy-cache (proxy-dockerhub, proxy-gcr, proxy-quay, proxy-k8s,
  # proxy-ghcr) — the same value already interpolated into
  # /etc/rancher/k3s/registries.yaml below.
  #
  # Without this Secret the catalyst-api Pod stays in
  # CreateContainerConfigError indefinitely. Caught live on otech43,
  # otech45, otech46 — the operator workaround was hand-creating a
  # placeholder Secret on each iteration, which is a workaround, not
  # a fix.
  #
  # Why this Secret lives in flux-system + uses Reflector auto-mirror:
  # the same canonical pattern as `ghcr-pull` above — bp-reflector
  # (slot 05a) propagates the Secret to every namespace via
  # `reflector.v1.k8s.emberstack.com/reflection-auto-enabled: "true"`,
  # so catalyst-system (and any other namespace that needs the token)
  # picks it up event-driven on first reconcile.
  #
  # The Secret carries one key (`token`) which catalyst-api reads as
  # CATALYST_HARBOR_ROBOT_TOKEN and re-stamps onto every grandchild
  # Sovereign provision request — the Sovereign's own Sovereigns
  # (post-handover) inherit the same central proxy-cache auth.
  #
  # Token rotation: yearly, see docs/SECRET-ROTATION.md. Rotation flows
  # through `var.harbor_robot_token` → re-render cloud-init → re-apply
  # this Secret. The plaintext NEVER lives in git.
  - path: /var/lib/catalyst/harbor-robot-token-secret.yaml
    permissions: '0600'
    content: |
      apiVersion: v1
      kind: Secret
      metadata:
        name: harbor-robot-token
        namespace: flux-system
        annotations:
          # bp-reflector (slot 05a) mirrors this Secret to every
          # namespace so catalyst-api in catalyst-system (and any
          # workload added later that needs the central Harbor robot
          # auth) picks it up without per-namespace manual creation.
          reflector.v1.k8s.emberstack.com/reflection-allowed: "true"
          reflector.v1.k8s.emberstack.com/reflection-allowed-namespaces: ""
          reflector.v1.k8s.emberstack.com/reflection-auto-enabled: "true"
          reflector.v1.k8s.emberstack.com/reflection-auto-namespaces: ""
      type: Opaque
      data:
        token: ${base64encode(harbor_robot_token)}

  # ── cert-manager/dynadot-api-credentials Secret (issue #550) ─────────────
  #
  # The bp-cert-manager-dynadot-webhook Pod reads DYNADOT_API_KEY /
  # DYNADOT_API_SECRET / DYNADOT_MANAGED_DOMAINS from this Secret at startup.
  # The Secret MUST exist BEFORE the webhook Pod first starts — a missing
  # secretKeyRef (required, not optional) causes CrashLoopBackOff.
  #
  # Namespace: cert-manager — the same namespace the HelmRelease targets
  # (spec.targetNamespace: cert-manager in 49b-bp-cert-manager-dynadot-webhook.yaml).
  #
  # Why cloud-init and not a SealedSecret in git: the Dynadot credentials are
  # operator-issued, forwarded from the provisioner API wizard payload, and must
  # never land in a public git repository. Same rationale as ghcr-pull.
  - path: /var/lib/catalyst/dynadot-api-credentials.yaml
    permissions: '0600'
    content: |
      apiVersion: v1
      kind: Secret
      metadata:
        name: dynadot-api-credentials
        namespace: cert-manager
      type: Opaque
      data:
        api-key: ${base64encode(dynadot_key)}
        api-secret: ${base64encode(dynadot_secret)}
        domains: ${base64encode(dynadot_managed_domains)}

  # ── flux-system/object-storage Secret (issue #371, vendor-agnostic since #425) ─
  #
  # The Sovereign's per-cluster S3 credentials, materialised as a stock
  # Kubernetes Secret in the `flux-system` namespace. Harbor (#383) and
  # Velero (#384) consume this Secret via the canonical `secretRef` field
  # in their respective HelmRelease values blocks, e.g.
  #
  #   harbor:
  #     persistence:
  #       imageChartStorage:
  #         type: s3
  #         s3:
  #           existingSecret: object-storage
  #
  # Per #425 the Secret name is vendor-AGNOSTIC (`object-storage`, no
  # `hetzner-` prefix). A future AWS / Azure / GCP / OCI Sovereign
  # provisions the same Secret name with the same key set via its own
  # `infra/<provider>/` Tofu module — every existing chart Just Works
  # without renaming.
  #
  # The Secret is namespace-bound to flux-system so the helm-controller can
  # rewrite it into the workload namespaces at chart install time — that's
  # the same boundary `ghcr-pull` already uses, so the apply ordering in
  # runcmd: below stays a single sequenced step.
  #
  # Why pre-populated by cloud-init rather than a SealedSecret committed to
  # git: ADR-0001 §9.2 forbids bespoke cloud-API calls and Hetzner exposes
  # NO Cloud API for S3 credential issuance — they're operator-issued in
  # the Hetzner Console exactly once. Therefore catalyst-api receives the
  # plaintext from the wizard, validates it, and forwards it to the new
  # Sovereign via the same encrypted-PVC + cloud-init channel as the GHCR
  # pull token. The credentials never land in git; the only durable copies
  # are the per-deployment OpenTofu workdir (mode 0600, wiped on tofu
  # destroy) and inside the new Sovereign's etcd (encrypted at rest by
  # k3s default).
  #
  # Token rotation policy: per Hetzner's docs, the secret half is shown
  # exactly once at issue time. To rotate, the operator issues a fresh
  # credential pair in the Hetzner Console, updates the wizard payload
  # for the next provisioning, OR for an existing Sovereign uses a
  # day-2 Crossplane XRC write (the Provider+ProviderConfig planted
  # below makes this possible without a Tofu re-run; out of scope for
  # the initial bootstrap).
  - path: /var/lib/catalyst/object-storage-secret.yaml
    permissions: '0600'
    content: |
      apiVersion: v1
      kind: Secret
      metadata:
        name: object-storage
        namespace: flux-system
      type: Opaque
      stringData:
        # S3 endpoint URL — composed from object_storage_region in main.tf.
        # Format: https://<region>.your-objectstorage.com per Hetzner docs.
        s3-endpoint: ${object_storage_endpoint}
        # S3 region — fsn1 / nbg1 / hel1 (Object Storage availability is
        # European-only as of 2026-04). NOT the same as compute region.
        s3-region: ${object_storage_region}
        # Bucket name — deterministic per-Sovereign identifier composed by
        # the catalyst-api from the Sovereign's FQDN slug. Created (or
        # adopted if already present) by the minio_s3_bucket resource in
        # main.tf earlier in this same `tofu apply`.
        s3-bucket: ${object_storage_bucket_name}
        # Operator-issued S3 access key + secret key. Hetzner's docs note
        # the secret half is shown exactly once at credential creation
        # time and is irretrievable thereafter — losing it means rotating.
        s3-access-key: ${object_storage_access_key}
        s3-secret-key: ${object_storage_secret_key}

  # ── flux-system/cloud-credentials Secret (issue #425, OpenTofu→Crossplane) ─
  #
  # Bootstrap of the Crossplane Hetzner Cloud provider (planted further
  # below in this cloud-init). Carries the operator's hcloud API token —
  # the same token Tofu used to provision Phase 0 — under a single key
  # `hcloud-token`. Per ADR-0001 §11.3 + INVIOLABLE-PRINCIPLES #3,
  # Day-2 cloud-resource changes (additional Floating IPs, additional
  # buckets, scaling LoadBalancers, firewall rule edits, ...) flow
  # through Crossplane XRC writes against this Provider — NEVER through
  # bespoke Go cloud-API calls in catalyst-api, NEVER through manual
  # Tofu re-runs.
  #
  # The Secret name is vendor-agnostic (`cloud-credentials`); the
  # `hcloud-token` key name encodes the cloud-specific shape of the
  # credential. A future AWS Sovereign would write
  # `aws-access-key-id`/`aws-secret-access-key` keys into the same
  # Secret name; the matching Crossplane Provider/ProviderConfig
  # (added in the same Tofu module's cloud-init) reads them.
  - path: /var/lib/catalyst/cloud-credentials-secret.yaml
    permissions: '0600'
    content: |
      apiVersion: v1
      kind: Secret
      metadata:
        name: cloud-credentials
        namespace: flux-system
      type: Opaque
      stringData:
        hcloud-token: ${hcloud_token}

  # ── Crossplane provider-hcloud + ProviderConfig (issue #425) ────────
  #
  # Phase 0→Day-2 handover. After Flux installs Crossplane core (via
  # bp-crossplane in the bootstrap-kit), this Provider package + its
  # ProviderConfig come up in the cluster and become the seam for ALL
  # subsequent Hetzner Cloud mutations.
  #
  # Per ADR-0001 §11.3 + INVIOLABLE-PRINCIPLES #3:
  #   - OpenTofu provisions Phase 0 EXACTLY ONCE per Sovereign.
  #   - Crossplane is the only Day-2 cloud-API mutation seam.
  #   - Flux is the only GitOps reconciler.
  #   - Blueprints (`bp-<name>:<semver>` OCI) are the only install unit.
  #   - NEVER bespoke Go cloud-API calls. NEVER `exec.Command("helm",
  #     ...)`. NEVER direct `kubectl apply` of production manifests.
  #
  # Once `provider-hcloud` reaches `Healthy=True` (event the Catalyst
  # control plane observes via the Crossplane status conditions), the
  # catalyst-api's bespoke Hetzner-API calls for any RUNTIME-scaling
  # concern (additional Floating IPs, additional buckets, scaling
  # LoadBalancers, ...) MUST be retired in favour of XRC writes against
  # this Provider. Provisioning Phase 0 (the very first server, network,
  # firewall, LB, bucket) stays in this Tofu module by design — that's
  # the bootstrap exception that lets the Provider exist in the first
  # place.
  #
  # Package version pin: v0.4.0 of `crossplane-contrib/provider-hcloud`
  # is the latest stable as of 2026-05. Per INVIOLABLE-PRINCIPLES #4
  # (never hardcode), the version is operator-bumpable via PR; future
  # rotations land here in the same commit that bumps the
  # `bp-crossplane-claims` Composition referencing the new Provider
  # types.
  - path: /var/lib/catalyst/crossplane-provider-hcloud.yaml
    permissions: '0644'
    content: |
      ---
      apiVersion: pkg.crossplane.io/v1
      kind: Provider
      metadata:
        name: provider-hcloud
        labels:
          catalyst.openova.io/sovereign: ${sovereign_fqdn}
      spec:
        package: xpkg.upbound.io/crossplane-contrib/provider-hcloud:v0.4.0
        packagePullPolicy: IfNotPresent
      ---
      apiVersion: hcloud.crossplane.io/v1beta1
      kind: ProviderConfig
      metadata:
        name: default
      spec:
        credentials:
          source: Secret
          secretRef:
            namespace: flux-system
            name: cloud-credentials
            key: hcloud-token

  # ── Handover JWT public key (issue #605, Phase-8b) ───────────────────
  #
  # RFC 7517 JWK JSON of the Catalyst-Zero RS256 public key. Written to
  # /var/lib/catalyst/handover-jwt-public.jwk (mode 0600) on the new
  # Sovereign control-plane. Agent-C (auth/handover) reads this file to
  # verify the one-time handover JWT without a cross-cluster RPC.
  #
  # Per INVIOLABLE-PRINCIPLES.md #10 (credential hygiene): mode 0600 so
  # the file is readable only by root. Even though RSA public keys are
  # technically non-secret, tightening the permission costs nothing and
  # avoids any future confusion about sensitivity.
  - path: /var/lib/catalyst/handover-jwt-public.jwk
    permissions: '0600'
    content: |
      ${handover_jwt_public_key}

  # ── Cilium bootstrap values (issue #491) ─────────────────────────────
  #
  # The bootstrap helm install below MUST land the same effective values
  # as the Flux bp-cilium HelmRelease (clusters/_template/bootstrap-kit/
  # 01-cilium.yaml). Anything that differs becomes drift, and drift in
  # this particular release is fatal because:
  #
  #   1. Flux applies bp-cilium with `helm upgrade --install`, which is
  #      a no-op when the in-cluster release already has the right values
  #      and a UPGRADE when it does not.
  #   2. The bootstrap-kit Kustomization is `wait: true` (issue #492).
  #      Until cilium-agent is Ready, NO other HelmRelease in
  #      bootstrap-kit reconciles — including the bp-cilium upgrade
  #      itself, because Flux's source-controller will not pull a fresh
  #      GitRepository revision while the existing one is unhealthy.
  #   3. cilium-agent waits for the operator to register
  #      `ciliumenvoyconfigs` + `ciliumclusterwideenvoyconfigs` CRDs.
  #      The upstream chart only registers them when
  #      `envoyConfig.enabled=true`. If the bootstrap install omits
  #      that flag, the CRDs are never registered, the agent never
  #      reaches Ready, the upgrade never fires, and Phase 1 deadlocks.
  #
  # Phase-8a bug #16 (otech8 2026-05-01): the prior bootstrap helm
  # install used six --set flags (`kubeProxyReplacement`, `k8sService*`,
  # `tunnelProtocol`, `bpf.masquerade`) and produced a release missing
  # `envoyConfig.enabled`, `gatewayAPI.enabled`, `envoy.enabled`,
  # `l7Proxy`, `encryption.*`, `hubble.*`, etc. Every fresh provision
  # crash-looped cilium-agent.
  #
  # Canonical seam: this file IS the values overlay for the bootstrap
  # install, and `clusters/_template/bootstrap-kit/01-cilium.yaml`'s
  # `spec.values.cilium:` block IS the values overlay for the Flux HR.
  # The umbrella chart wraps under `cilium:` (subchart key), the
  # bootstrap install targets the upstream `cilium/cilium` chart
  # directly so values land at top level. The merged effective set
  # below mirrors `platform/cilium/chart/values.yaml`'s `cilium:`
  # block PLUS the overlay in 01-cilium.yaml. A divergence test in
  # `products/catalyst/bootstrap/api/internal/provisioner/
  # cilium_values_parity_test.go` (issue #491) locks the two files
  # together so a future operator cannot change one without the other.
  #
  # Per INVIOLABLE-PRINCIPLES.md #4 (never hardcode): the chart
  # version is parameterised below via the helm install --version flag,
  # and the values in this file are operator-overridable post-bootstrap
  # via the Flux HR's `spec.values` block (which always wins on
  # subsequent `helm upgrade`).
  - path: /var/lib/catalyst/cilium-values.yaml
    permissions: '0644'
    content: |
      # Catalyst bootstrap cilium values — MUST stay in lock-step with
      # platform/cilium/chart/values.yaml `cilium:` block + the overlay
      # in clusters/_template/bootstrap-kit/01-cilium.yaml. See the
      # comment block immediately above this write_files entry, and
      # cilium_values_parity_test.go for the regression guard.
      kubeProxyReplacement: true
      k8sServiceHost: 127.0.0.1
      k8sServicePort: 6443
      tunnelProtocol: vxlan
      bpf:
        masquerade: true
      ipam:
        mode: kubernetes
      encryption:
        enabled: true
        type: wireguard
      gatewayAPI:
        enabled: true
        gatewayClass:
          # Force GatewayClass creation regardless of CRD presence at Helm
          # render time. Default is "auto" which skips creation when the
          # gateway.networking.k8s.io CRDs are not yet present — exactly
          # what happens during bootstrap: the upstream Helm chart's
          # Capabilities check fires BEFORE bp-gateway-api has run, so
          # GatewayClass/cilium is never rendered into the release.
          # Forcing "true" ensures the GatewayClass is always created.
          # Fix for cilium-gateway-race (issue #503).
          create: "true"
      envoy:
        enabled: true
      # envoyConfig.enabled is the load-bearing flag from issue #491.
      # Without it the upstream chart skips the CiliumEnvoyConfig and
      # CiliumClusterwideEnvoyConfig CRD registrations, cilium-agent
      # waits forever for them, and the node taint
      # `node.cilium.io/agent-not-ready` never lifts.
      envoyConfig:
        enabled: true
      l7Proxy: true
      hubble:
        enabled: true
        relay:
          enabled: false
        ui:
          enabled: false
        metrics:
          # `null` (not [] and not a populated list) is the exact value
          # that makes the upstream chart skip the metrics ServiceMonitor
          # template branch. See platform/cilium/chart/values.yaml.
          enabled: null
          serviceMonitor:
            enabled: false
      l2announcements:
        enabled: false
      operator:
        replicas: 1
      resources:
        requests:
          cpu: 100m
          memory: 256Mi
        limits:
          memory: 1Gi
      prometheus:
        enabled: false
        serviceMonitor:
          enabled: false

  # ── containerd pull-through mirror: harbor.openova.io (issue #557, Option A) ──
  #
  # k3s uses containerd. Containerd's mirror table is configured via
  # /etc/rancher/k3s/registries.yaml BEFORE k3s starts — the file is
  # read once at startup and cannot be hot-reloaded without a k3s restart.
  #
  # Each `mirrors:` key is the upstream registry hostname containerd
  # intercepts. When a pod pulls `nats:2.10` (implicitly `docker.io/library/
  # nats:2.10`), containerd tries `harbor.openova.io/proxy-dockerhub/library/
  # nats:2.10` first; on a cache miss, harbor.openova.io fetches from
  # DockerHub and caches the blob for subsequent pulls by other pods or
  # Sovereign nodes.
  #
  # This eliminates DockerHub's anonymous rate limit (100 pulls/6h per IP)
  # on fresh Sovereign IPs where the bootstrap-kit pulls 50+ images in the
  # first 30 minutes.
  #
  # The `configs:` block supplies harbor.openova.io credentials so containerd
  # can authenticate against the proxy project (Harbor proxy-cache projects
  # are set Public but a robot account is provided for future private-project
  # pulls and consistent audit logging).
  #
  # harbor_robot_token is interpolated from var.harbor_robot_token (added to
  # infra/hetzner/variables.tf); the catalyst-api provisioner reads it from
  # the `harbor-robot-token` K8s Secret in the openova-harbor namespace on
  # contabo and passes it to each new Sovereign's cloud-init render at
  # provisioning time. This keeps the token out of git.
  #
  # CRITICAL ORDERING: this file MUST be written to disk BEFORE k3s installs
  # (the k3s install runcmd below). k3s reads registries.yaml at startup and
  # configures containerd's mirror table; a missing file at startup means
  # direct pulls from DockerHub for the entire lifetime of that node.
  - path: /etc/rancher/k3s/registries.yaml
    permissions: '0600'
    content: |
      # Harbor proxy-cache projects use the URL form
      #   https://harbor.openova.io/v2/<project>/<image>/manifests/<tag>
      # NOT
      #   https://harbor.openova.io/<project>/v2/<image>/manifests/<tag>
      # which is what containerd would naively build from
      # `endpoint: ["https://harbor.openova.io/<project>"]`.
      # Harbor returns its UI HTML (status 200, content-type text/html)
      # for the wrong-shape URL — containerd then surfaces:
      #   "unexpected media type text/html for sha256:..."
      # and cilium / coredns / pause-image pulls all fail forever.
      #
      # k3s registries.yaml supports a per-mirror `rewrite` map:
      # containerd builds `<endpoint>/v2/<repo>/...` (host-only endpoint),
      # then rewrite() transforms the repo path before the request goes out.
      # Mapping `(.*)` → `proxy-<flavor>/$1` produces the correct
      # Harbor-project-prefixed path. Diagnosed live during otech25.
      mirrors:
        "docker.io":
          endpoint:
            - "https://harbor.openova.io"
          rewrite:
            "(.*)": "proxy-dockerhub/$1"
        "quay.io":
          endpoint:
            - "https://harbor.openova.io"
          rewrite:
            "(.*)": "proxy-quay/$1"
        "gcr.io":
          endpoint:
            - "https://harbor.openova.io"
          rewrite:
            "(.*)": "proxy-gcr/$1"
        "registry.k8s.io":
          endpoint:
            - "https://harbor.openova.io"
          rewrite:
            "(.*)": "proxy-k8s/$1"
        "ghcr.io":
          endpoint:
            - "https://harbor.openova.io"
          rewrite:
            "(.*)": "proxy-ghcr/$1"
      configs:
        "harbor.openova.io":
          auth:
            username: "robot$openova-bot"
            password: "${harbor_robot_token}"

  # Flux GitRepository + Kustomizations that take over after k3s is up.
  #
  # ── Per-Sovereign tree vs. shared _template (issue #218) ─────────────
  #
  # Earlier revisions of this template selected a per-FQDN cluster tree
  # (`!/clusters/${sovereign_fqdn}`) and pointed the Kustomization
  # `spec.path` at `./clusters/${sovereign_fqdn}/bootstrap-kit`. That
  # required a per-Sovereign directory to be committed to the public
  # openova repo BEFORE provisioning, which the wizard does NOT do —
  # only `clusters/_template/` is canonical. Result on every fresh
  # Sovereign was Phase-1 stall:
  #   kustomization path not found:
  #     stat /tmp/kustomization-…/clusters/<fqdn>/bootstrap-kit:
  #     no such file or directory
  # (live evidence: otech.omani.works deployment ce476aaf80731a46.)
  #
  # Canonical fix: GitRepository selects the shared `_template/` tree,
  # Kustomization paths point at `clusters/_template/{bootstrap-kit,
  # infrastructure}`, and Flux's `postBuild.substitute` interpolates
  # `$${SOVEREIGN_FQDN}` into the template manifests at apply time. The
  # per-FQDN copy that prior provisioning depended on becomes a no-op:
  # one shared tree serves every Sovereign, with the Sovereign's FQDN
  # injected by Flux on the cluster instead of by sed in the repo.
  - path: /var/lib/catalyst/flux-bootstrap.yaml
    permissions: '0644'
    content: |
      apiVersion: source.toolkit.fluxcd.io/v1
      kind: GitRepository
      metadata:
        name: openova
        namespace: flux-system
      spec:
        interval: 1m
        url: ${gitops_repo_url}
        ref:
          branch: ${gitops_branch}
        ignore: |
          /*
          !/clusters/_template
          !/platform
          !/products
      ---
      # Two Flux Kustomizations with dependsOn so Crossplane CRDs land
      # before any resource that uses them is dry-run-applied.
      #
      # bootstrap-kit installs the 11 HelmReleases (Cilium, cert-manager,
      # Flux, Crossplane core, sealed-secrets, SPIRE, NATS-JetStream,
      # OpenBao, Keycloak, Gitea, bp-catalyst-platform). bp-crossplane
      # registers the Crossplane core CRDs (Provider, ProviderConfig…)
      # AND the bp-catalyst-platform umbrella reconciles the rest.
      #
      # infrastructure-config applies the cluster's Provider package +
      # ProviderConfig + Compositions. Because it dependsOn bootstrap-kit
      # AND uses wait: true, Flux waits until bootstrap-kit's HelmReleases
      # are Ready (Crossplane core + provider-hcloud installed,
      # hcloud.crossplane.io/v1beta1 CRDs registered) before dry-running
      # ProviderConfig — which is the exact ordering the prior single-
      # Kustomization model tripped over with:
      #   no matches for kind "ProviderConfig" in version
      #   "hcloud.crossplane.io/v1beta1"
      #
      # postBuild.substitute (issue #218): Flux's envsubst runs over the
      # rendered manifests after kustomize build, replacing $${SOVEREIGN_FQDN}
      # with the Sovereign's FQDN that this cloud-init was rendered for.
      # The template manifests in clusters/_template/bootstrap-kit/*.yaml
      # use $${SOVEREIGN_FQDN} as the substitution token.
      apiVersion: kustomize.toolkit.fluxcd.io/v1
      kind: Kustomization
      metadata:
        name: bootstrap-kit
        namespace: flux-system
      spec:
        interval: 5m
        path: ./clusters/_template/bootstrap-kit
        prune: true
        sourceRef:
          kind: GitRepository
          name: openova
        wait: true
        # timeout: 5m (issue #492). Phase-8a iteration discipline: when
        # the FIRST apply of bootstrap-kit is unhealthy (e.g. cilium
        # crash-loop from issue #491), kustomize-controller holds the
        # revision lock for the FULL timeout window and refuses to pick
        # up new GitRepository revisions, even fixes that have already
        # landed on main. The 30m default deadlocked otech8 deployment
        # 1bfc46347564467b: fix `66ea39f0` was on main 1m after the bad
        # SHA, but bootstrap-kit's `lastAttemptedRevision` stayed pinned
        # to the old SHA waiting for HRs to become Ready (which they
        # never would, because of #491). Operator wiped + reprovisioned.
        # 5m matches the GitRepository poll interval — failed reconciles
        # release the revision lock fast (~6m worst case) so a fresh fix
        # gets applied on the next poll. We KEEP `wait: true` to preserve
        # the consolidated "Kustomization Ready=True ⇒ every HR Ready"
        # contract that downstream `dependsOn: bootstrap-kit` relies on.
        timeout: 5m
        postBuild:
          substitute:
            SOVEREIGN_FQDN: ${sovereign_fqdn}
      ---
      apiVersion: kustomize.toolkit.fluxcd.io/v1
      kind: Kustomization
      metadata:
        name: sovereign-tls
        namespace: flux-system
      spec:
        # Carries the cert-manager Certificate that backs Cilium Gateway's
        # wildcard-TLS listener. Split out of bootstrap-kit so its
        # `cert-manager.io/v1` CRD-dependent dry-run only runs AFTER
        # bp-cert-manager is Ready (Phase-8a bug #13). dependsOn
        # bootstrap-kit's `wait: true` semantics: bootstrap-kit reaches
        # Ready iff every HelmRelease + Kustomization in it Ready=True.
        interval: 5m
        path: ./clusters/_template/sovereign-tls
        prune: true
        sourceRef:
          kind: GitRepository
          name: openova
        dependsOn:
          - name: bootstrap-kit
        wait: true
        # timeout: 5m (issue #492) — same rationale as bootstrap-kit.
        # Releases the revision lock quickly so iterative fixes during
        # Phase-8a get applied on the next GitRepository poll instead
        # of stalling 30 minutes per failed apply.
        timeout: 5m
        postBuild:
          substitute:
            SOVEREIGN_FQDN: ${sovereign_fqdn}
      ---
      apiVersion: kustomize.toolkit.fluxcd.io/v1
      kind: Kustomization
      metadata:
        name: infrastructure-config
        namespace: flux-system
      spec:
        interval: 5m
        path: ./clusters/_template/infrastructure
        prune: true
        sourceRef:
          kind: GitRepository
          name: openova
        dependsOn:
          - name: bootstrap-kit
          - name: sovereign-tls
        wait: true
        # timeout: 5m (issue #492) — same rationale as bootstrap-kit.
        # Releases the revision lock quickly so iterative fixes during
        # Phase-8a get applied on the next GitRepository poll instead
        # of stalling 30 minutes per failed apply.
        timeout: 5m
        postBuild:
          substitute:
            SOVEREIGN_FQDN: ${sovereign_fqdn}

runcmd:
  - swapoff -a
  - sed -i '/swap/d' /etc/fstab
  - update-alternatives --set iptables /usr/sbin/iptables-legacy || true
  - update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy || true

  # Apply inotify-limit bumps written by write_files. sysctl --system
  # picks up /etc/sysctl.d/*.conf so future blueprints + bao init never
  # hit "too many open files" again.
  - sysctl --system

  # Activate hardened sshd config (cloud-init may have written authorized_keys
  # already from Hetzner ssh_keys[]; we never touch that file).
  - systemctl reload ssh || systemctl reload sshd || true
%{ if enable_fail2ban ~}
  - systemctl enable --now fail2ban
%{ endif ~}
%{ if enable_unattended_upgrades ~}
  - systemctl enable --now unattended-upgrades
%{ endif ~}

  # k3s control-plane. Flags per docs/SOVEREIGN-PROVISIONING.md §3 and
  # docs/PLATFORM-TECH-STACK.md §8.1:
  #   --cluster-init                Initialise embedded etcd (HA-ready).
  #   --flannel-backend=none        Cilium replaces flannel.
  #   --disable=traefik             Cilium Gateway replaces traefik.
  #   --disable=servicelb           Hetzner LB handles ingress.
  #   --disable-network-policy      Cilium handles NetworkPolicy.
  #   --tls-san=${sovereign_fqdn}   API server cert valid for the sovereign FQDN.
  #
  # ── kube-apiserver OIDC flags (issue #326) ─────────────────────────────
  #   --kube-apiserver-arg=oidc-issuer-url=https://auth.<sovereign_fqdn>/realms/sovereign
  #   --kube-apiserver-arg=oidc-client-id=kubectl
  #   --kube-apiserver-arg=oidc-username-claim=preferred_username
  #   --kube-apiserver-arg=oidc-username-prefix=oidc:
  #   --kube-apiserver-arg=oidc-groups-claim=groups
  #   --kube-apiserver-arg=oidc-groups-prefix=oidc:
  # Wire k3s api-server's OIDC validator to the per-Sovereign Keycloak
  # realm (`sovereign`), shipped by bp-keycloak's keycloakConfigCli realm
  # import (platform/keycloak/chart/values.yaml). After the Sovereign's
  # bootstrap kit lands, customer admins authenticate kubectl against
  # Keycloak (see docs/omantel-handover-wbs.md §11 "kubectl OIDC for
  # customer admins"). The username/groups prefixes prefix every
  # OIDC-issued subject with `oidc:` so RoleBindings reference them as
  # e.g. `subjects[0].name=oidc:alice@org` — distinct from any local
  # ServiceAccount or x509 subject. Per INVIOLABLE-PRINCIPLES #4 the
  # issuer URL is composed from sovereign_fqdn — never hardcoded.
  #
  # Trust-chain note: the per-Sovereign Keycloak is exposed via the
  # `cilium-gateway` Gateway (kube-system), whose serving Certificate is
  # issued by Let's Encrypt via bp-cert-manager. k3s's kube-apiserver
  # reaches the in-cluster Keycloak Service over plain HTTPS using the
  # node's system trust store; LE roots are present by default on the
  # Ubuntu 24.04 control-plane image, so no `--oidc-ca-file` is needed
  # in this configuration. Air-gapped Sovereigns (deferred Phase 9+)
  # add a CA-file flag here when their Keycloak fronts a private CA.
  #
  # NOTE: --disable=local-storage is intentionally NOT passed. k3s ships a
  # built-in local-path-provisioner (Rancher) and registers a `local-path`
  # StorageClass. That is the canonical solo-Sovereign StorageClass:
  # PVCs (bp-spire data dir, bp-keycloak postgres, bp-openbao raft store,
  # bp-nats-jetstream, bp-gitea, bp-catalyst-platform postgres) bind to
  # node-local storage on the single CPX21/CPX31 control-plane node and
  # come up immediately. Operators upgrading to multi-node migrate to
  # hcloud-csi (Hetzner Cloud Volumes) as a separate, deliberate step —
  # see docs/RUNBOOK-PROVISIONING.md §"StorageClass missing".
  #
  # Architectural background: the prior version of this template passed
  # `--disable=local-storage` with the intent that Crossplane would
  # install hcloud-csi day-2 and register the StorageClass after
  # bp-crossplane reconciled. That created a circular dependency: the
  # 11-component bootstrap kit (bp-spire / bp-keycloak / bp-openbao / …)
  # all carry PVCs whose bind step blocks waiting for a StorageClass that
  # would only exist AFTER bp-crossplane had finished installing AND
  # provisioned hcloud-csi. Result on a fresh Sovereign: every PVC stuck
  # Pending forever, bootstrap-kit deadlocked. Keeping local-path solves
  # the circularity by giving the cluster a default StorageClass at boot.
  - 'curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${k3s_version} K3S_TOKEN=${k3s_token} INSTALL_K3S_EXEC="server --cluster-init --flannel-backend=none --disable-network-policy --disable=traefik --disable=servicelb --tls-san=${sovereign_fqdn} --kube-apiserver-arg=oidc-issuer-url=https://auth.${sovereign_fqdn}/realms/sovereign --kube-apiserver-arg=oidc-client-id=kubectl --kube-apiserver-arg=oidc-username-claim=preferred_username --kube-apiserver-arg=oidc-username-prefix=oidc: --kube-apiserver-arg=oidc-groups-claim=groups --kube-apiserver-arg=oidc-groups-prefix=oidc: --node-label catalyst.openova.io/role=control-plane --write-kubeconfig-mode=0644" sh -'

  # Wait for the API server to be reachable. Cilium needs to come up before
  # nodes Ready, so we wait specifically for the API endpoint.
  - 'until kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml get --raw /healthz; do sleep 5; done'

  # ── Default StorageClass: local-path (k3s built-in) ─────────────────────
  #
  # k3s ships local-path-provisioner (deployment in kube-system,
  # `app=local-path-provisioner`) and registers a `local-path`
  # StorageClass on first boot. We need the StorageClass to exist AND
  # be marked default BEFORE Flux applies the bootstrap-kit Kustomization
  # below — otherwise the 11-component bootstrap kit (bp-spire,
  # bp-keycloak postgres, bp-openbao, bp-nats-jetstream, bp-gitea,
  # bp-catalyst-platform postgres) ships HelmReleases with PVCs that
  # have no `storageClassName` set, expecting the cluster default to
  # take over. Without a default, every one of those PVCs sits Pending
  # waiting on a class that nobody nominates, and the bootstrap-kit
  # Kustomization deadlocks.
  #
  # Sequence (#207 — fix the circular wait that blocked every fresh provision):
  #   1. Poll until the `local-path` StorageClass object is registered by
  #      k3s. We CANNOT wait for the local-path-provisioner POD to be
  #      Ready here — k3s runs with --flannel-backend=none so the node
  #      stays Ready=False until Cilium installs (further down). Waiting
  #      on the Pod creates a circular deadlock and 60s timeout. The SC
  #      object itself is registered by k3s manifests independently of CNI
  #      (verified live: SC creationTimestamp 3s after k3s start).
  #   2. Patch the `local-path` StorageClass with the
  #      `storageclass.kubernetes.io/is-default-class: "true"` annotation.
  #   3. Verify (the poll already implies presence; the explicit grep stays
  #      as defensive belt-and-braces, identical exit semantics).
  - 'until kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml get sc local-path >/dev/null 2>&1; do sleep 2; done'
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml patch storageclass local-path -p ''{"metadata":{"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'''
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml get sc -o name | grep -q "^storageclass.storage.k8s.io/local-path$" || { echo "FATAL: local-path StorageClass missing after k3s install — see docs/RUNBOOK-PROVISIONING.md StorageClass missing section" >&2; exit 1; }'

%{ if deployment_id != "" && kubeconfig_bearer_token != "" && catalyst_api_url != "" ~}
  # ── Cloud-init kubeconfig postback (issue #183, Option D) ───────────────
  #
  # The k3s install above wrote /etc/rancher/k3s/k3s.yaml with the API
  # server URL pinned to https://127.0.0.1:6443 — kubectl's default for a
  # local single-node install. catalyst-api lives off-cluster (Catalyst-Zero
  # franchise console on contabo-mkt) and cannot reach 127.0.0.1 on this
  # node, so we MUST rewrite that field before sending the kubeconfig
  # back.
  #
  # Issue #542: kubeconfig server: must be the control-plane's PUBLIC IPv4,
  # NOT the load balancer's. The Hetzner LB only forwards 80/443 (Cilium
  # Gateway ingress); 6443 is exposed directly on the CP via firewall rule
  # main.tf:51-56 (0.0.0.0/0 → CP:6443). Earlier code rewrote to the LB IP
  # which silently fails with "connect: connection refused" — wizard jobs
  # page stuck PENDING for 50+ minutes after install completes.
  #
  # Plaintext: we read from /etc/rancher/k3s/k3s.yaml (mode 0644 written
  # by k3s), apply the rewrite via sed, write the result to
  # /etc/rancher/k3s/k3s.yaml.public (mode 0600 explicitly), then
  # curl --data-binary the file content to catalyst-api with the bearer
  # token. The .public file is removed at the end of the runcmd block
  # so the bearer-protected kubeconfig only lives on this node for the
  # few seconds it takes to PUT.
  #
  # --retry 60 --retry-delay 10 --retry-all-errors handles the case
  # where catalyst-api is briefly unreachable (image roll, ingress
  # reconciliation) — the cloud-init runcmd budget is bounded by the
  # systemd cloud-final timeout (~30 minutes).
  # control_plane_ipv4 resolved at runtime via Hetzner metadata service
  # (rather than templated by Tofu — that would create a dependency cycle:
  # cloud-init → control_plane.ipv4_address → control_plane.user_data → cloud-init).
  # 169.254.169.254 is the standard cloud metadata endpoint; Hetzner exposes
  # public-ipv4 at /hetzner/v1/metadata/public-ipv4 — single line, no auth.
  - install -m 0600 /dev/null /etc/rancher/k3s/k3s.yaml.public
  - 'CP_PUBLIC_IPV4=$(curl -fsSL --retry 30 --retry-delay 2 http://169.254.169.254/hetzner/v1/metadata/public-ipv4) && sed "s|https://127.0.0.1:6443|https://$${CP_PUBLIC_IPV4}:6443|g" /etc/rancher/k3s/k3s.yaml > /etc/rancher/k3s/k3s.yaml.public'
  - chmod 0600 /etc/rancher/k3s/k3s.yaml.public
  - |
    curl -fsSL --retry 60 --retry-delay 10 --retry-all-errors \
      -X PUT \
      -H "Authorization: Bearer ${kubeconfig_bearer_token}" \
      -H "Content-Type: application/x-yaml" \
      --data-binary @/etc/rancher/k3s/k3s.yaml.public \
      ${catalyst_api_url}/api/v1/deployments/${deployment_id}/kubeconfig
  - rm -f /etc/rancher/k3s/k3s.yaml.public
%{ endif ~}

  # ── Cilium FIRST (before Flux) ───────────────────────────────────────────
  #
  # k3s started with --flannel-backend=none, so the cluster has NO CNI yet.
  # If we apply Flux install.yaml at this point, the Flux controller pods
  # stay Pending forever — kubelet rejects them with
  #   "container runtime network not ready: cni plugin not initialized"
  # Flux is then unable to reconcile bp-cilium, so Cilium is never
  # installed → bootstrap deadlock that we hit in production at
  # omantel.omani.works deployment 5cd1bceaaacb71f6 (25 min stuck Pending).
  #
  # Bootstrap chicken-and-egg: Cilium IS the install unit (bp-cilium), but
  # Flux needs a CNI to run, and Cilium IS the CNI. Resolution: install
  # Cilium ONCE here via Helm with the same chart + values bp-cilium would
  # apply later. When Flux reconciles bp-cilium, it adopts the existing
  # release (Helm release-name match), so there is no churn.
  #
  # Per INVIOLABLE-PRINCIPLES.md #3 the GitOps engine is Flux — this Helm
  # install is the one-shot bootstrap exception explicitly authorised by
  # the same principle's "everything ELSE" qualifier. Both the chart
  # version AND the values must match `platform/cilium/blueprint.yaml`
  # + `clusters/_template/bootstrap-kit/01-cilium.yaml` so the bootstrap
  # install and the reconciled HelmRelease are byte-identical — issue
  # #491. The values come from /var/lib/catalyst/cilium-values.yaml
  # written via cloud-init `write_files:` above; chart version stays
  # inline as a --version flag because OpenTofu's `var.k3s_version`
  # parameterisation wires through to it (per INVIOLABLE-PRINCIPLES
  # #4 — never hardcode).
  # ── Gateway API CRDs BEFORE Cilium ──────────────────────────────────────
  #
  # Cilium 1.16.x operator checks for gateway.networking.k8s.io CRDs at
  # startup. If the CRDs are absent the operator disables its gateway
  # controller entirely and never re-checks — a static decision made once
  # at boot. This creates a race when Gateway API CRDs are installed AFTER
  # k3s/Cilium, which is the normal Flux GitOps order (bp-gateway-api
  # reconciles minutes after bp-cilium). Result: every fresh Sovereign has
  # no GatewayClass/cilium, all HTTPRoutes are orphaned, no routing.
  #
  # Fix: pre-install the Gateway API experimental CRDs here, before the
  # Cilium helm install below. The experimental channel is required because
  # Cilium 1.16.x references tlsroutes.gateway.networking.k8s.io (v1alpha2)
  # at startup; the standard channel does not ship TLSRoute.
  #
  # Version choice — v1.1.0 NOT v1.2.0:
  #   Gateway API v1.2.0 changed status.supportedFeatures from an array of
  #   strings to an array of objects ({name: string}). Cilium 1.16.5 still
  #   writes the old string format; the v1.2.0 CRD rejects its status patch
  #   with "must be of type object: string", leaving GatewayClass/cilium
  #   permanently in status=Unknown/Pending. v1.1.0 retains the string
  #   format and is fully compatible with Cilium 1.16.x.
  #
  # bp-gateway-api Flux blueprint becomes a no-op on first reconcile
  # (CRDs already present, kubectl apply is idempotent); it is kept as the
  # GitOps record and handles CRD upgrades when Cilium is bumped.
  #
  # Incident reference: otech22 2026-05-02 — all 8 HTTPRoutes orphaned,
  # cilium-operator log: "Required GatewayAPI resources are not found …
  # tlsroutes.gateway.networking.k8s.io not found". Fix: issue #503.
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.1.0/experimental-install.yaml'
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml wait --for=condition=Established crd/tlsroutes.gateway.networking.k8s.io --timeout=60s'

  - 'curl -sSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash'
  - 'helm repo add cilium https://helm.cilium.io/'
  - 'helm repo update'
  - |
    KUBECONFIG=/etc/rancher/k3s/k3s.yaml helm install cilium cilium/cilium \
      --version 1.16.5 \
      --namespace kube-system \
      -f /var/lib/catalyst/cilium-values.yaml
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml -n kube-system rollout status ds/cilium --timeout=240s'

  # Install Flux core. Cilium is now the cluster's CNI, so Flux pods will
  # actually start. Flux then reconciles clusters/_template/ (with
  # SOVEREIGN_FQDN substituted via postBuild — issue #218) which
  # adopts the Helm release above as bp-cilium and continues with
  # bp-cert-manager, bp-flux (which ADOPTS this Flux install rather than
  # reinstalls — see version-pin invariant below), bp-crossplane, etc.
  #
  # CRITICAL VERSION-PIN INVARIANT — DO NOT CHANGE IN ISOLATION
  # -----------------------------------------------------------
  # The version pinned in the URL below MUST match the upstream Flux
  # release that `platform/flux/chart/Chart.yaml`'s `flux2` subchart
  # bundles, otherwise bp-flux's HelmRelease runs `helm install` on top
  # of THIS Flux installation with a different upstream version, the
  # CRD `status.storedVersions` mismatches, Helm install fails, rollback
  # fires, and rollback DELETES the running Flux controllers — leaving
  # the cluster with no GitOps engine, unrecoverable in-place.
  #
  # Live verified on omantel.omani.works on 2026-04-29 — every Sovereign
  # provisioned without this pin in sync was destroyed minutes after
  # bp-flux's first reconcile. See docs/RUNBOOK-PROVISIONING.md
  # §"bp-flux double-install".
  #
  # Mapping (cloud-init install.yaml -> chart subchart -> appVersion):
  #   v2.4.0  ->  flux2 2.14.1  ->  appVersion 2.4.0  <- CURRENT
  #   v2.3.0  ->  flux2 2.13.0  ->  appVersion 2.3.0
  #
  # CI gate `platform/flux/chart/tests/version-pin-replay.sh` rejects
  # divergence between this URL's version and the chart's subchart pin.
  - 'curl -fsSL https://github.com/fluxcd/flux2/releases/download/v2.4.0/install.yaml | kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f -'
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml -n flux-system wait --for=condition=Available --timeout=300s deployment --all'

  # ── flux-system/ghcr-pull Secret (applied BEFORE GitRepository) ──────
  #
  # Apply the docker-registry pull secret rendered above. This MUST land
  # before the GitRepository + Kustomization in flux-bootstrap.yaml,
  # because the bootstrap-kit Kustomization includes HelmRepository CRs
  # that reference this Secret by name; the source-controller resolves
  # them on its first reconciliation tick and a missing Secret propagates
  # as a Ready=False/AuthError state that has been observed to persist
  # for 5+ minutes even after the Secret is later applied.
  #
  # Idempotent: `kubectl apply` against an existing Secret is a no-op
  # when the manifest's bytes match. A reprovision (same Sovereign FQDN)
  # rewrites this with the same content; a token rotation propagates
  # through here on the next cloud-init render.
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f /var/lib/catalyst/ghcr-pull-secret.yaml'

  # ── flux-system/harbor-robot-token Secret (issue #557 follow-up) ─────
  #
  # Apply the central-Harbor robot-account Secret BEFORE Flux reconciles
  # the bootstrap-kit. bp-catalyst-platform's catalyst-api Pod has a
  # REQUIRED (non-optional) secretKeyRef to `harbor-robot-token` in its
  # own namespace; bp-reflector (slot 05a) mirrors this Secret from
  # flux-system into catalyst-system on first reconcile so the Pod can
  # start cleanly. Same idempotency property as ghcr-pull above.
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f /var/lib/catalyst/harbor-robot-token-secret.yaml'

  # ── cert-manager/dynadot-api-credentials Secret (issue #550) ─────────
  #
  # Apply the Dynadot credentials BEFORE Flux reconciles the bootstrap-kit:
  # bp-cert-manager-dynadot-webhook (slot 49b) references this Secret via
  # secretKeyRef (required). A missing Secret causes CrashLoopBackOff and
  # stalls TLS issuance from Day 0.
  #
  # cert-manager namespace is created by bp-cert-manager via Flux — we
  # pre-create it idempotently here so the Secret apply does not fail.
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml create namespace cert-manager --dry-run=client -o yaml | kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f -'
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f /var/lib/catalyst/dynadot-api-credentials.yaml'

  # ── OpenBao auto-unseal seed Secret (issue #316) ─────────────────────
  #
  # Generate a one-shot 32-byte recovery seed during cloud-init and
  # write it to a K8s Secret `openbao-recovery-seed` in the `openbao`
  # namespace. The bp-openbao chart (v1.2.0+) renders a post-install
  # Job (templates/init-job.yaml, Helm hook weight 5) that:
  #   1. Reads this seed Secret.
  #   2. Calls `bao operator init -recovery-shares=1 -recovery-threshold=1`.
  #   3. Persists the recovery key inside OpenBao's auto-unseal config
  #      (so subsequent pod restarts unseal automatically).
  #   4. Deletes this seed Secret on success.
  #
  # The seed is single-use — once consumed by the init Job, it never
  # exists again. The recovery key + root token live ONLY inside
  # OpenBao's Raft state (acceptance criterion #6 of issue #316).
  #
  # Why a fresh /dev/urandom value (NOT a value baked into Terraform):
  # the recovery seed must NEVER be readable from outside the
  # control-plane node, NEVER appear in tfstate, NEVER appear in any
  # cloud-init render audit log. Generating it here at provision time
  # means the only window of plaintext exposure is the few seconds
  # between this Secret apply and the Helm post-install Job consuming
  # it — bounded by the bootstrap-kit reconcile cadence (1m max).
  #
  # Why we create the namespace here: the bp-openbao HelmRelease in
  # clusters/_template/bootstrap-kit/08-openbao.yaml ships a Namespace
  # manifest, but Flux applies that Namespace + the HelmRelease
  # together. The Helm post-install hook would race the seed Secret
  # apply if we waited for Flux to create the namespace. Pre-creating
  # the namespace at cloud-init time eliminates the race.
  #
  # Idempotency: `kubectl apply` of the namespace and `kubectl create
  # secret --dry-run=client -o yaml | kubectl apply -f -` of the
  # Secret are both safe to re-run. A re-provision (same Sovereign
  # FQDN) regenerates a fresh seed and re-applies — at which point the
  # init Job has either already consumed the previous seed (so the new
  # one becomes a no-op the next time the Helm hook runs) OR sees
  # OpenBao already initialised and exits idempotently without
  # touching the new seed.
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml create namespace openbao --dry-run=client -o yaml | kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f -'
  - |
    OPENBAO_SEED=$(head -c 32 /dev/urandom | base64 | tr -d '\n')
    kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml -n openbao create secret generic openbao-recovery-seed \
      --from-literal=recovery-seed="$OPENBAO_SEED" \
      --dry-run=client -o yaml \
      | kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml annotate --local -f - \
        openbao.openova.io/single-use=true -o yaml \
      | kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f -
    unset OPENBAO_SEED

  # ── flux-system/object-storage Secret (issue #371, vendor-agnostic since #425) ─
  #
  # Apply the operator-issued Object Storage credentials so they're in
  # the cluster BEFORE Flux reconciles bp-harbor (#383) and bp-velero
  # (#384). Both Blueprints reference `secretRef: name: object-storage`
  # in their HelmRelease values; without this Secret the install reports
  # NoSuchKey at chart-install probe time and Phase 1 stalls.
  #
  # Same idempotency property as ghcr-pull above — re-running cloud-init
  # against an existing Sovereign overwrites the manifest with the same
  # bytes (or rotated bytes when the operator has issued fresh keys); a
  # missing-bucket scenario is impossible by construction because main.tf's
  # minio_s3_bucket resource creates the bucket in the same `tofu apply`
  # run that renders this user_data.
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f /var/lib/catalyst/object-storage-secret.yaml'

  # ── flux-system/cloud-credentials Secret + Crossplane Provider (issue #425) ─
  #
  # Apply the Hetzner Cloud API token Secret + the Crossplane Provider
  # package + ProviderConfig BEFORE Flux's bootstrap-kit lands
  # bp-crossplane. The Provider package itself is installed by
  # Crossplane core (which bp-crossplane brings up); applying the
  # Provider CR here just registers the package install request — it
  # transitions Healthy=True a few minutes later once the bootstrap-
  # kit's Crossplane core controllers come online. The ProviderConfig
  # sits in waiting state until the Provider's CRDs are registered, at
  # which point it goes Ready=True and the Sovereign is ready to accept
  # Day-2 XRC writes.
  #
  # Per ADR-0001 §11.3 + INVIOLABLE-PRINCIPLES #3 this is the OpenTofu
  # → Crossplane handover seam. Tofu provisions Phase 0 exactly once;
  # everything else flows through XRC writes against this Provider.
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f /var/lib/catalyst/cloud-credentials-secret.yaml'
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f /var/lib/catalyst/crossplane-provider-hcloud.yaml'

  # Apply the Flux bootstrap GitRepository + Kustomization. From here, Flux
  # owns the cluster: pulls clusters/_template/ (with $${SOVEREIGN_FQDN}
  # substituted to ${sovereign_fqdn} via postBuild), installs Cilium
  # via bp-cilium, cert-manager via bp-cert-manager, etc., then bp-catalyst-platform.
  - 'kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml apply -f /var/lib/catalyst/flux-bootstrap.yaml'

  # Marker for the catalyst-api provisioner to detect cloud-init is done.
  - mkdir -p /var/lib/catalyst
  - touch /var/lib/catalyst/cloud-init-complete

final_message: "Catalyst control-plane bootstrap complete after $UPTIME seconds — Flux is now reconciling clusters/_template/ for ${sovereign_fqdn}"