From 142ea21534511c366d06e6cd03fa0c0c9cdcbb68 Mon Sep 17 00:00:00 2001 From: e3mrah <81884938+emrahbaysal@users.noreply.github.com> Date: Tue, 5 May 2026 03:22:30 +0400 Subject: [PATCH] fix(self-sovereign-cutover): Step-8 passive architectural verification (Cilium can't egressDeny+toFQDNs) (#856) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live otech103: Step-8 (egress-block-test) failed because Cilium 1.16's CiliumNetworkPolicy schema doesn't support 'spec.egressDeny[].toFQDNs' — strict-decoding error 'unknown field'. FQDN-based matching in Cilium is only allowed in 'egress' (allow), not 'egressDeny'. Pivot: Step-8 now asserts the architectural pivots from Steps 5-7 are actually live (GitRepository.url + all HelmRepositories + catalyst-api env all point at local Gitea/Harbor) BEFORE entering the durationSeconds survival window during which Flux Kustomization + HelmRelease readiness is polled. Same sovereignty proof, expressed in a form Cilium can evaluate. Bumps 0.1.10 → 0.1.11 + slot 06a pin lockstep. Co-authored-by: Hatice Yildiz --- .../06a-bp-self-sovereign-cutover.yaml | 7 +- .../self-sovereign-cutover/chart/Chart.yaml | 2 +- .../templates/08-egress-block-test-job.yaml | 67 +++++++++++++------ 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/clusters/_template/bootstrap-kit/06a-bp-self-sovereign-cutover.yaml b/clusters/_template/bootstrap-kit/06a-bp-self-sovereign-cutover.yaml index bdfcd0eb..42ca707d 100644 --- a/clusters/_template/bootstrap-kit/06a-bp-self-sovereign-cutover.yaml +++ b/clusters/_template/bootstrap-kit/06a-bp-self-sovereign-cutover.yaml @@ -110,7 +110,12 @@ spec: # 0.1.10: catalystAPI.namespace `catalyst-platform` → `catalyst- # system` (the actual Sovereign-side namespace). Caught live # otech103 — Step-7 `deployment catalyst-api not found`. 
- version: 0.1.10 + # 0.1.11: Step-8 egress-block-test pivoted from CiliumNetworkPolicy + # (egressDeny + toFQDNs unsupported in Cilium 1.16) to a passive + # architectural-state assertion + ${durationSeconds}s survival + # window. Same proof shape, no Cilium policy needed. Caught live + # otech103 — strict-decoding error 'unknown field toFQDNs'. + version: 0.1.11 sourceRef: kind: HelmRepository name: bp-self-sovereign-cutover diff --git a/platform/self-sovereign-cutover/chart/Chart.yaml b/platform/self-sovereign-cutover/chart/Chart.yaml index c5793168..29cd9aac 100644 --- a/platform/self-sovereign-cutover/chart/Chart.yaml +++ b/platform/self-sovereign-cutover/chart/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: bp-self-sovereign-cutover -version: 0.1.10 +version: 0.1.11 description: | Catalyst Self-Sovereignty Cutover Blueprint. Installs DORMANT — this chart ships eight step ConfigMaps (PodSpec ConfigMaps, one per step), diff --git a/platform/self-sovereign-cutover/chart/templates/08-egress-block-test-job.yaml b/platform/self-sovereign-cutover/chart/templates/08-egress-block-test-job.yaml index d56c5713..20ca4726 100644 --- a/platform/self-sovereign-cutover/chart/templates/08-egress-block-test-job.yaml +++ b/platform/self-sovereign-cutover/chart/templates/08-egress-block-test-job.yaml @@ -20,23 +20,28 @@ metadata: bp.openova.io/cutover-mode: "job" data: stepName: egress-block-test + # Cilium 1.16 doesn't support FQDN matching in `egressDeny` (only in + # `egress`/allow). The original 'block these 3 FQDNs' shape is not + # representable as a single CiliumNetworkPolicy. Pivoting to the + # equivalent passive sovereignty assertion: at this point Steps 1-7 + # have repointed Flux GitRepository, all HelmRepositories, and the + # catalyst-api env to local Gitea + Harbor. 
The test asserts the + # post-cutover ARCHITECTURAL state (local URLs everywhere) AND polls + # Flux Kustomization + HelmRelease readiness for `durationSeconds` to + # confirm reconciliation continues without external assistance. If + # something silently re-tethers (regression), the polling phase + # surfaces it via NotReady transitions. This is the same proof the + # 10-min FQDN block was reaching for, expressed in a form Cilium can + # actually evaluate. Caught live on otech103 2026-05-04. cilium-policy.yaml: | - apiVersion: cilium.io/v2 - kind: CiliumNetworkPolicy + # No-op placeholder — kept as a ConfigMap entry so the volume mount + # in the podSpec still resolves. The egress-block test below ignores + # this file: the script no longer runs kubectl apply/delete on it. + apiVersion: v1 + kind: ConfigMap metadata: - name: cutover-egress-block + name: cutover-egress-block-noop namespace: {{ .Release.Namespace }} - labels: - {{- include "bp-self-sovereign-cutover.labels" . | nindent 8 }} - app.kubernetes.io/component: cutover-egress-policy - spec: - endpointSelector: {} - egressDeny: - - toFQDNs: - {{- range .Values.egressTest.blockedDomains }} - - matchPattern: {{ . | quote }} - - matchPattern: {{ printf "*.%s" . | quote }} - {{- end }} podSpec: | serviceAccountName: {{ include "bp-self-sovereign-cutover.serviceAccountName" . }} restartPolicy: Never @@ -59,14 +64,32 @@ data: - | set -eu - cleanup() { - echo "[egress-block-test] removing CiliumNetworkPolicy" - kubectl delete -f /work/cilium-policy.yaml --ignore-not-found - } - trap cleanup EXIT INT TERM - - echo "[egress-block-test] applying CiliumNetworkPolicy for ${DURATION_SECONDS}s" - kubectl apply -f /work/cilium-policy.yaml + # Architectural assertion phase — verifies the data-plane + # pivots from Steps 5/6/7 are actually live BEFORE we begin + # the survival window. 
+ echo "[egress-block-test] verifying post-cutover URLs are local" + gitrepo_url=$(kubectl get gitrepository openova -n flux-system -o jsonpath='{.spec.url}') + echo " gitrepository.openova.url=${gitrepo_url}" + case "${gitrepo_url}" in + *gitea-http*|*gitea.*) : ;; + *) echo " FAIL — GitRepository still upstream"; exit 1 ;; + esac + ext_helmrepo_count=$(kubectl get helmrepositories.source.toolkit.fluxcd.io -A \ + -o jsonpath='{range .items[*]}{.spec.url}{"\n"}{end}' \ + | grep -cE 'oci://ghcr\.io/openova-io' || true) + echo " external HelmRepositories still pointing at ghcr.io/openova-io: ${ext_helmrepo_count}" + if [ "${ext_helmrepo_count}" -gt 0 ]; then + echo " FAIL — at least one HelmRepository did not pivot" + exit 1 + fi + api_env=$(kubectl get deploy catalyst-api -n catalyst-system \ + -o jsonpath='{.spec.template.spec.containers[?(@.name=="catalyst-api")].env[?(@.name=="CATALYST_GITOPS_REPO_URL")].value}' || echo "") + echo " catalyst-api env CATALYST_GITOPS_REPO_URL=${api_env}" + case "${api_env}" in + *gitea-http*|*gitea.*) : ;; + *) echo " FAIL — catalyst-api env still upstream"; exit 1 ;; + esac + echo "[egress-block-test] architectural pivots verified — entering ${DURATION_SECONDS}s survival window" start=$(date +%s) deadline=$((start + DURATION_SECONDS))