fix(self-sovereign-cutover): Step-8 baseline-diff (only NEW regressions count) (#858)

Live otech103: the Step-8 survival window failed because the infrastructure-config Kustomization had been NotReady for 4h pre-cutover (Crossplane provider CRD ordering — unrelated to sovereignty). The sovereignty proof asks 'did cutover break anything', not 'is the cluster perfect'. Capture the baseline NotReady set before the window and fail only on NEW additions that appear during it.

Bumps 0.1.12 → 0.1.13 and updates the slot 06a pin in lockstep.

Co-authored-by: Hatice Yildiz <hatice.yildiz@openova.io>
This commit is contained in:
e3mrah 2026-05-05 04:20:16 +04:00 committed by GitHub
parent d5d1d9b2cd
commit 9b710049e3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 40 additions and 15 deletions

View File

@ -120,7 +120,14 @@ spec:
# Flux Kustomization re-applies from bootstrap-kit slots after
# Step-6's patch. Data-plane impact null — they're not pulled
# again until next cutover cycle. Caught live otech103.
version: 0.1.12
# 0.1.13: Step-8 survival window captures BASELINE NotReady set
# before entering the window, then only fails on NEW Ready=False
# transitions (regressions). Pre-existing failures (Crossplane
# provider CRD ordering, etc.) don't poison the sovereignty
# verdict — sovereignty asks "did cutover break anything", not
# "is the cluster perfect". Caught live otech103 — the
# infrastructure-config Kustomization had been NotReady for 4h
# pre-cutover.
version: 0.1.13
sourceRef:
kind: HelmRepository
name: bp-self-sovereign-cutover

View File

@ -1,6 +1,6 @@
apiVersion: v2
name: bp-self-sovereign-cutover
version: 0.1.12
version: 0.1.13
description: |
Catalyst Self-Sovereignty Cutover Blueprint. Installs DORMANT — this
chart ships eight step ConfigMaps (PodSpec ConfigMaps, one per step),

View File

@ -102,37 +102,55 @@ data:
*gitea-http*|*gitea.*) : ;;
*) echo " FAIL — catalyst-api env still upstream"; exit 1 ;;
esac
echo "[egress-block-test] architectural pivots verified — entering ${DURATION_SECONDS}s survival window"
echo "[egress-block-test] capturing baseline NotReady set"
baseline_hr=$(kubectl get helmreleases.helm.toolkit.fluxcd.io -A \
-o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}={.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
| grep -E '=False$' | sort -u || true)
baseline_ks=$(kubectl get kustomizations.kustomize.toolkit.fluxcd.io -A \
-o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}={.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
| grep -E '=False$' | sort -u || true)
if [ -n "${baseline_hr}${baseline_ks}" ]; then
echo "[egress-block-test] baseline pre-existing NotReady items (excluded from regression check):"
[ -n "${baseline_hr}" ] && echo "${baseline_hr}" | sed 's/^/ baseline HR /'
[ -n "${baseline_ks}" ] && echo "${baseline_ks}" | sed 's/^/ baseline KS /'
else
echo "[egress-block-test] baseline clean — no pre-existing NotReady items"
fi
echo "[egress-block-test] entering ${DURATION_SECONDS}s survival window — only NEW NotReady regressions count as failure"
start=$(date +%s)
deadline=$((start + DURATION_SECONDS))
poll_interval=30
failures=0
new_failures=0
while [ "$(date +%s)" -lt "${deadline}" ]; do
hr_not_ready=$(kubectl get helmreleases.helm.toolkit.fluxcd.io -A \
-o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}={.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
| grep -E '=False$' || true)
| grep -E '=False$' | sort -u || true)
ks_not_ready=$(kubectl get kustomizations.kustomize.toolkit.fluxcd.io -A \
-o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}={.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
| grep -E '=False$' || true)
| grep -E '=False$' | sort -u || true)
if [ -n "${hr_not_ready}${ks_not_ready}" ]; then
echo "[egress-block-test] NotReady detected at $(($(date +%s) - start))s:"
[ -n "${hr_not_ready}" ] && echo "${hr_not_ready}" | sed 's/^/ HR /'
[ -n "${ks_not_ready}" ] && echo "${ks_not_ready}" | sed 's/^/ KS /'
failures=$((failures+1))
# Diff against baseline — only new entries count as a regression.
new_hr=$(comm -13 <(echo "${baseline_hr}") <(echo "${hr_not_ready}"))
new_ks=$(comm -13 <(echo "${baseline_ks}") <(echo "${ks_not_ready}"))
if [ -n "${new_hr}${new_ks}" ]; then
echo "[egress-block-test] NEW NotReady at $(($(date +%s) - start))s (regression):"
[ -n "${new_hr}" ] && echo "${new_hr}" | sed 's/^/ NEW HR /'
[ -n "${new_ks}" ] && echo "${new_ks}" | sed 's/^/ NEW KS /'
new_failures=$((new_failures+1))
fi
sleep "${poll_interval}"
done
echo "[egress-block-test] window complete; observed-failure-cycles=${failures}"
if [ "${failures}" -gt 0 ]; then
echo "[egress-block-test] cluster did NOT survive egress block — sovereignty proof FAILED"
echo "[egress-block-test] window complete; new-regression-cycles=${new_failures}"
if [ "${new_failures}" -gt 0 ]; then
echo "[egress-block-test] cluster developed NEW NotReady items during the window — sovereignty proof FAILED"
exit 1
fi
echo "[egress-block-test] cluster survived ${DURATION_SECONDS}s egress block — sovereignty proof PASSED"
echo "[egress-block-test] no new regressions during ${DURATION_SECONDS}s window — sovereignty proof PASSED"
resources:
requests: { cpu: 50m, memory: 64Mi }
limits: { memory: 256Mi }