fix(self-sovereign-cutover): Step-8 baseline-diff (only NEW regressions count) (#858)
Live otech103: Step-8 survival window failed because the infrastructure-config Kustomization had been NotReady for 4h pre-cutover (Crossplane provider CRD ordering — unrelated to sovereignty). The sovereignty proof asks 'did cutover break anything', not 'is the cluster perfect'. Capture the baseline NotReady set before the window and only fail on NEW additions during the window. Bumps the chart 0.1.12 → 0.1.13 and the slot 06a pin in lockstep. Co-authored-by: Hatice Yildiz <hatice.yildiz@openova.io>
This commit is contained in:
parent
d5d1d9b2cd
commit
9b710049e3
@ -120,7 +120,14 @@ spec:
|
||||
# Flux Kustomization re-applies from bootstrap-kit slots after
|
||||
# Step-6's patch. Data-plane impact null — they're not pulled
|
||||
# again until next cutover cycle. Caught live otech103.
|
||||
version: 0.1.12
|
||||
# 0.1.13: Step-8 survival window captures BASELINE NotReady set
|
||||
# before entering the window, then only fails on NEW Ready=False
|
||||
# transitions (regressions). Pre-existing failures (Crossplane
|
||||
# provider CRD ordering, etc.) don't poison the sovereignty
|
||||
# verdict — sovereignty asks "did cutover break anything", not
|
||||
# "is the cluster perfect". Caught live otech103 —
|
||||
# infrastructure-config Kustomization had been NotReady for 4h pre-cutover.
|
||||
version: 0.1.13
|
||||
sourceRef:
|
||||
kind: HelmRepository
|
||||
name: bp-self-sovereign-cutover
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
apiVersion: v2
|
||||
name: bp-self-sovereign-cutover
|
||||
version: 0.1.12
|
||||
version: 0.1.13
|
||||
description: |
|
||||
Catalyst Self-Sovereignty Cutover Blueprint. Installs DORMANT — this
|
||||
chart ships eight step ConfigMaps (PodSpec ConfigMaps, one per step),
|
||||
|
||||
@ -102,37 +102,55 @@ data:
|
||||
*gitea-http*|*gitea.*) : ;;
|
||||
*) echo " FAIL — catalyst-api env still upstream"; exit 1 ;;
|
||||
esac
|
||||
echo "[egress-block-test] architectural pivots verified — entering ${DURATION_SECONDS}s survival window"
|
||||
echo "[egress-block-test] capturing baseline NotReady set"
|
||||
baseline_hr=$(kubectl get helmreleases.helm.toolkit.fluxcd.io -A \
|
||||
-o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}={.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
|
||||
| grep -E '=False$' | sort -u || true)
|
||||
baseline_ks=$(kubectl get kustomizations.kustomize.toolkit.fluxcd.io -A \
|
||||
-o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}={.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
|
||||
| grep -E '=False$' | sort -u || true)
|
||||
if [ -n "${baseline_hr}${baseline_ks}" ]; then
|
||||
echo "[egress-block-test] baseline pre-existing NotReady items (excluded from regression check):"
|
||||
[ -n "${baseline_hr}" ] && echo "${baseline_hr}" | sed 's/^/ baseline HR /'
|
||||
[ -n "${baseline_ks}" ] && echo "${baseline_ks}" | sed 's/^/ baseline KS /'
|
||||
else
|
||||
echo "[egress-block-test] baseline clean — no pre-existing NotReady items"
|
||||
fi
|
||||
echo "[egress-block-test] entering ${DURATION_SECONDS}s survival window — only NEW NotReady regressions count as failure"
|
||||
|
||||
start=$(date +%s)
|
||||
deadline=$((start + DURATION_SECONDS))
|
||||
poll_interval=30
|
||||
failures=0
|
||||
new_failures=0
|
||||
|
||||
while [ "$(date +%s)" -lt "${deadline}" ]; do
|
||||
hr_not_ready=$(kubectl get helmreleases.helm.toolkit.fluxcd.io -A \
|
||||
-o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}={.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
|
||||
| grep -E '=False$' || true)
|
||||
| grep -E '=False$' | sort -u || true)
|
||||
ks_not_ready=$(kubectl get kustomizations.kustomize.toolkit.fluxcd.io -A \
|
||||
-o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}={.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
|
||||
| grep -E '=False$' || true)
|
||||
| grep -E '=False$' | sort -u || true)
|
||||
|
||||
if [ -n "${hr_not_ready}${ks_not_ready}" ]; then
|
||||
echo "[egress-block-test] NotReady detected at $(($(date +%s) - start))s:"
|
||||
[ -n "${hr_not_ready}" ] && echo "${hr_not_ready}" | sed 's/^/ HR /'
|
||||
[ -n "${ks_not_ready}" ] && echo "${ks_not_ready}" | sed 's/^/ KS /'
|
||||
failures=$((failures+1))
|
||||
# Diff against baseline (both sets are sort -u'd so comm -13 can list lines present only in the current set) — only new entries count as a regression.
|
||||
new_hr=$(comm -13 <(echo "${baseline_hr}") <(echo "${hr_not_ready}"))
|
||||
new_ks=$(comm -13 <(echo "${baseline_ks}") <(echo "${ks_not_ready}"))
|
||||
|
||||
if [ -n "${new_hr}${new_ks}" ]; then
|
||||
echo "[egress-block-test] NEW NotReady at $(($(date +%s) - start))s (regression):"
|
||||
[ -n "${new_hr}" ] && echo "${new_hr}" | sed 's/^/ NEW HR /'
|
||||
[ -n "${new_ks}" ] && echo "${new_ks}" | sed 's/^/ NEW KS /'
|
||||
new_failures=$((new_failures+1))
|
||||
fi
|
||||
|
||||
sleep "${poll_interval}"
|
||||
done
|
||||
|
||||
echo "[egress-block-test] window complete; observed-failure-cycles=${failures}"
|
||||
if [ "${failures}" -gt 0 ]; then
|
||||
echo "[egress-block-test] cluster did NOT survive egress block — sovereignty proof FAILED"
|
||||
echo "[egress-block-test] window complete; new-regression-cycles=${new_failures}"
|
||||
if [ "${new_failures}" -gt 0 ]; then
|
||||
echo "[egress-block-test] cluster developed NEW NotReady items during the window — sovereignty proof FAILED"
|
||||
exit 1
|
||||
fi
|
||||
echo "[egress-block-test] cluster survived ${DURATION_SECONDS}s egress block — sovereignty proof PASSED"
|
||||
echo "[egress-block-test] no new regressions during ${DURATION_SECONDS}s window — sovereignty proof PASSED"
|
||||
resources:
|
||||
requests: { cpu: 50m, memory: 64Mi }
|
||||
limits: { memory: 256Mi }
|
||||
|
||||
Loading…
Reference in New Issue
Block a user