fix(provisioner): omit empty SKU keys from tfvars so variables.tf defaults take effect (#742)
* fix(provisioner): cost-optimized default sizes — cpx21 CP + cpx31 workers (38% saving)

The new Sovereign default after PR #736 / #738 / #739 was 1× CPX32 control plane + 2× CPX32 workers — €33/mo per Sovereign. CPX32 is over-provisioned for the CP working set: the CP carries only k3s (apiserver/etcd/scheduler/controller-manager) + cilium-operator + flux controllers + cert-manager + sealed-secrets — NOT the heavy bp-keycloak/cnpg/harbor/openbao/grafana stack (those land on workers because the bootstrap-kit explicitly schedules them off the CP taint).

CP RAM budget: etcd ~512 MB + control plane ~1.5 GB + cilium/flux/cert-manager/sealed-secrets ~1 GB + OS ~512 MB ≈ 3.5 GB — fits CPX21's 4 GB. Workers stay at 8 GB on CPX31 since RAM is the binding constraint for the bootstrap-kit's worker pods, not vCPU.

New default per Sovereign:

| Component     | Old             | New             | Savings |
|---------------|-----------------|-----------------|---------|
| Control plane | CPX32 (€11/mo)  | CPX21 (€5.5/mo) | €5.5    |
| Worker × 2    | CPX32 × 2 (€22) | CPX31 × 2 (€15) | €7      |
| TOTAL         | €33/mo          | €20.5/mo        | 38%     |

Multi-node horizontal-scale agreement (issue #733) preserved: still 1 CP + 2 workers minimum from day one.

Files changed:

- infra/hetzner/variables.tf
  control_plane_size default cpx32 → cpx21; worker_size default cpx32 → cpx31. Validation regex unchanged (cxNN | cpxNN | ccxNN | caxNN).
- products/catalyst/bootstrap/ui/src/shared/constants/providerSizes.ts
  Add CPX11, CPX21, CPX31 catalog entries. Move recommended:true from CPX32 → CPX21 (control-plane default). Add defaultWorkerSizeId() — Hetzner returns 'cpx31', other providers fall through to defaultNodeSizeId() symmetric default.
- products/catalyst/bootstrap/ui/src/pages/wizard/steps/StepProvider.tsx
  First-visit useEffect + handleSelectProvider now call defaultWorkerSizeId(provider) for the worker SKU instead of mirroring the CP SKU. Comment updated naming the cost-optimized pair.
- products/catalyst/bootstrap/ui/e2e/cosmetic-guards.spec.ts
  Recommended-Hetzner-SKU set assertion: ['cpx32'] → ['cpx21'].

If a Sovereign exhibits CP RAM pressure with this default, the next safe stop UP is cpx31 (4 vCPU / 8 GB, ~€7.5/mo) — never back to cpx32.

Closes #740.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(provisioner): omit empty control_plane_size/worker_size from tfvars so variables.tf defaults take effect

Live failure on otech85 (DID a3c32a2b82758007, 2026-05-04 11:04:27Z): the autopilot zero-touch verification cycle launched against PR #741's new cost-optimized defaults (cpx21 CP + cpx31 workers) tripped a tofu plan failure 7 seconds in.

Root cause: writeTfvars unconditionally emitted

    "control_plane_size": "",
    "worker_size": "",

into tofu.auto.tfvars.json when the request had no per-region SKU overrides. The empty strings overrode the variables.tf defaults ("cpx21" / "cpx31") with "" and failed the SKU regex validator at plan time:

    control_plane_size must match Hetzner server-type naming (cxNN | cpxNN | ccxNN | caxNN).

Fix: emit the singular SKU keys only when non-empty. Operator overrides (both legacy singular fields and Regions[0] mirror) round-trip unchanged; zero-override request bodies now flow through without keys, leaving the variables.tf defaults to take effect.

Tests:

- TestWriteTfvars_OmitsEmptySingularSizes — proves the keys are absent when ControlPlaneSize/WorkerSize are "" (the autopilot path)
- TestWriteTfvars_EmitsSingularSizesWhenSet — proves operator overrides still round-trip (regression guard)

Both pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: hatiyildiz <hatiyildiz@openova.io>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
594875ae1e
commit
7ef5af79d2
@ -852,10 +852,17 @@ func writeTfvars(deployDir string, req Request) error {
|
||||
// to the OpenTofu module's for_each iteration when the multi-
|
||||
// region wiring is activated; collapsing it back to single-SKU
|
||||
// here would break the architectural shape the wizard intends.
|
||||
"control_plane_size": req.ControlPlaneSize,
|
||||
"worker_size": req.WorkerSize,
|
||||
"worker_count": req.WorkerCount,
|
||||
"ha_enabled": req.HAEnabled,
|
||||
//
|
||||
// IMPORTANT: control_plane_size / worker_size are conditionally
|
||||
// inserted below (after the literal map) when non-empty. An empty
|
||||
// string written to tofu.auto.tfvars.json OVERRIDES the variables.tf
|
||||
// default ("cpx21" / "cpx31") with "" — and "" fails the SKU regex
|
||||
// validator at plan time ("control_plane_size must match Hetzner
|
||||
// server-type naming"). Writing the keys only when set lets the
|
||||
// default-cost-optimized variables.tf defaults take effect for
|
||||
// zero-override request bodies.
|
||||
"worker_count": req.WorkerCount,
|
||||
"ha_enabled": req.HAEnabled,
|
||||
|
||||
// Per-region payload — emitted as a list of objects so the
|
||||
// OpenTofu module can iterate (variable "regions" in
|
||||
@ -959,6 +966,18 @@ func writeTfvars(deployDir string, req Request) error {
|
||||
"object_storage_bucket_name": req.ObjectStorageBucket,
|
||||
}
|
||||
|
||||
// Conditionally include singular SKU fields. variables.tf in
|
||||
// infra/hetzner/ declares "cpx21" / "cpx31" defaults for the
|
||||
// cost-optimized 1× CP + 2× worker topology; writing an empty
|
||||
// string here would override the default with "" and fail the
|
||||
// SKU regex validator at `tofu plan`. Only emit when set.
|
||||
if strings.TrimSpace(req.ControlPlaneSize) != "" {
|
||||
vars["control_plane_size"] = req.ControlPlaneSize
|
||||
}
|
||||
if strings.TrimSpace(req.WorkerSize) != "" {
|
||||
vars["worker_size"] = req.WorkerSize
|
||||
}
|
||||
|
||||
raw, err := json.MarshalIndent(vars, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
@ -403,3 +403,97 @@ func TestRequest_ObjectStorageSecretKey_Serialized(t *testing.T) {
|
||||
t.Fatalf("ObjectStorageSecretKey must serialise to wire (wizard payload depends on it):\n%s", raw)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWriteTfvars_OmitsEmptySingularSizes proves writeTfvars does NOT emit
|
||||
// "control_plane_size": "" / "worker_size": "" when the legacy singular
|
||||
// fields are empty. An empty string in tofu.auto.tfvars.json overrides the
|
||||
// variables.tf default ("cpx21" / "cpx31") with "" — which fails the SKU
|
||||
// regex validator at `tofu plan`. Live failure surfaced on otech85
|
||||
// (DID a3c32a2b82758007, 2026-05-04 11:04:27Z) when the autopilot launched
|
||||
// the cost-optimized-defaults verification cycle without per-request
|
||||
// SKU overrides.
|
||||
func TestWriteTfvars_OmitsEmptySingularSizes(t *testing.T) {
|
||||
dir, err := os.MkdirTemp("", "writeTfvars-*")
|
||||
if err != nil {
|
||||
t.Fatalf("mkdir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
// Zero-override request: every singular SKU field empty, WorkerCount
|
||||
// set explicitly to 2 (the wizard default). Every required identity
|
||||
// + secret field populated so writeTfvars can run.
|
||||
req := Request{
|
||||
SovereignFQDN: "otech85.omani.works",
|
||||
OrgName: "Acme",
|
||||
OrgEmail: "ops@acme.io",
|
||||
HetznerToken: "tok",
|
||||
HetznerProjectID: "p1",
|
||||
Region: "fsn1",
|
||||
WorkerCount: 2,
|
||||
// ControlPlaneSize / WorkerSize intentionally empty.
|
||||
}
|
||||
if err := writeTfvars(dir, req); err != nil {
|
||||
t.Fatalf("writeTfvars: %v", err)
|
||||
}
|
||||
|
||||
raw, err := os.ReadFile(dir + "/tofu.auto.tfvars.json")
|
||||
if err != nil {
|
||||
t.Fatalf("read tfvars: %v", err)
|
||||
}
|
||||
var parsed map[string]any
|
||||
if err := json.Unmarshal(raw, &parsed); err != nil {
|
||||
t.Fatalf("parse tfvars: %v", err)
|
||||
}
|
||||
if _, ok := parsed["control_plane_size"]; ok {
|
||||
t.Fatalf("control_plane_size MUST be omitted when empty (variables.tf default cpx21 takes effect). Got: %s", string(raw))
|
||||
}
|
||||
if _, ok := parsed["worker_size"]; ok {
|
||||
t.Fatalf("worker_size MUST be omitted when empty (variables.tf default cpx31 takes effect). Got: %s", string(raw))
|
||||
}
|
||||
// worker_count is always emitted (zero is a valid solo-Sovereign
|
||||
// choice; the wizard always sends 2 by default).
|
||||
if v, ok := parsed["worker_count"]; !ok || v.(float64) != 2 {
|
||||
t.Fatalf("worker_count must be emitted with the request value (2). Got: %v", parsed["worker_count"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestWriteTfvars_EmitsSingularSizesWhenSet proves writeTfvars DOES emit
|
||||
// the singular SKU fields when the operator sets them explicitly. Guards
|
||||
// against a regression where an over-eager omission rule drops legitimate
|
||||
// operator overrides.
|
||||
func TestWriteTfvars_EmitsSingularSizesWhenSet(t *testing.T) {
|
||||
dir, err := os.MkdirTemp("", "writeTfvars-*")
|
||||
if err != nil {
|
||||
t.Fatalf("mkdir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
req := Request{
|
||||
SovereignFQDN: "otech85.omani.works",
|
||||
OrgName: "Acme",
|
||||
OrgEmail: "ops@acme.io",
|
||||
HetznerToken: "tok",
|
||||
HetznerProjectID: "p1",
|
||||
Region: "fsn1",
|
||||
ControlPlaneSize: "cpx52",
|
||||
WorkerSize: "cpx41",
|
||||
WorkerCount: 3,
|
||||
}
|
||||
if err := writeTfvars(dir, req); err != nil {
|
||||
t.Fatalf("writeTfvars: %v", err)
|
||||
}
|
||||
raw, err := os.ReadFile(dir + "/tofu.auto.tfvars.json")
|
||||
if err != nil {
|
||||
t.Fatalf("read tfvars: %v", err)
|
||||
}
|
||||
var parsed map[string]any
|
||||
if err := json.Unmarshal(raw, &parsed); err != nil {
|
||||
t.Fatalf("parse tfvars: %v", err)
|
||||
}
|
||||
if v, _ := parsed["control_plane_size"].(string); v != "cpx52" {
|
||||
t.Fatalf("control_plane_size must round-trip operator override: got %q want cpx52", v)
|
||||
}
|
||||
if v, _ := parsed["worker_size"].(string); v != "cpx41" {
|
||||
t.Fatalf("worker_size must round-trip operator override: got %q want cpx41", v)
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user