# openova/.github/workflows/catalyst-build.yaml

name: Build & Deploy Catalyst

# Event-driven only. Cron is forbidden — the OpenOva architecture is
# event-driven end to end (Flux dependsOn, NATS JetStream, SSE,
# Helm post-install hooks). `push` on the relevant paths is the
# canonical trigger; `workflow_dispatch` exists for ad-hoc re-runs
# without a code change.
on:
  # Manual runs from the Actions UI (no inputs).
  workflow_dispatch:
  # Pushes to main that touch any of the paths listed below, including
  # this workflow file itself.
  push:
    branches:
      - main
    paths:
      - "core/console/**"
      - "core/admin/**"
      - "core/marketplace/**"
      - "core/marketplace-api/**"
      - "products/catalyst/bootstrap/**"
      - "products/catalyst/chart/**"
      - ".github/workflows/catalyst-build.yaml"

# Workflow-wide environment: the registry host used for login and the
# untagged image repositories for the two Catalyst images. Steps append
# the tag (`:test`, `:<short sha>`, `:latest`) themselves.
env:
  REGISTRY: "ghcr.io"
  UI_IMAGE: "ghcr.io/openova-io/openova/catalyst-ui"
  API_IMAGE: "ghcr.io/openova-io/openova/catalyst-api"

jobs:
  # Builds the Catalyst UI image, smoke-tests the local build, then pushes
  # it. Exposes the short commit SHA as a job output for the deploy job.
  build-ui:
    runs-on: ubuntu-latest
    outputs:
      sha_short: ${{ steps.vars.outputs.sha_short }}
    permissions:
      packages: write
      contents: read
    steps:
      - name: Checkout openova-private
        uses: actions/checkout@v4

      # Second checkout lands in ./openova-src and is what the image
      # builds below use as their context.
      - name: Checkout openova (public source)
        uses: actions/checkout@v4
        with:
          path: openova-src
          repository: openova-io/openova

      # Publishes the first seven characters of GITHUB_SHA as the
      # `sha_short` step output.
      - name: Set short SHA
        id: vars
        run: echo "sha_short=${GITHUB_SHA:0:7}" >> "$GITHUB_OUTPUT"

      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
          registry: ${{ env.REGISTRY }}

      - name: Build UI image (test)
        uses: docker/build-push-action@v6
        with:
          # The whole public checkout is the build context: the Vite
          # prebuild script walks platform/, products/ and
          # clusters/_template/bootstrap-kit/ to generate the catalog and
          # BOOTSTRAP_KIT, and the Containerfile aborts the build when any
          # of those directories is absent.
          file: openova-src/products/catalyst/bootstrap/ui/Containerfile
          context: openova-src
          build-args: VITE_APP_MODE=selfhosted
          tags: ${{ env.UI_IMAGE }}:test
          # Not pushed yet: the image is loaded locally so the smoke test
          # step can `docker run` it.
          load: true
          push: false

      # Runs the locally built :test image and asserts that (1) the root
      # page answers 200, (2) every component logo is served, and (3) the
      # JS bundle contains every bootstrap-kit Blueprint id. Any failure
      # fails the job before the image is pushed.
      - name: Smoke test UI
        run: |
          set -euo pipefail

          BUNDLE_TMP=$(mktemp)

          # Single cleanup path for success and every failure: remove the
          # smoke container and the extracted bundle. Runs on any exit,
          # including errexit aborts.
          cleanup() {
            docker rm -f smoke-ui > /dev/null 2>&1 || true
            rm -f "$BUNDLE_TMP"
          }
          trap cleanup EXIT

          docker run -d --name smoke-ui -p 8080:8080 ${{ env.UI_IMAGE }}:test

          # Readiness: poll for up to ~30s instead of a fixed sleep, which
          # is racy on a loaded runner. `|| true` keeps a connection
          # failure (curl prints 000 and exits non-zero) from aborting the
          # script under errexit before the diagnostic below is printed.
          STATUS=000
          for _ in $(seq 1 30); do
            STATUS=$(curl -s -o /dev/null -w '%{http_code}' http://localhost:8080/ || true)
            if [ "$STATUS" = "200" ]; then
              break
            fi
            sleep 1
          done
          if [ "$STATUS" != "200" ]; then
            echo "Smoke test failed: expected 200 from /, got $STATUS"
            docker logs smoke-ui || true
            exit 1
          fi
          echo "Smoke test (root) passed: HTTP $STATUS"

          # Logo path regression guard (#173): the wizard's StepComponents
          # references `${BASE}component-logos/<id>.<ext>` where BASE is the
          # Vite base and the extension is whatever the upstream brand mark
          # is published as (some are SVG, some are PNG — we use the canonical
          # upstream asset rather than auto-converting). Inside the catalyst-
          # ui pod nginx serves the file at /component-logos/<id>.<ext>
          # (Traefik strips /sovereign before proxying — see nginx.conf
          # comment). We list every logo path that componentGroups.ts
          # references, so a missing or mis-cased asset fails the build,
          # not the user.
          for path in \
              component-logos/cilium.svg \
              component-logos/flux.svg \
              component-logos/harbor.svg \
              component-logos/grafana.svg \
              component-logos/keycloak.svg \
              component-logos/openbao.svg \
              component-logos/langfuse.png \
              component-logos/vllm.png \
              component-logos/temporal.svg \
              component-logos/stalwart.svg \
              component-logos/cnpg.svg \
              component-logos/loki.png \
              component-logos/mimir.png \
              component-logos/tempo.svg \
              component-logos/ntfy.svg \
              component-logos/ferretdb.png \
              component-logos/openmeter.png \
              component-logos/coraza.png \
              component-logos/external-dns.png \
              component-logos/netbird.png \
              component-logos/strongswan.png \
              component-logos/trivy.png \
              component-logos/syft-grype.png ; do
            CODE=$(curl -s -o /dev/null -w '%{http_code}' \
              "http://localhost:8080/${path}" || true)
            if [ "$CODE" != "200" ]; then
              echo "Logo smoke FAILED: /${path} returned $CODE"
              exit 1
            fi
            echo "Logo smoke OK: /${path} HTTP $CODE"
          done

          # Bootstrap-kit regression guard: the Provision page reads
          # BOOTSTRAP_KIT from the bundled catalog.generated.ts to render
          # the per-Blueprint bubbles. An earlier revision shipped with a
          # docker context that didn't include clusters/_template/bootstrap-kit/
          # so the prebuild script silently produced an empty array — the
          # page rendered only the 2 supernodes. Asserting the bundle
          # contains every bp-* id makes that regression impossible.
          #
          # Implementation note: the first index-*.js under the nginx assets
          # directory is copied out of the image once (find + cat inside a
          # throwaway container), then grepped locally. Earlier we ran
          # `grep` inside docker run -c "..." and the nested quote escaping
          # produced false negatives (bp-cilium was in the bundle but the
          # grep argument matched a literal `"bp-cilium"` whose surrounding
          # quotes were eaten by shell expansion). Local grep on the
          # extracted file removes that whole class of escaping bugs.
          docker run --rm --entrypoint sh ${{ env.UI_IMAGE }}:test \
            -c 'cat $(find /usr/share/nginx/html/assets -name "index-*.js" | head -1)' \
            > "$BUNDLE_TMP"
          BUNDLE_BYTES=$(wc -c < "$BUNDLE_TMP")
          echo "Bundle size: $BUNDLE_BYTES bytes"
          # An empty or near-empty file means the extraction itself failed
          # (e.g. no index-*.js was found), so fail loudly before grepping.
          if [ "$BUNDLE_BYTES" -lt 100000 ]; then
            echo "Bootstrap-kit smoke FAILED: bundle suspiciously small ($BUNDLE_BYTES bytes)"
            exit 1
          fi
          for bp in bp-cilium bp-cert-manager bp-flux bp-crossplane bp-sealed-secrets \
                    bp-spire bp-nats-jetstream bp-openbao bp-keycloak bp-gitea ; do
            if ! grep -q -F "$bp" "$BUNDLE_TMP" ; then
              echo "Bootstrap-kit smoke FAILED: ${bp} missing from bundle"
              exit 1
            fi
            echo "Bootstrap-kit smoke OK: ${bp}"
          done

          echo "All smoke tests passed."

      # Same context, Containerfile and build args as the test build; this
      # invocation pushes under both the short-SHA tag and `latest`.
      - name: Push UI image
        uses: docker/build-push-action@v6
        with:
          # Context must be the repo root: the Vite prebuild script walks
          # platform/, products/ and clusters/_template/bootstrap-kit/ to
          # populate the catalog + BOOTSTRAP_KIT, and the Containerfile
          # fails the build if any of those dirs is missing.
          file: openova-src/products/catalyst/bootstrap/ui/Containerfile
          context: openova-src
          build-args: VITE_APP_MODE=selfhosted
          tags: |
            ${{ env.UI_IMAGE }}:${{ steps.vars.outputs.sha_short }}
            ${{ env.UI_IMAGE }}:latest
          push: true

  # Builds the Catalyst API image, smoke-tests the local build, then
  # pushes it. Also exposes the short commit SHA as a job output.
  build-api:
    runs-on: ubuntu-latest
    outputs:
      sha_short: ${{ steps.vars.outputs.sha_short }}
    permissions:
      packages: write
      contents: read
    steps:
      - name: Checkout openova-private
        uses: actions/checkout@v4

      # Second checkout lands in ./openova-src and is what the image
      # builds below use as their context.
      - name: Checkout openova (public source)
        uses: actions/checkout@v4
        with:
          path: openova-src
          repository: openova-io/openova

      # Publishes the first seven characters of GITHUB_SHA as the
      # `sha_short` step output.
      - name: Set short SHA
        id: vars
        run: echo "sha_short=${GITHUB_SHA:0:7}" >> "$GITHUB_OUTPUT"

      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
          registry: ${{ env.REGISTRY }}

      # The build context is the root of the public openova checkout
      # (openova-src/) rather than products/catalyst/bootstrap/api/: the
      # runtime image bundles the canonical OpenTofu module from
      # infra/hetzner/, and the Containerfile's COPY paths are written
      # relative to the repo root. With a narrower context /infra/hetzner/
      # is absent from the image and every Launch fails with
      # `stage tofu module: open /infra/hetzner: no such file or
      # directory`.
      - name: Build API image (test)
        uses: docker/build-push-action@v6
        with:
          file: openova-src/products/catalyst/bootstrap/api/Containerfile
          context: openova-src
          tags: ${{ env.API_IMAGE }}:test
          # Not pushed yet: the image is loaded locally so the smoke test
          # steps can `docker run` it.
          load: true
          push: false

      # Smoke test — the catalyst-api Pod is the OpenTofu runner, so the .tf
      # sources MUST be present at /infra/hetzner/ inside the image. Anything
      # less ships a broken image that fails on every Launch with `stage tofu
      # module: open /infra/hetzner: no such file or directory`. Failure of
      # this step fails the build.
      - name: Smoke test API — verify infra/hetzner/ is bundled
        run: |
          set -euo pipefail
          LISTING=$(docker run --rm --entrypoint sh ${{ env.API_IMAGE }}:test \
            -c 'ls -la /infra/hetzner/')
          echo "$LISTING"
          for f in main.tf variables.tf outputs.tf versions.tf \
                   cloudinit-control-plane.tftpl cloudinit-worker.tftpl ; do
            # Compare the last column of each `ls -la` row to the file name
            # as an exact string. A regex built from the name would treat
            # the dots as wildcards, and feeding the listing through a
            # here-string (instead of `echo | grep -q`) avoids a pipeline
            # whose writer could be killed early under pipefail.
            if ! awk -v want="$f" '$NF == want { found = 1 } END { exit !found }' \
                <<< "$LISTING"; then
              echo "Smoke test FAILED: /infra/hetzner/${f} missing from image"
              exit 1
            fi
            echo "Smoke test OK: /infra/hetzner/${f} present"
          done
          echo "All API smoke tests passed."

      # tofu CLI smoke test — the runtime image bundles the OpenTofu CLI
      # because internal/provisioner execs `tofu init / plan / apply` (see
      # internal/provisioner/provisioner.go runTofu()). Without the binary
      # every Launch SSE stream returns:
      #   tofu init: exec: "tofu": executable file not found in $PATH
      # We assert (a) `tofu version` succeeds inside the image, (b) the
      # output contains a line that is exactly `OpenTofu v<EXPECTED>`, and
      # (c) the binary also runs as the runtime UID. EXPECTED_TOFU_VERSION
      # must stay in lockstep with the TOFU_VERSION ARG in the
      # Containerfile. When you bump the version in the Containerfile,
      # bump it here too.
      - name: Smoke test API — verify OpenTofu CLI is installed
        env:
          # Quoted so the version is always read as a string.
          EXPECTED_TOFU_VERSION: "1.11.6"
        run: |
          set -euo pipefail
          OUT=$(docker run --rm --entrypoint sh ${{ env.API_IMAGE }}:test \
            -c 'tofu version')
          echo "$OUT"
          # -x -F: whole-line, fixed-string match, so the dots in the
          # version are literal rather than regex wildcards.
          if ! grep -qxF -- "OpenTofu v${EXPECTED_TOFU_VERSION}" <<< "$OUT"; then
            echo "Smoke test FAILED: expected 'OpenTofu v${EXPECTED_TOFU_VERSION}', got:"
            echo "$OUT"
            exit 1
          fi
          echo "Smoke test OK: OpenTofu v${EXPECTED_TOFU_VERSION} present on PATH."

          # Re-assert the binary is executable for the actual runtime UID
          # (65534, set in api-deployment.yaml securityContext.runAsUser).
          # `--user` overrides the image USER directive, simulating the K8s
          # securityContext: a missing exec bit or wrong owner here would
          # surface as a Launch failure in production, never in CI, so we
          # gate it at build time.
          #
          # The command inside the container must NOT be piped through
          # `head`: the inner `sh` has no pipefail, so the pipeline would
          # report head's exit status and a failing `tofu` would still pass.
          # Capture the output and trim it on the runner instead.
          if ! NOBODY_OUT=$(docker run --rm --user 65534:65534 --entrypoint sh \
              ${{ env.API_IMAGE }}:test -c 'tofu version'); then
            echo "Smoke test FAILED: tofu did not run as UID 65534"
            exit 1
          fi
          head -n 1 <<< "$NOBODY_OUT"
          echo "Smoke test OK: tofu executable as UID 65534."

      # Same context and Containerfile as the test build; this invocation
      # pushes under both the short-SHA tag and `latest`.
      - name: Push API image
        uses: docker/build-push-action@v6
        with:
          file: openova-src/products/catalyst/bootstrap/api/Containerfile
          context: openova-src
          tags: |
            ${{ env.API_IMAGE }}:${{ steps.vars.outputs.sha_short }}
            ${{ env.API_IMAGE }}:latest
          push: true

  # Runs only after both image jobs succeed; rewrites the chart's image
  # tags and commits the result, hence `contents: write`.
  deploy:
    runs-on: ubuntu-latest
    needs:
      - build-ui
      - build-api
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Rewrites the tag on the `image: <repo>:<tag>` line of each
      # deployment template to the short SHA produced by build-ui, then
      # prints the resulting image lines to the job log. UI_IMAGE and
      # API_IMAGE come from the workflow-level env.
      - name: Update deployment manifests with new SHA tags
        env:
          SHA_SHORT: ${{ needs.build-ui.outputs.sha_short }}
        run: |
          ui_manifest="products/catalyst/chart/templates/ui-deployment.yaml"
          api_manifest="products/catalyst/chart/templates/api-deployment.yaml"

          sed -i -e "s|image: ${UI_IMAGE}:.*|image: ${UI_IMAGE}:${SHA_SHORT}|" "$ui_manifest"
          sed -i -e "s|image: ${API_IMAGE}:.*|image: ${API_IMAGE}:${SHA_SHORT}|" "$api_manifest"

          echo "Updated manifests to SHA ${SHA_SHORT}:"
          for manifest in "$ui_manifest" "$api_manifest"; do
            grep "image:" "$manifest"
          done

      # Commits whatever changed under products/ as the Actions bot and
      # pushes it; exits successfully without committing when nothing is
      # staged.
      - name: Commit and push manifest updates
        env:
          SHA_SHORT: ${{ needs.build-ui.outputs.sha_short }}
        run: |
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git config user.name "github-actions[bot]"
          git add products/

          # `--quiet` makes git diff exit 0 when the index matches HEAD.
          if git diff --staged --quiet; then
            echo "No changes to commit"
            exit 0
          fi

          git commit -m "deploy: update catalyst images to ${SHA_SHORT}"
          git push