# =============================================================================
# .woodpecker/nomad-validate.yml — Static validation for Nomad+Vault artifacts
#
# Part of the Nomad+Vault migration (S0.5, issue #825). Locks in the
# "no-ad-hoc-steps" principle: every HCL/shell artifact under nomad/ or
# lib/init/nomad/, plus the `disinto init` dispatcher, gets checked
# before it can land.
#
# Triggers on PRs (and pushes) that touch any of:
#   nomad/**                        — HCL configs (server, client, vault)
#   lib/init/nomad/**               — cluster-up / install / systemd / vault-init
#   bin/disinto                     — `disinto init --backend=nomad` dispatcher
#   tests/disinto-init-nomad.bats   — the bats suite itself
#   .woodpecker/nomad-validate.yml  — the pipeline definition
#
# Steps (all fail-closed — any error blocks merge):
#   1. nomad-config-validate   — `nomad config validate` on server + client HCL
#   2. nomad-job-validate      — `nomad job validate` looped over every
#                                nomad/jobs/*.hcl (new jobspecs get
#                                CI coverage automatically)
#   3. vault-operator-diagnose — `vault operator diagnose` syntax check on vault.hcl
#   4. shellcheck-nomad        — shellcheck the cluster-up + install scripts + disinto
#   5. bats-init-nomad         — `disinto init --backend=nomad --dry-run` smoke tests
#
# Pinned image versions match lib/init/nomad/install.sh (nomad 1.9.5 /
# vault 1.18.5). Bump there AND here together — drift = CI passing on
# syntax the runtime would reject.
# =============================================================================

# Run on pushes and pull requests, but only when the change touches one of
# the paths listed in the header (validated artifacts, the bats suite, or
# this pipeline definition itself).
when:
  - event:
      - push
      - pull_request
    path:
      - 'nomad/**'
      - 'lib/init/nomad/**'
      - 'bin/disinto'
      - 'tests/disinto-init-nomad.bats'
      - '.woodpecker/nomad-validate.yml'

# Authenticated clone — same pattern as .woodpecker/ci.yml. Forgejo is
# configured with REQUIRE_SIGN_IN, so anonymous git clones fail (exit 128).
# FORGE_TOKEN is injected globally via WOODPECKER_ENVIRONMENT.
clone:
  git:
    image: alpine/git
    commands:
      # Splice "token:$FORGE_TOKEN@" in right after the URL scheme so the
      # clone below carries credentials.
      - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
      # Shallow clone first, then fetch and check out exactly the ref under
      # test.
      - git clone --depth 1 "$AUTH_URL" .
      - git fetch --depth 1 origin "$CI_COMMIT_REF"
      - git checkout FETCH_HEAD

steps:
  # ── 1. Nomad HCL syntax check ────────────────────────────────────────────
  # Agent-config check: `nomad config validate` parses server.hcl and
  # client.hcl and fails on any HCL/semantic error (unknown block, invalid
  # port range, bad driver cfg). vault.hcl is left out on purpose — it is a
  # Vault config, not a Nomad one, and is covered by the
  # vault-operator-diagnose step instead.
  - name: nomad-config-validate
    image: hashicorp/nomad:1.9.5
    commands:
      - nomad version
      - nomad config validate nomad/server.hcl nomad/client.hcl

  # ── 2. Nomad jobspec HCL syntax check ────────────────────────────────────
  # `nomad job validate` and `nomad config validate` are *different* tools:
  # the former parses jobspec HCL (job/group/task blocks, driver config,
  # volume refs, network ports), the latter parses agent config HCL
  # (server/client blocks). Feeding a jobspec to step 1 would be rejected
  # with "unknown block 'job'", and vice versa — hence two separate steps.
  #
  # The check is offline: no running Nomad server is needed (exit 0 on
  # valid HCL, 1 on syntax/semantic error). Because the CLI accepts a single
  # path argument, the script loops over every `*.hcl` file in nomad/jobs/,
  # so a PR adding a new jobspec is covered automatically (no separate
  # "edit the pipeline" step to forget). The `.hcl` suffix is the naming
  # convention: anything else in nomad/jobs/ is deliberately not validated
  # by this step.
  #
  # The `-f` test handles the no-match case: POSIX sh does not nullglob, so
  # an empty jobs/ directory would leave the literal glob in "$f" and the
  # validate call would fail. Today forgejo.hcl exists, but the guard keeps
  # the step safe during any future transient empty state.
  #
  # Scope note: offline validate catches jobspec-level errors (unknown
  # stanzas, missing required fields, wrong value types, invalid driver
  # config). It does NOT resolve cross-file references like host_volume
  # source names against nomad/client.hcl — that mismatch surfaces at
  # scheduling time on the live cluster, not here. The paired-write rule
  # in nomad/AGENTS.md ("add to both client.hcl and cluster-up.sh") is the
  # primary guardrail for that class of drift.
  - name: nomad-job-validate
    image: hashicorp/nomad:1.9.5
    commands:
      - |
        set -e
        for f in nomad/jobs/*.hcl; do
          if [ ! -f "$f" ]; then
            continue
          fi
          echo "validating jobspec: $f"
          nomad job validate "$f"
        done

  # ── 3. Vault HCL syntax check ────────────────────────────────────────────
  # `vault operator diagnose` loads the config and runs a suite of checks.
  # Its exit codes:
  #   0 — all checks green
  #   1 — at least one hard failure (bad HCL, bad schema, unreachable storage)
  #   2 — advisory warnings only (no hard failure)
  # The factory dev-box vault.hcl deliberately runs TLS-disabled on a
  # localhost-only listener (documented in nomad/vault.hcl), which triggers
  # an advisory "Check Listener TLS" warning → exit 2. The config still
  # parses, so exit 2 is tolerated; only exit 1 or a crash fails the step.
  # The exit status is captured via `|| rc=$?` so the script itself decides
  # which codes fail the step.
  # -skip=storage/-skip=listener disables the runtime-only checks (vault's
  # container has /vault/file so storage is fine, but an explicit skip is
  # cheap insurance against future container-image drift).
  - name: vault-operator-diagnose
    image: hashicorp/vault:1.18.5
    commands:
      - |
        rc=0
        vault operator diagnose -config=nomad/vault.hcl -skip=storage -skip=listener || rc=$?
        if [ "$rc" -eq 0 ]; then
          echo "vault config: all checks green"
        elif [ "$rc" -eq 2 ]; then
          echo "vault config: parse OK (rc=2 — advisory warnings only; TLS-disabled on localhost listener is by design)"
        else
          echo "vault config: hard failure (rc=$rc)" >&2
          exit "$rc"
        fi

  # ── 4. Shellcheck ────────────────────────────────────────────────────────
  # Lints the new lib/init/nomad/*.sh scripts together with bin/disinto
  # (which owns the backend dispatcher). Since bin/disinto has no .sh
  # extension, the repo-wide shellcheck in .woodpecker/ci.yml skips it —
  # this step is the one place it gets checked.
  - name: shellcheck-nomad
    image: koalaman/shellcheck-alpine:stable
    commands:
      - shellcheck --severity=warning lib/init/nomad/*.sh bin/disinto

  # ── 5. bats: `disinto init --backend=nomad --dry-run` ────────────────────
  # Smoke test for the CLI dispatcher: both --backend=nomad variants must
  # exit 0 with the expected step list, and --backend=docker must stay on
  # the docker path (regression guard). Pure dry-run — no sudo, no network.
  - name: bats-init-nomad
    image: alpine:3.19
    commands:
      - apk add --no-cache bash bats
      - bats tests/disinto-init-nomad.bats