Merge pull request 'fix: [nomad-step-0] S0.5 — Woodpecker CI validation for nomad/vault artifacts (#825)' (#833) from fix/issue-825 into main
This commit is contained in:
commit
2a7ae0b7ea
4 changed files with 303 additions and 0 deletions
102
.woodpecker/nomad-validate.yml
Normal file
102
.woodpecker/nomad-validate.yml
Normal file
|
|
@ -0,0 +1,102 @@
|
||||||
|
# =============================================================================
|
||||||
|
# .woodpecker/nomad-validate.yml — Static validation for Nomad+Vault artifacts
|
||||||
|
#
|
||||||
|
# Part of the Nomad+Vault migration (S0.5, issue #825). Locks in the
|
||||||
|
# "no-ad-hoc-steps" principle: every HCL/shell artifact under nomad/ or
|
||||||
|
# lib/init/nomad/, plus the `disinto init` dispatcher, gets checked
|
||||||
|
# before it can land.
|
||||||
|
#
|
||||||
|
# Triggers on PRs (and pushes) that touch any of:
|
||||||
|
# nomad/** — HCL configs (server, client, vault)
|
||||||
|
# lib/init/nomad/** — cluster-up / install / systemd / vault-init
|
||||||
|
# bin/disinto — `disinto init --backend=nomad` dispatcher
|
||||||
|
# tests/disinto-init-nomad.bats — the bats suite itself
|
||||||
|
# .woodpecker/nomad-validate.yml — the pipeline definition
|
||||||
|
#
|
||||||
|
# Steps (all fail-closed — any error blocks merge):
|
||||||
|
# 1. nomad-config-validate — `nomad config validate` on server + client HCL
|
||||||
|
# 2. vault-operator-diagnose — `vault operator diagnose` syntax check on vault.hcl
|
||||||
|
# 3. shellcheck-nomad — shellcheck the cluster-up + install scripts + disinto
|
||||||
|
# 4. bats-init-nomad — `disinto init --backend=nomad --dry-run` smoke tests
|
||||||
|
#
|
||||||
|
# Pinned image versions match lib/init/nomad/install.sh (nomad 1.9.5 /
|
||||||
|
# vault 1.18.5). Bump there AND here together — drift = CI passing on
|
||||||
|
# syntax the runtime would reject.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Run only for push / pull_request events whose changeset touches one of the
# Nomad+Vault artifacts, the dispatcher, the bats suite, or this pipeline.
when:
  - event:
      - push
      - pull_request
    path:
      - 'nomad/**'
      - 'lib/init/nomad/**'
      - 'bin/disinto'
      - 'tests/disinto-init-nomad.bats'
      - '.woodpecker/nomad-validate.yml'
|
||||||
|
|
||||||
|
# Authenticated clone — same pattern as .woodpecker/ci.yml. Forgejo is
|
||||||
|
# configured with REQUIRE_SIGN_IN, so anonymous git clones fail (exit 128).
|
||||||
|
# FORGE_TOKEN is injected globally via WOODPECKER_ENVIRONMENT.
|
||||||
|
clone:
  git:
    image: alpine/git
    commands:
      # Splice "token:$FORGE_TOKEN@" in after the scheme of the clone URL.
      - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
      # Shallow clone into the workspace root.
      - git clone --depth 1 "$AUTH_URL" .
      # Fetch the exact ref under test, then detach onto it.
      - git fetch --depth 1 origin "$CI_COMMIT_REF"
      - git checkout FETCH_HEAD
|
||||||
|
|
||||||
|
steps:
  # ── 1. Nomad HCL ─────────────────────────────────────────────────────────
  # Runs `nomad config validate` over the server and client configs.
  # vault.hcl is not passed here: it is a Vault config and is handled by the
  # vault-operator-diagnose step below.
  - name: nomad-config-validate
    image: 'hashicorp/nomad:1.9.5'
    commands:
      - nomad config validate nomad/server.hcl nomad/client.hcl

  # ── 2. Vault HCL ─────────────────────────────────────────────────────────
  # Runs `vault operator diagnose` against nomad/vault.hcl with the storage
  # and listener checks skipped. Exit-status policy:
  #   0     → pass (all checks green)
  #   2     → pass (advisory warnings only; the TLS-disabled localhost
  #           listener is intentional and triggers one)
  #   other → fail, propagating the original status
  - name: vault-operator-diagnose
    image: 'hashicorp/vault:1.18.5'
    commands:
      - |
        status=0
        vault operator diagnose -config=nomad/vault.hcl -skip=storage -skip=listener || status=$?
        if [ "$status" -eq 0 ]; then
          echo "vault config: all checks green"
        elif [ "$status" -eq 2 ]; then
          echo "vault config: parse OK (rc=2 — advisory warnings only; TLS-disabled on localhost listener is by design)"
        else
          echo "vault config: hard failure (rc=$status)" >&2
          exit "$status"
        fi

  # ── 3. Shellcheck ────────────────────────────────────────────────────────
  # Lints lib/init/nomad/*.sh and bin/disinto at warning severity or above.
  # bin/disinto is listed explicitly because it has no .sh extension.
  - name: shellcheck-nomad
    image: 'koalaman/shellcheck-alpine:stable'
    commands:
      - shellcheck --severity=warning lib/init/nomad/*.sh bin/disinto

  # ── 4. bats ──────────────────────────────────────────────────────────────
  # Installs bash + bats, then runs the `disinto init` dispatcher suite
  # (dry-run only).
  - name: bats-init-nomad
    image: 'alpine:3.19'
    commands:
      - apk add --no-cache bash bats
      - bats tests/disinto-init-nomad.bats
|
||||||
|
|
@ -207,18 +207,21 @@ generate_compose() {
|
||||||
|
|
||||||
# Generate docker/agents/ files if they don't already exist.
|
# Generate docker/agents/ files if they don't already exist.
|
||||||
# (Implementation in lib/generators.sh)
|
# (Implementation in lib/generators.sh)
|
||||||
|
# shellcheck disable=SC2120 # passthrough wrapper; forwards any future args to impl
|
||||||
generate_agent_docker() {
|
generate_agent_docker() {
|
||||||
_generate_agent_docker_impl "$@"
|
_generate_agent_docker_impl "$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Generate docker/Caddyfile template for edge proxy.
|
# Generate docker/Caddyfile template for edge proxy.
|
||||||
# (Implementation in lib/generators.sh)
|
# (Implementation in lib/generators.sh)
|
||||||
|
# shellcheck disable=SC2120 # passthrough wrapper; forwards any future args to impl
|
||||||
generate_caddyfile() {
|
generate_caddyfile() {
|
||||||
_generate_caddyfile_impl "$@"
|
_generate_caddyfile_impl "$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Generate docker/index.html default page.
|
# Generate docker/index.html default page.
|
||||||
# (Implementation in lib/generators.sh)
|
# (Implementation in lib/generators.sh)
|
||||||
|
# shellcheck disable=SC2120 # passthrough wrapper; forwards any future args to impl
|
||||||
generate_staging_index() {
|
generate_staging_index() {
|
||||||
_generate_staging_index_impl "$@"
|
_generate_staging_index_impl "$@"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
92
nomad/AGENTS.md
Normal file
92
nomad/AGENTS.md
Normal file
|
|
@ -0,0 +1,92 @@
|
||||||
|
# nomad/ — Agent Instructions
|
||||||
|
|
||||||
|
Nomad + Vault HCL for the factory's single-node cluster. These files are
|
||||||
|
the source of truth that `lib/init/nomad/cluster-up.sh` copies onto a
|
||||||
|
factory box under `/etc/nomad.d/` and `/etc/vault.d/` at init time.
|
||||||
|
|
||||||
|
This directory is part of the **Nomad+Vault migration (Step 0)** —
|
||||||
|
see issues #821–#825 for the step breakdown. Jobspecs land in Step 1.
|
||||||
|
|
||||||
|
## What lives here
|
||||||
|
|
||||||
|
| File | Deployed to | Owns |
|
||||||
|
|---|---|---|
|
||||||
|
| `server.hcl` | `/etc/nomad.d/server.hcl` | agent role, bind, ports, `data_dir` (S0.2) |
|
||||||
|
| `client.hcl` | `/etc/nomad.d/client.hcl` | Docker driver cfg + `host_volume` declarations (S0.2) |
|
||||||
|
| `vault.hcl` | `/etc/vault.d/vault.hcl` | Vault storage, listener, UI, `disable_mlock` (S0.3) |
|
||||||
|
|
||||||
|
Nomad auto-merges every `*.hcl` under `-config=/etc/nomad.d/`, so the
|
||||||
|
split between `server.hcl` and `client.hcl` is for readability, not
|
||||||
|
semantics. The top-of-file header in each config documents which blocks
|
||||||
|
it owns.
|
||||||
|
|
||||||
|
## What does NOT live here yet
|
||||||
|
|
||||||
|
- **Jobspecs.** Step 0 brings up an *empty* cluster. Step 1 (and later)
|
||||||
|
adds `*.nomad.hcl` job files for forgejo, woodpecker, agents, caddy,
|
||||||
|
etc. When that lands, jobspecs will live in `nomad/jobs/` and each
|
||||||
|
will get its own header comment pointing to the `host_volume` names
|
||||||
|
it consumes (`volume = "forgejo-data"`, etc. — declared in
|
||||||
|
`client.hcl`).
|
||||||
|
- **TLS, ACLs, gossip encryption.** Deliberately absent in Step 0 —
|
||||||
|
factory traffic stays on localhost. These land in later migration
|
||||||
|
steps alongside multi-node support.
|
||||||
|
|
||||||
|
## Adding a jobspec (Step 1 and later)
|
||||||
|
|
||||||
|
1. Drop a file in `nomad/jobs/<service>.nomad.hcl`.
|
||||||
|
2. If it needs persistent state, reference a `host_volume` already
|
||||||
|
declared in `client.hcl` — *don't* add ad-hoc host paths in the
|
||||||
|
jobspec. If a new volume is needed, add it to **both**:
|
||||||
|
- `nomad/client.hcl` — the `host_volume "<name>" { path = … }` block
|
||||||
|
- `lib/init/nomad/cluster-up.sh` — the `HOST_VOLUME_DIRS` array
|
||||||
|
The two must stay in sync or nomad fingerprinting will fail and the
|
||||||
|
node stays in "initializing".
|
||||||
|
3. Pin image tags — `image = "forgejo/forgejo:1.22.5"`, not `:latest`.
|
||||||
|
4. Add the jobspec path to `.woodpecker/nomad-validate.yml`'s trigger
|
||||||
|
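list if it lives outside `nomad/**` (which the trigger already matches), and add a step that validates it — none of the four existing steps checks jobspecs.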
|
||||||
|
|
||||||
|
## How CI validates these files
|
||||||
|
|
||||||
|
`.woodpecker/nomad-validate.yml` runs on every PR that touches `nomad/`,
|
||||||
|
`lib/init/nomad/`, `bin/disinto`, `tests/disinto-init-nomad.bats`, or the pipeline file itself — and on pushes touching the same paths. Four fail-closed steps:
|
||||||
|
|
||||||
|
1. **`nomad config validate nomad/server.hcl nomad/client.hcl`**
|
||||||
|
— parses the HCL, fails on unknown blocks, bad port ranges, invalid
|
||||||
|
driver config. Vault HCL is excluded (different tool).
|
||||||
|
2. **`vault operator diagnose -config=nomad/vault.hcl -skip=storage -skip=listener`**
|
||||||
|
— Vault's equivalent syntax + schema check. `-skip=storage/listener`
|
||||||
|
disables the runtime checks (CI containers don't have
|
||||||
|
`/var/lib/vault/data` or port 8200).
|
||||||
|
3. **`shellcheck --severity=warning lib/init/nomad/*.sh bin/disinto`**
|
||||||
|
— all init/dispatcher shell clean. `bin/disinto` has no `.sh`
|
||||||
|
extension so the repo-wide shellcheck in `.woodpecker/ci.yml` skips
|
||||||
|
it — this is the one place it gets checked.
|
||||||
|
4. **`bats tests/disinto-init-nomad.bats`**
|
||||||
|
— exercises the dispatcher: `disinto init --backend=nomad --dry-run`,
|
||||||
|
`… --empty --dry-run`, and the `--backend=docker` regression guard.
|
||||||
|
|
||||||
|
If a PR breaks `nomad/server.hcl` (e.g. typo in a block name), step 1
|
||||||
|
fails with a clear error; the fix makes it pass. PRs that don't touch
|
||||||
|
any of the trigger paths skip this pipeline entirely.
|
||||||
|
|
||||||
|
## Version pinning
|
||||||
|
|
||||||
|
Nomad + Vault versions are pinned in **two** places — bumping one
|
||||||
|
without the other is drift that no pipeline step detects:
|
||||||
|
|
||||||
|
- `lib/init/nomad/install.sh` — the apt-installed versions on factory
|
||||||
|
boxes (`NOMAD_VERSION`, `VAULT_VERSION`).
|
||||||
|
- `.woodpecker/nomad-validate.yml` — the `hashicorp/nomad:…` and
|
||||||
|
`hashicorp/vault:…` image tags used for static validation.
|
||||||
|
|
||||||
|
Bump both in the same PR. The CI pipeline will fail if the pinned
|
||||||
|
image's `config validate` rejects syntax the installed runtime would
|
||||||
|
accept. The reverse — syntax the image accepts but the installed runtime rejects — passes CI unnoticed.
|
||||||
|
|
||||||
|
## Related
|
||||||
|
|
||||||
|
- `lib/init/nomad/` — installer + systemd units + cluster-up orchestrator.
|
||||||
|
- `.woodpecker/nomad-validate.yml` — this directory's CI pipeline.
|
||||||
|
- Top-of-file headers in `server.hcl` / `client.hcl` / `vault.hcl`
|
||||||
|
document the per-file ownership contract.
|
||||||
106
tests/disinto-init-nomad.bats
Normal file
106
tests/disinto-init-nomad.bats
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
#!/usr/bin/env bats
|
||||||
|
# =============================================================================
|
||||||
|
# tests/disinto-init-nomad.bats — Regression guard for `disinto init`
|
||||||
|
# backend dispatch (S0.5, issue #825).
|
||||||
|
#
|
||||||
|
# Exercises the three CLI paths the Nomad+Vault migration cares about (flag-syntax and flag-validation checks follow at the end of the file):
|
||||||
|
# 1. --backend=nomad --dry-run → cluster-up step list
|
||||||
|
# 2. --backend=nomad --empty --dry-run → same, with "--empty" banner
|
||||||
|
# 3. --backend=docker --dry-run → docker path unaffected
|
||||||
|
#
|
||||||
|
# A throw-away `placeholder/repo` slug satisfies the CLI's positional-arg
|
||||||
|
# requirement (the nomad dispatcher never touches it). --dry-run on both
|
||||||
|
# backends short-circuits before any network/filesystem mutation, so the
|
||||||
|
# suite is hermetic — no Forgejo, no sudo, no real cluster.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# One-time suite setup: resolve the repo root from this test file's location,
# export the path to bin/disinto, and abort the suite if it is not executable.
setup_file() {
  DISINTO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
  DISINTO_BIN="${DISINTO_ROOT}/bin/disinto"
  export DISINTO_ROOT DISINTO_BIN

  if [ ! -x "$DISINTO_BIN" ]; then
    echo "disinto binary not executable: $DISINTO_BIN" >&2
    return 1
  fi
}
|
||||||
|
|
||||||
|
# ── --backend=nomad --dry-run ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@test "disinto init --backend=nomad --dry-run exits 0 and prints the step list" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --dry-run
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
|
||||||
|
# Dispatcher banner (cluster-up mode, no --empty).
|
||||||
|
[[ "$output" == *"nomad backend: default (cluster-up; jobs deferred to Step 1)"* ]]
|
||||||
|
|
||||||
|
# All nine cluster-up dry-run steps, in order.
|
||||||
|
[[ "$output" == *"[dry-run] Step 1/9: install nomad + vault binaries"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 2/9: write + enable nomad.service (NOT started)"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 3/9: write + enable vault.service + vault.hcl (NOT started)"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 4/9: create host-volume dirs under /srv/disinto/"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 5/9: install /etc/nomad.d/server.hcl + client.hcl from repo"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 6/9: first-run vault init + persist unseal.key + root.token"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 7/9: systemctl start vault + poll until unsealed"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 8/9: systemctl start nomad + poll until ≥1 node ready"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 9/9: write /etc/profile.d/disinto-nomad.sh"* ]]
|
||||||
|
|
||||||
|
[[ "$output" == *"Dry run complete — no changes made."* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── --backend=nomad --empty --dry-run ────────────────────────────────────────
|
||||||
|
|
||||||
|
@test "disinto init --backend=nomad --empty --dry-run prints the --empty banner + step list" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --empty --dry-run
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
|
||||||
|
# --empty changes the dispatcher banner but not the step list — Step 1
|
||||||
|
# of the migration will branch on $empty to gate job deployment; today
|
||||||
|
# both modes invoke the same cluster-up dry-run.
|
||||||
|
[[ "$output" == *"nomad backend: --empty (cluster-up only, no jobs)"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 1/9: install nomad + vault binaries"* ]]
|
||||||
|
[[ "$output" == *"Dry run complete — no changes made."* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── --backend=docker (regression guard) ──────────────────────────────────────
|
||||||
|
|
||||||
|
@test "disinto init --backend=docker does NOT dispatch to the nomad path" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=docker --dry-run
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
|
||||||
|
# Negative assertion: the nomad dispatcher banners must be absent.
|
||||||
|
[[ "$output" != *"nomad backend:"* ]]
|
||||||
|
[[ "$output" != *"[dry-run] Step 1/9: install nomad + vault binaries"* ]]
|
||||||
|
|
||||||
|
# Positive assertion: docker-path output still appears — the existing
|
||||||
|
# docker dry-run printed "=== disinto init ===" before listing the
|
||||||
|
# intended forge/compose actions.
|
||||||
|
[[ "$output" == *"=== disinto init ==="* ]]
|
||||||
|
[[ "$output" == *"── Dry-run: intended actions ────"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Flag syntax: --flag=value vs --flag value ────────────────────────────────
|
||||||
|
|
||||||
|
# Both forms must work. The bin/disinto flag loop has separate cases for
|
||||||
|
# `--backend value` and `--backend=value`; a regression in either would
|
||||||
|
# silently route to the docker default, which is the worst failure mode
|
||||||
|
# for a mid-migration dispatcher ("loud-failing stub" lesson from S0.4).
|
||||||
|
@test "disinto init --backend nomad (space-separated) dispatches to nomad" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend nomad --dry-run
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *"nomad backend: default"* ]]
|
||||||
|
[[ "$output" == *"[dry-run] Step 1/9: install nomad + vault binaries"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Flag validation ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@test "--backend=bogus is rejected with a clear error" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=bogus --dry-run
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[[ "$output" == *"invalid --backend value"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "--empty without --backend=nomad is rejected" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=docker --empty --dry-run
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[[ "$output" == *"--empty is only valid with --backend=nomad"* ]]
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue