fix: [nomad-step-1] S1.3 — wire --with forgejo into bin/disinto init --backend=nomad (#842) #868

Merged
dev-bot merged 5 commits from fix/issue-842-1 into main 2026-04-16 12:50:49 +00:00
5 changed files with 181 additions and 37 deletions

View file

@ -16,7 +16,7 @@
# Steps (all fail-closed — any error blocks merge): # Steps (all fail-closed — any error blocks merge):
# 1. nomad-config-validate — `nomad config validate` on server + client HCL # 1. nomad-config-validate — `nomad config validate` on server + client HCL
# 2. nomad-job-validate — `nomad job validate` looped over every # 2. nomad-job-validate — `nomad job validate` looped over every
# nomad/jobs/*.nomad.hcl (new jobspecs get # nomad/jobs/*.hcl (new jobspecs get
# CI coverage automatically) # CI coverage automatically)
# 3. vault-operator-diagnose — `vault operator diagnose` syntax check on vault.hcl # 3. vault-operator-diagnose — `vault operator diagnose` syntax check on vault.hcl
# 4. shellcheck-nomad — shellcheck the cluster-up + install scripts + disinto # 4. shellcheck-nomad — shellcheck the cluster-up + install scripts + disinto
@ -57,6 +57,7 @@ steps:
- name: nomad-config-validate - name: nomad-config-validate
image: hashicorp/nomad:1.9.5 image: hashicorp/nomad:1.9.5
commands: commands:
- nomad version
- nomad config validate nomad/server.hcl nomad/client.hcl - nomad config validate nomad/server.hcl nomad/client.hcl
# ── 2. Nomad jobspec HCL syntax check ──────────────────────────────────── # ── 2. Nomad jobspec HCL syntax check ────────────────────────────────────
@ -68,15 +69,15 @@ steps:
# #
# Validation is offline: no running Nomad server is required (exit 0 on # Validation is offline: no running Nomad server is required (exit 0 on
# valid HCL, 1 on syntax/semantic error). The CLI takes a single path # valid HCL, 1 on syntax/semantic error). The CLI takes a single path
# argument so we loop over every `*.nomad.hcl` file under nomad/jobs/ — # argument so we loop over every `*.hcl` file under nomad/jobs/ —
# that way a new jobspec PR gets CI coverage automatically (no separate # that way a new jobspec PR gets CI coverage automatically (no separate
# "edit the pipeline" step to forget). The `.nomad.hcl` suffix is the # "edit the pipeline" step to forget). The `.hcl` suffix is the naming
# naming convention documented in nomad/AGENTS.md; anything else in # convention: anything else in nomad/jobs/ is deliberately not validated
# nomad/jobs/ is deliberately not validated by this step. # by this step.
# #
# `[ -f "$f" ]` guards against the no-match case: POSIX sh does not # `[ -f "$f" ]` guards against the no-match case: POSIX sh does not
# nullglob, so an empty jobs/ directory would leave the literal glob in # nullglob, so an empty jobs/ directory would leave the literal glob in
# "$f" and fail. Today forgejo.nomad.hcl exists, but the guard keeps the # "$f" and fail. Today forgejo.hcl exists, but the guard keeps the
# step safe during any future transient empty state. # step safe during any future transient empty state.
# #
# Scope note: offline validate catches jobspec-level errors (unknown # Scope note: offline validate catches jobspec-level errors (unknown
@ -91,7 +92,7 @@ steps:
commands: commands:
- | - |
set -e set -e
for f in nomad/jobs/*.nomad.hcl; do for f in nomad/jobs/*.hcl; do
[ -f "$f" ] || continue [ -f "$f" ] || continue
echo "validating jobspec: $f" echo "validating jobspec: $f"
nomad job validate "$f" nomad job validate "$f"

View file

@ -82,6 +82,7 @@ Init options:
--ci-id <n> Woodpecker CI repo ID (default: 0 = no CI) --ci-id <n> Woodpecker CI repo ID (default: 0 = no CI)
--forge-url <url> Forge base URL (default: http://localhost:3000) --forge-url <url> Forge base URL (default: http://localhost:3000)
--backend <value> Orchestration backend: docker (default) | nomad --backend <value> Orchestration backend: docker (default) | nomad
--with <services> (nomad) Deploy services: forgejo[,...] (S1.3)
--empty (nomad) Bring up cluster only, no jobs (S0.4) --empty (nomad) Bring up cluster only, no jobs (S0.4)
--bare Skip compose generation (bare-metal setup) --bare Skip compose generation (bare-metal setup)
--build Use local docker build instead of registry images (dev mode) --build Use local docker build instead of registry images (dev mode)
@ -662,14 +663,20 @@ prompt_admin_password() {
# init run); operators running without sudo-NOPASSWD should invoke # init run); operators running without sudo-NOPASSWD should invoke
# `sudo disinto init ...` directly. # `sudo disinto init ...` directly.
_disinto_init_nomad() { _disinto_init_nomad() {
local dry_run="${1:-false}" empty="${2:-false}" local dry_run="${1:-false}" empty="${2:-false}" with_services="${3:-}"
local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh" local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh"
local deploy_sh="${FACTORY_ROOT}/lib/init/nomad/deploy.sh"
if [ ! -x "$cluster_up" ]; then if [ ! -x "$cluster_up" ]; then
echo "Error: ${cluster_up} not found or not executable" >&2 echo "Error: ${cluster_up} not found or not executable" >&2
exit 1 exit 1
fi fi
if [ -n "$with_services" ] && [ ! -x "$deploy_sh" ]; then
echo "Error: ${deploy_sh} not found or not executable" >&2
exit 1
fi
# --empty and default both invoke cluster-up today. Log the requested # --empty and default both invoke cluster-up today. Log the requested
# mode so the dispatch is visible in factory bootstrap logs — Step 1 # mode so the dispatch is visible in factory bootstrap logs — Step 1
# will branch on $empty to gate the job-deployment path. # will branch on $empty to gate the job-deployment path.
@ -679,31 +686,105 @@ _disinto_init_nomad() {
echo "nomad backend: default (cluster-up; jobs deferred to Step 1)" echo "nomad backend: default (cluster-up; jobs deferred to Step 1)"
fi fi
# Dry-run forwards straight through; cluster-up.sh prints its own step # Dry-run: print cluster-up plan + deploy.sh plan
# list and exits 0 without touching the box.
local -a cmd=("$cluster_up")
if [ "$dry_run" = "true" ]; then if [ "$dry_run" = "true" ]; then
cmd+=("--dry-run") echo ""
"${cmd[@]}" echo "── Cluster-up dry-run ─────────────────────────────────"
exit $? local -a cmd=("$cluster_up" "--dry-run")
"${cmd[@]}" || true
echo ""
if [ -n "$with_services" ]; then
echo "── Deploy services dry-run ────────────────────────────"
echo "[deploy] services to deploy: ${with_services}"
local IFS=','
for svc in $with_services; do
svc=$(echo "$svc" | xargs) # trim whitespace
# Validate known services first
case "$svc" in
forgejo) ;;
*)
echo "Error: unknown service '${svc}' — known: forgejo" >&2
exit 1
;;
esac
local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl"
if [ ! -f "$jobspec_path" ]; then
echo "Error: jobspec not found: ${jobspec_path}" >&2
exit 1
fi
echo "[deploy] [dry-run] nomad job validate ${jobspec_path}"
echo "[deploy] [dry-run] nomad job run -detach ${jobspec_path}"
done
echo "[deploy] dry-run complete"
fi
exit 0
fi fi
# Real run — needs root. Invoke via sudo if we're not already root so # Real run: cluster-up + deploy services
# the command's exit code propagates directly. We don't distinguish local -a cluster_cmd=("$cluster_up")
# "sudo denied" from "cluster-up.sh failed" here; both surface as a
# non-zero exit, and cluster-up.sh's own error messages cover the
# latter case.
local rc=0
if [ "$(id -u)" -eq 0 ]; then if [ "$(id -u)" -eq 0 ]; then
"${cmd[@]}" || rc=$? "${cluster_cmd[@]}" || exit $?
else else
if ! command -v sudo >/dev/null 2>&1; then if ! command -v sudo >/dev/null 2>&1; then
echo "Error: cluster-up.sh must run as root and sudo is not installed" >&2 echo "Error: cluster-up.sh must run as root and sudo is not installed" >&2
exit 1 exit 1
fi fi
sudo -n -- "${cmd[@]}" || rc=$? sudo -n -- "${cluster_cmd[@]}" || exit $?
fi fi
exit "$rc"
# Deploy services if requested
if [ -n "$with_services" ]; then
echo ""
echo "── Deploying services ─────────────────────────────────"
local -a deploy_cmd=("$deploy_sh")
# Split comma-separated service list into positional args
local IFS=','
for svc in $with_services; do
svc=$(echo "$svc" | xargs) # trim whitespace
if ! echo "$svc" | grep -qE '^[a-zA-Z0-9_-]+$'; then
echo "Error: invalid service name '${svc}' — must match ^[a-zA-Z0-9_-]+$" >&2
exit 1
fi
# Validate known services FIRST (before jobspec check)
case "$svc" in
forgejo) ;;
*)
echo "Error: unknown service '${svc}' — known: forgejo" >&2
exit 1
;;
esac
# Check jobspec exists
local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl"
if [ ! -f "$jobspec_path" ]; then
echo "Error: jobspec not found: ${jobspec_path}" >&2
exit 1
fi
deploy_cmd+=("$svc")
done
if [ "$(id -u)" -eq 0 ]; then
"${deploy_cmd[@]}" || exit $?
else
if ! command -v sudo >/dev/null 2>&1; then
echo "Error: deploy.sh must run as root and sudo is not installed" >&2
exit 1
fi
sudo -n -- "${deploy_cmd[@]}" || exit $?
fi
# Print final summary
echo ""
echo "── Summary ────────────────────────────────────────────"
echo "Cluster: Nomad+Vault cluster is up"
echo "Deployed: ${with_services}"
if echo "$with_services" | grep -q "forgejo"; then
echo "Ports: forgejo: 3000"
fi
echo "────────────────────────────────────────────────────────"
fi
exit 0
} }
disinto_init() { disinto_init() {
@ -721,7 +802,7 @@ disinto_init() {
fi fi
# Parse flags # Parse flags
local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false with_services=""
while [ $# -gt 0 ]; do while [ $# -gt 0 ]; do
case "$1" in case "$1" in
--branch) branch="$2"; shift 2 ;; --branch) branch="$2"; shift 2 ;;
@ -730,6 +811,8 @@ disinto_init() {
--forge-url) forge_url_flag="$2"; shift 2 ;; --forge-url) forge_url_flag="$2"; shift 2 ;;
--backend) backend="$2"; shift 2 ;; --backend) backend="$2"; shift 2 ;;
--backend=*) backend="${1#--backend=}"; shift ;; --backend=*) backend="${1#--backend=}"; shift ;;
--with) with_services="$2"; shift 2 ;;
--with=*) with_services="${1#--with=}"; shift ;;
--bare) bare=true; shift ;; --bare) bare=true; shift ;;
--build) use_build=true; shift ;; --build) use_build=true; shift ;;
--empty) empty=true; shift ;; --empty) empty=true; shift ;;
@ -764,11 +847,23 @@ disinto_init() {
exit 1 exit 1
fi fi
# --with requires --backend=nomad
if [ -n "$with_services" ] && [ "$backend" != "nomad" ]; then
echo "Error: --with requires --backend=nomad" >&2
exit 1
fi
# --empty and --with are mutually exclusive
if [ "$empty" = true ] && [ -n "$with_services" ]; then
echo "Error: --empty and --with are mutually exclusive" >&2
exit 1
fi
# Dispatch on backend — the nomad path runs lib/init/nomad/cluster-up.sh # Dispatch on backend — the nomad path runs lib/init/nomad/cluster-up.sh
# (S0.4). The default and --empty variants are identical today; Step 1 # (S0.4). The default and --empty variants are identical today; Step 1
# will branch on $empty to add job deployment to the default path. # will branch on $empty to add job deployment to the default path.
if [ "$backend" = "nomad" ]; then if [ "$backend" = "nomad" ]; then
_disinto_init_nomad "$dry_run" "$empty" _disinto_init_nomad "$dry_run" "$empty" "$with_services"
# shellcheck disable=SC2317 # _disinto_init_nomad always exits today; # shellcheck disable=SC2317 # _disinto_init_nomad always exits today;
# `return` is defensive against future refactors. # `return` is defensive against future refactors.
return return

View file

@ -24,7 +24,7 @@ it owns.
## What does NOT live here yet ## What does NOT live here yet
- **Jobspecs.** Step 0 brings up an *empty* cluster. Step 1 (and later) - **Jobspecs.** Step 0 brings up an *empty* cluster. Step 1 (and later)
adds `*.nomad.hcl` job files for forgejo, woodpecker, agents, caddy, adds `*.hcl` job files for forgejo, woodpecker, agents, caddy,
etc. When that lands, jobspecs will live in `nomad/jobs/` and each etc. When that lands, jobspecs will live in `nomad/jobs/` and each
will get its own header comment pointing to the `host_volume` names will get its own header comment pointing to the `host_volume` names
it consumes (`volume = "forgejo-data"`, etc. — declared in it consumes (`volume = "forgejo-data"`, etc. — declared in
@ -35,11 +35,11 @@ it owns.
## Adding a jobspec (Step 1 and later) ## Adding a jobspec (Step 1 and later)
1. Drop a file in `nomad/jobs/<service>.nomad.hcl`. The `.nomad.hcl` 1. Drop a file in `nomad/jobs/<service>.hcl`. The `.hcl` suffix is
suffix is load-bearing: `.woodpecker/nomad-validate.yml` globs on load-bearing: `.woodpecker/nomad-validate.yml` globs on exactly that
exactly that suffix to auto-pick up new jobspecs (see step 2 in suffix to auto-pick up new jobspecs (see step 2 in "How CI validates
"How CI validates these files" below). Anything else in these files" below). Anything else in `nomad/jobs/` is silently
`nomad/jobs/` is silently skipped by CI. skipped by CI.
2. If it needs persistent state, reference a `host_volume` already 2. If it needs persistent state, reference a `host_volume` already
declared in `client.hcl`*don't* add ad-hoc host paths in the declared in `client.hcl`*don't* add ad-hoc host paths in the
jobspec. If a new volume is needed, add it to **both**: jobspec. If a new volume is needed, add it to **both**:
@ -52,9 +52,9 @@ it owns.
rejects the mismatch at placement time instead. rejects the mismatch at placement time instead.
3. Pin image tags — `image = "forgejo/forgejo:1.22.5"`, not `:latest`. 3. Pin image tags — `image = "forgejo/forgejo:1.22.5"`, not `:latest`.
4. No pipeline edit required — step 2 of `nomad-validate.yml` globs 4. No pipeline edit required — step 2 of `nomad-validate.yml` globs
over `nomad/jobs/*.nomad.hcl` and validates every match. Just make over `nomad/jobs/*.hcl` and validates every match. Just make sure
sure the existing `nomad/**` trigger path still covers your file the existing `nomad/**` trigger path still covers your file (it
(it does for anything under `nomad/jobs/`). does for anything under `nomad/jobs/`).
## How CI validates these files ## How CI validates these files
@ -67,7 +67,7 @@ fail-closed steps:
driver config. Vault HCL is excluded (different tool). Jobspecs are driver config. Vault HCL is excluded (different tool). Jobspecs are
excluded too — agent-config and jobspec are disjoint HCL grammars; excluded too — agent-config and jobspec are disjoint HCL grammars;
running this step on a jobspec rejects it with "unknown block 'job'". running this step on a jobspec rejects it with "unknown block 'job'".
2. **`nomad job validate nomad/jobs/*.nomad.hcl`** (loop, one call per file) 2. **`nomad job validate nomad/jobs/*.hcl`** (loop, one call per file)
— parses each jobspec's HCL, fails on unknown stanzas, missing — parses each jobspec's HCL, fails on unknown stanzas, missing
required fields, wrong value types, invalid driver config. Runs required fields, wrong value types, invalid driver config. Runs
offline (no Nomad server needed) so CI exit 0 ≠ "this will schedule offline (no Nomad server needed) so CI exit 0 ≠ "this will schedule
@ -79,7 +79,7 @@ fail-closed steps:
- image reachability — `image = "codeberg.org/forgejo/forgejo:11.0"` - image reachability — `image = "codeberg.org/forgejo/forgejo:11.0"`
is accepted even if the registry is down or the tag is wrong. is accepted even if the registry is down or the tag is wrong.
New jobspecs are picked up automatically by the glob — no pipeline New jobspecs are picked up automatically by the glob — no pipeline
edit needed as long as the file is named `<name>.nomad.hcl`. edit needed as long as the file is named `<name>.hcl`.
3. **`vault operator diagnose -config=nomad/vault.hcl -skip=storage -skip=listener`** 3. **`vault operator diagnose -config=nomad/vault.hcl -skip=storage -skip=listener`**
— Vault's equivalent syntax + schema check. `-skip=storage/listener` — Vault's equivalent syntax + schema check. `-skip=storage/listener`
disables the runtime checks (CI containers don't have disables the runtime checks (CI containers don't have

View file

@ -1,5 +1,5 @@
# ============================================================================= # =============================================================================
# nomad/jobs/forgejo.nomad.hcl Forgejo git server (Nomad service job) # nomad/jobs/forgejo.hcl Forgejo git server (Nomad service job)
# #
# Part of the Nomad+Vault migration (S1.1, issue #840). First jobspec to # Part of the Nomad+Vault migration (S1.1, issue #840). First jobspec to
# land under nomad/jobs/ proves the docker driver + host_volume plumbing # land under nomad/jobs/ proves the docker driver + host_volume plumbing

View file

@ -143,3 +143,51 @@ setup_file() {
[[ "$output" == *"repo URL required"* ]] [[ "$output" == *"repo URL required"* ]]
[[ "$output" != *"Unknown option"* ]] [[ "$output" != *"Unknown option"* ]]
} }
# ── --with flag tests ─────────────────────────────────────────────────────────
@test "disinto init --backend=nomad --with forgejo --dry-run prints deploy plan" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"services to deploy: forgejo"* ]]
[[ "$output" == *"[deploy] [dry-run] nomad job validate"* ]]
[[ "$output" == *"[deploy] [dry-run] nomad job run -detach"* ]]
[[ "$output" == *"[deploy] dry-run complete"* ]]
}
@test "disinto init --backend=nomad --with forgejo,forgejo --dry-run handles comma-separated services" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo,forgejo --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"services to deploy: forgejo,forgejo"* ]]
}
@test "disinto init --backend=docker --with forgejo errors with '--with requires --backend=nomad'" {
run "$DISINTO_BIN" init placeholder/repo --backend=docker --with forgejo
[ "$status" -ne 0 ]
[[ "$output" == *"--with requires --backend=nomad"* ]]
}
@test "disinto init --backend=nomad --empty --with forgejo errors with mutually exclusive" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --empty --with forgejo
[ "$status" -ne 0 ]
[[ "$output" == *"--empty and --with are mutually exclusive"* ]]
}
@test "disinto init --backend=nomad --with unknown-service errors with unknown service" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with unknown-service --dry-run
[ "$status" -ne 0 ]
[[ "$output" == *"unknown service"* ]]
[[ "$output" == *"known: forgejo"* ]]
}
@test "disinto init --backend=nomad --with forgejo (flag=value syntax) works" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with=forgejo --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"services to deploy: forgejo"* ]]
}
@test "disinto init --backend=nomad --with forgejo --empty --dry-run rejects in any order" {
run "$DISINTO_BIN" init placeholder/repo --with forgejo --backend=nomad --empty --dry-run
[ "$status" -ne 0 ]
[[ "$output" == *"--empty and --with are mutually exclusive"* ]]
}