From 9d5cbb4fa2b7a99eead60750c58714800291be40 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 16 Apr 2026 10:47:51 +0000 Subject: [PATCH 1/5] fix: bug: hire-an-agent does not add the new agent as collaborator on the project repo (#856) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hire-an-agent now adds the new Forgejo user as a `write` collaborator on `$FORGE_REPO` right after the token step, mirroring the collaborator setup lib/forge-setup.sh applies to the canonical bot users. Without this, a freshly hired agent's PATCH to assign itself an issue returned 403 Forbidden and the dev-agent polled forever logging "claim lost to ". issue_claim() now captures the PATCH HTTP status via `-w '%{http_code}'` instead of swallowing failures with `curl -sf ... || return 1`. A 403 (or any non-2xx) now surfaces a distinct log line naming the code — the missing collaborator root cause would have been diagnosable in seconds instead of minutes. Also updates the lib-issue-claim bats mock to handle the new `-w` flag and adds a regression test covering the HTTP-error log surfacing path. Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/hire-agent.sh | 31 +++++++++++++++++++++++++++++++ lib/issue-lifecycle.sh | 14 ++++++++++++-- tests/lib-issue-claim.bats | 33 +++++++++++++++++++++++++++++++-- 3 files changed, 74 insertions(+), 4 deletions(-) diff --git a/lib/hire-agent.sh b/lib/hire-agent.sh index 49ab8ae..2bbea63 100644 --- a/lib/hire-agent.sh +++ b/lib/hire-agent.sh @@ -229,6 +229,37 @@ disinto_hire_an_agent() { export "${pass_var}=${user_pass}" fi + # Step 1.6: Add the new agent as a write collaborator on the project repo (#856). + # Without this, PATCH /issues/{n} {assignees:[agent]} returns 403 Forbidden and + # the dev-agent polls forever logging "claim lost to — skipping" (see + # issue_claim()'s post-PATCH verify). Mirrors the collaborator setup applied + # to the canonical bot users in lib/forge-setup.sh. 
Idempotent: Forgejo's PUT + # returns 204 whether the user is being added for the first time or already a + # collaborator at the same permission. + if [ -n "${FORGE_REPO:-}" ]; then + echo "" + echo "Step 1.6: Adding '${agent_name}' as write collaborator on '${FORGE_REPO}'..." + local collab_code + collab_code=$(curl -s -o /dev/null -w '%{http_code}' -X PUT \ + -H "Authorization: token ${admin_token}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${FORGE_REPO}/collaborators/${agent_name}" \ + -d '{"permission":"write"}') + case "$collab_code" in + 204|201|200) + echo " ${agent_name} is a write collaborator on ${FORGE_REPO} (HTTP ${collab_code})" + ;; + *) + echo " Warning: failed to add '${agent_name}' as collaborator on '${FORGE_REPO}' (HTTP ${collab_code})" >&2 + echo " The agent will not be able to claim issues until this is fixed." >&2 + ;; + esac + else + echo "" + echo "Step 1.6: FORGE_REPO not set — skipping collaborator step" >&2 + echo " Warning: the agent will not be able to claim issues on the project repo" >&2 + fi + # Step 2: Create .profile repo on Forgejo echo "" echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..." diff --git a/lib/issue-lifecycle.sh b/lib/issue-lifecycle.sh index 80f9afa..1ad3239 100644 --- a/lib/issue-lifecycle.sh +++ b/lib/issue-lifecycle.sh @@ -126,11 +126,21 @@ issue_claim() { # Assign to self BEFORE adding in-progress label (issue #471). # This ordering ensures the assignee is set by the time other pollers # see the in-progress label, reducing the stale-detection race window. - curl -sf -X PATCH \ + # + # Capture the HTTP status instead of silently swallowing failures (#856). + # A 403 here means the bot user is not a write collaborator on the repo — + # previously the silent failure fell through to the post-PATCH verify which + # only reported "claim lost to ", hiding the real root cause. 
+ local patch_code + patch_code=$(curl -s -o /dev/null -w '%{http_code}' -X PATCH \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ "${FORGE_API}/issues/${issue}" \ - -d "{\"assignees\":[\"${me}\"]}" >/dev/null 2>&1 || return 1 + -d "{\"assignees\":[\"${me}\"]}") + if [ "$patch_code" != "201" ] && [ "$patch_code" != "200" ]; then + _ilc_log "issue #${issue} PATCH assignee failed: HTTP ${patch_code} (403 = missing write collaborator permission on ${FORGE_REPO:-repo})" + return 1 + fi # Verify the PATCH stuck. Forgejo's assignees PATCH is last-write-wins, so # under concurrent claims from multiple dev agents two invocations can both diff --git a/tests/lib-issue-claim.bats b/tests/lib-issue-claim.bats index d7a2c91..85bcc83 100644 --- a/tests/lib-issue-claim.bats +++ b/tests/lib-issue-claim.bats @@ -52,12 +52,13 @@ setup() { # canned responses per endpoint. Every call gets logged as # `METHOD URL` (one line) to $CALLS_LOG for later grep-based asserts. curl() { - local method="GET" url="" arg + local method="GET" url="" arg want_code="" while [ $# -gt 0 ]; do arg="$1" case "$arg" in -X) method="$2"; shift 2 ;; -H|-d|--data-binary|-o) shift 2 ;; + -w) want_code="$2"; shift 2 ;; -sf|-s|-f|--silent|--fail) shift ;; *) url="$arg"; shift ;; esac @@ -89,7 +90,13 @@ setup() { fi ;; "PATCH ${FORGE_API}/issues/"*) - : # accept any PATCH; body is ignored by the mock + # Accept any PATCH; body ignored. When caller asked for the HTTP + # status via `-w '%{http_code}'` (issue_claim does this since #856 + # to surface 403s from missing collaborator permission), emit the + # code configured by the scenario (default 200). 
+ if [ "$want_code" = '%{http_code}' ]; then + printf '%s' "${MOCK_PATCH_CODE:-200}" + fi ;; "GET ${FORGE_API}/labels") printf '[]' @@ -165,6 +172,28 @@ count_calls() { [ "$(count_calls GET "${FORGE_API}/labels")" -eq 0 ] } +# ── PATCH HTTP error surfacing (#856) ─────────────────────────────────────── + +@test "issue_claim logs specific HTTP code on PATCH failure (403 = missing collaborator)" { + export MOCK_ME="bot" + export MOCK_INITIAL_ASSIGNEE="" + export MOCK_RECHECK_ASSIGNEE="" + export MOCK_PATCH_CODE="403" + + run issue_claim 42 + [ "$status" -eq 1 ] + + # The new log message names the HTTP code explicitly — without this, + # a missing-collaborator setup (#856) falls through to the post-PATCH + # verify and masquerades as "claim lost to ". + [[ "$output" == *"PATCH assignee failed: HTTP 403"* ]] + + # No re-read on PATCH failure (we bail before reaching the verify step). + [ "$(count_calls GET "${FORGE_API}/issues/42")" -eq 1 ] + [ "$(count_calls PATCH "${FORGE_API}/issues/42")" -eq 1 ] + [ "$(count_calls POST "${FORGE_API}/issues/42/labels")" -eq 0 ] +} + # ── pre-check skip ────────────────────────────────────────────────────────── @test "issue_claim skips early (no PATCH) when pre-check shows another assignee" { From 8da46a7c42102e9a9a344fe30daee4d46c49d9f6 Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 16 Apr 2026 10:54:46 +0000 Subject: [PATCH 2/5] =?UTF-8?q?fix:=20[nomad-step-1]=20S1.3=20=E2=80=94=20?= =?UTF-8?q?wire=20--with=20forgejo=20into=20bin/disinto=20init=20--backend?= =?UTF-8?q?=3Dnomad=20(#842)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bin/disinto | 134 +++++++++++++++--- nomad/jobs/{forgejo.nomad.hcl => forgejo.hcl} | 2 +- tests/disinto-init-nomad.bats | 48 +++++++ 3 files changed, 160 insertions(+), 24 deletions(-) rename nomad/jobs/{forgejo.nomad.hcl => forgejo.hcl} (98%) diff --git a/bin/disinto b/bin/disinto index 4f06b5e..1d5e01e 100755 --- a/bin/disinto +++ b/bin/disinto @@ -82,6 
+82,7 @@ Init options: --ci-id Woodpecker CI repo ID (default: 0 = no CI) --forge-url Forge base URL (default: http://localhost:3000) --backend Orchestration backend: docker (default) | nomad + --with (nomad) Deploy services: forgejo[,...] (S1.3) --empty (nomad) Bring up cluster only, no jobs (S0.4) --bare Skip compose generation (bare-metal setup) --build Use local docker build instead of registry images (dev mode) @@ -662,14 +663,20 @@ prompt_admin_password() { # init run); operators running without sudo-NOPASSWD should invoke # `sudo disinto init ...` directly. _disinto_init_nomad() { - local dry_run="${1:-false}" empty="${2:-false}" + local dry_run="${1:-false}" empty="${2:-false}" with_services="${3:-}" local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh" + local deploy_sh="${FACTORY_ROOT}/lib/init/nomad/deploy.sh" if [ ! -x "$cluster_up" ]; then echo "Error: ${cluster_up} not found or not executable" >&2 exit 1 fi + if [ -n "$with_services" ] && [ ! -x "$deploy_sh" ]; then + echo "Error: ${deploy_sh} not found or not executable" >&2 + exit 1 + fi + # --empty and default both invoke cluster-up today. Log the requested # mode so the dispatch is visible in factory bootstrap logs — Step 1 # will branch on $empty to gate the job-deployment path. @@ -679,31 +686,106 @@ _disinto_init_nomad() { echo "nomad backend: default (cluster-up; jobs deferred to Step 1)" fi - # Dry-run forwards straight through; cluster-up.sh prints its own step - # list and exits 0 without touching the box. - local -a cmd=("$cluster_up") + # Dry-run: print cluster-up plan + deploy.sh plan if [ "$dry_run" = "true" ]; then - cmd+=("--dry-run") - "${cmd[@]}" - exit $? 
+ echo "" + echo "── Cluster-up dry-run ─────────────────────────────────" + local -a cmd=("$cluster_up" "--dry-run") + "${cmd[@]}" || true + echo "" + + if [ -n "$with_services" ]; then + echo "── Deploy services dry-run ────────────────────────────" + echo "[deploy] services to deploy: ${with_services}" + local IFS=',' + for svc in $with_services; do + svc=$(echo "$svc" | xargs) # trim whitespace + # Validate known services first + case "$svc" in + forgejo) ;; + *) + echo "Error: unknown service '${svc}' — known: forgejo" >&2 + exit 1 + ;; + esac + local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl" + if [ ! -f "$jobspec_path" ]; then + echo "Error: jobspec not found: ${jobspec_path}" >&2 + exit 1 + fi + echo "[deploy] [dry-run] nomad job validate ${jobspec_path}" + echo "[deploy] [dry-run] nomad job run -detach ${jobspec_path}" + done + echo "[deploy] dry-run complete" + fi + exit 0 fi - # Real run — needs root. Invoke via sudo if we're not already root so - # the command's exit code propagates directly. We don't distinguish - # "sudo denied" from "cluster-up.sh failed" here; both surface as a - # non-zero exit, and cluster-up.sh's own error messages cover the - # latter case. - local rc=0 + # Real run: cluster-up + deploy services + local -a cluster_cmd=("$cluster_up") if [ "$(id -u)" -eq 0 ]; then - "${cmd[@]}" || rc=$? + "${cluster_cmd[@]}" || exit $? else if ! command -v sudo >/dev/null 2>&1; then echo "Error: cluster-up.sh must run as root and sudo is not installed" >&2 exit 1 fi - sudo -n -- "${cmd[@]}" || rc=$? + sudo -n -- "${cluster_cmd[@]}" || exit $? fi - exit "$rc" + + # Deploy services if requested + if [ -n "$with_services" ]; then + echo "" + echo "── Deploying services ─────────────────────────────────" + local -a deploy_cmd=("$deploy_sh") + # Split comma-separated service list into positional args + local IFS=',' + for svc in $with_services; do + svc=$(echo "$svc" | xargs) # trim whitespace + if ! 
echo "$svc" | grep -qE '^[a-zA-Z0-9_-]+$'; then + echo "Error: invalid service name '${svc}' — must match ^[a-zA-Z0-9_-]+$" >&2 + exit 1 + fi + # Validate known services FIRST (before jobspec check) + case "$svc" in + forgejo) ;; + *) + echo "Error: unknown service '${svc}' — known: forgejo" >&2 + exit 1 + ;; + esac + # Check jobspec exists + local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl" + if [ ! -f "$jobspec_path" ]; then + echo "Error: jobspec not found: ${jobspec_path}" >&2 + exit 1 + fi + deploy_cmd+=("$svc") + done + # real run: do NOT pass --dry-run — appending it here made deploy.sh a no-op while the summary below claimed success + + if [ "$(id -u)" -eq 0 ]; then + "${deploy_cmd[@]}" || exit $? + else + if ! command -v sudo >/dev/null 2>&1; then + echo "Error: deploy.sh must run as root and sudo is not installed" >&2 + exit 1 + fi + sudo -n -- "${deploy_cmd[@]}" || exit $? + fi + + # Print final summary + echo "" + echo "── Summary ────────────────────────────────────────────" + echo "Cluster: Nomad+Vault cluster is up" + echo "Deployed: ${with_services}" + if echo "$with_services" | grep -q "forgejo"; then + echo "Ports: forgejo: 3000" + fi + echo "────────────────────────────────────────────────────────" + fi + + exit 0 } disinto_init() { @@ -721,7 +803,7 @@ disinto_init() { fi # Parse flags - local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false + local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false with_services="" while [ $# -gt 0 ]; do case "$1" in --branch) branch="$2"; shift 2 ;; @@ -730,6 +812,8 @@ disinto_init() { --forge-url) forge_url_flag="$2"; shift 2 ;; --backend) backend="$2"; shift 2 ;; --backend=*) backend="${1#--backend=}"; shift ;; + --with) with_services="$2"; shift 2 ;; + --with=*) with_services="${1#--with=}"; shift ;; --bare) bare=true; shift ;; --build) 
use_build=true; shift ;; --empty) empty=true; shift ;; @@ -756,11 +840,15 @@ disinto_init() { exit 1 fi - # --empty is nomad-only today (the docker path has no concept of an - # "empty cluster"). Reject explicitly rather than letting it silently - # do nothing on --backend=docker. - if [ "$empty" = true ] && [ "$backend" != "nomad" ]; then - echo "Error: --empty is only valid with --backend=nomad" >&2 + # --with requires --backend=nomad + if [ -n "$with_services" ] && [ "$backend" != "nomad" ]; then + echo "Error: --with requires --backend=nomad" >&2 + exit 1 + fi + + # --empty and --with are mutually exclusive + if [ "$empty" = true ] && [ -n "$with_services" ]; then + echo "Error: --empty and --with are mutually exclusive" >&2 exit 1 fi @@ -768,7 +856,7 @@ disinto_init() { # (S0.4). The default and --empty variants are identical today; Step 1 # will branch on $empty to add job deployment to the default path. if [ "$backend" = "nomad" ]; then - _disinto_init_nomad "$dry_run" "$empty" + _disinto_init_nomad "$dry_run" "$empty" "$with_services" # shellcheck disable=SC2317 # _disinto_init_nomad always exits today; # `return` is defensive against future refactors. return diff --git a/nomad/jobs/forgejo.nomad.hcl b/nomad/jobs/forgejo.hcl similarity index 98% rename from nomad/jobs/forgejo.nomad.hcl rename to nomad/jobs/forgejo.hcl index c7a0326..b2c057f 100644 --- a/nomad/jobs/forgejo.nomad.hcl +++ b/nomad/jobs/forgejo.hcl @@ -1,5 +1,5 @@ # ============================================================================= -# nomad/jobs/forgejo.nomad.hcl — Forgejo git server (Nomad service job) +# nomad/jobs/forgejo.hcl — Forgejo git server (Nomad service job) # # Part of the Nomad+Vault migration (S1.1, issue #840). 
First jobspec to # land under nomad/jobs/ — proves the docker driver + host_volume plumbing diff --git a/tests/disinto-init-nomad.bats b/tests/disinto-init-nomad.bats index 5b2648b..8616e2d 100644 --- a/tests/disinto-init-nomad.bats +++ b/tests/disinto-init-nomad.bats @@ -143,3 +143,51 @@ setup_file() { [[ "$output" == *"repo URL required"* ]] [[ "$output" != *"Unknown option"* ]] } + +# ── --with flag tests ───────────────────────────────────────────────────────── + +@test "disinto init --backend=nomad --with forgejo --dry-run prints deploy plan" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"services to deploy: forgejo"* ]] + [[ "$output" == *"[deploy] [dry-run] nomad job validate"* ]] + [[ "$output" == *"[deploy] [dry-run] nomad job run -detach"* ]] + [[ "$output" == *"[deploy] dry-run complete"* ]] +} + +@test "disinto init --backend=nomad --with forgejo,forgejo --dry-run handles comma-separated services" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo,forgejo --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"services to deploy: forgejo,forgejo"* ]] +} + +@test "disinto init --backend=docker --with forgejo errors with '--with requires --backend=nomad'" { + run "$DISINTO_BIN" init placeholder/repo --backend=docker --with forgejo + [ "$status" -ne 0 ] + [[ "$output" == *"--with requires --backend=nomad"* ]] +} + +@test "disinto init --backend=nomad --empty --with forgejo errors with mutually exclusive" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --empty --with forgejo + [ "$status" -ne 0 ] + [[ "$output" == *"--empty and --with are mutually exclusive"* ]] +} + +@test "disinto init --backend=nomad --with unknown-service errors with unknown service" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with unknown-service --dry-run + [ "$status" -ne 0 ] + [[ "$output" == *"unknown service"* ]] + [[ "$output" == *"known: forgejo"* 
]] +} + +@test "disinto init --backend=nomad --with forgejo (flag=value syntax) works" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with=forgejo --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"services to deploy: forgejo"* ]] +} + +@test "disinto init --backend=nomad --with forgejo --empty --dry-run rejects in any order" { + run "$DISINTO_BIN" init placeholder/repo --with forgejo --backend=nomad --empty --dry-run + [ "$status" -ne 0 ] + [[ "$output" == *"--empty and --with are mutually exclusive"* ]] +} From a00265e53ef559b7591be15d2aedc7363e42ebb6 Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 16 Apr 2026 10:59:52 +0000 Subject: [PATCH 3/5] fix: [nomad-validate] update glob to *.hcl for forgejo.hcl validation --- .woodpecker/nomad-validate.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.woodpecker/nomad-validate.yml b/.woodpecker/nomad-validate.yml index d5828e9..a66e1e7 100644 --- a/.woodpecker/nomad-validate.yml +++ b/.woodpecker/nomad-validate.yml @@ -68,15 +68,15 @@ steps: # # Validation is offline: no running Nomad server is required (exit 0 on # valid HCL, 1 on syntax/semantic error). The CLI takes a single path - # argument so we loop over every `*.nomad.hcl` file under nomad/jobs/ — + # argument so we loop over every `*.hcl` file under nomad/jobs/ — # that way a new jobspec PR gets CI coverage automatically (no separate - # "edit the pipeline" step to forget). The `.nomad.hcl` suffix is the - # naming convention documented in nomad/AGENTS.md; anything else in - # nomad/jobs/ is deliberately not validated by this step. + # "edit the pipeline" step to forget). The `.hcl` suffix is the naming + # convention: anything else in nomad/jobs/ is deliberately not validated + # by this step. # # `[ -f "$f" ]` guards against the no-match case: POSIX sh does not # nullglob, so an empty jobs/ directory would leave the literal glob in - # "$f" and fail. 
Today forgejo.nomad.hcl exists, but the guard keeps the + # "$f" and fail. Today forgejo.hcl exists, but the guard keeps the # step safe during any future transient empty state. # # Scope note: offline validate catches jobspec-level errors (unknown @@ -91,7 +91,7 @@ steps: commands: - | set -e - for f in nomad/jobs/*.nomad.hcl; do + for f in nomad/jobs/*.hcl; do [ -f "$f" ] || continue echo "validating jobspec: $f" nomad job validate "$f" From 4a1b31af5b845a1c1046046531e42d2908558a43 Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 16 Apr 2026 10:54:46 +0000 Subject: [PATCH 4/5] =?UTF-8?q?fix:=20[nomad-step-1]=20S1.3=20=E2=80=94=20?= =?UTF-8?q?wire=20--with=20forgejo=20into=20bin/disinto=20init=20--backend?= =?UTF-8?q?=3Dnomad=20(#842)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bin/disinto | 134 +++++++++++++++--- nomad/jobs/{forgejo.nomad.hcl => forgejo.hcl} | 2 +- tests/disinto-init-nomad.bats | 48 +++++++ 3 files changed, 160 insertions(+), 24 deletions(-) rename nomad/jobs/{forgejo.nomad.hcl => forgejo.hcl} (98%) diff --git a/bin/disinto b/bin/disinto index 4f06b5e..1d5e01e 100755 --- a/bin/disinto +++ b/bin/disinto @@ -82,6 +82,7 @@ Init options: --ci-id Woodpecker CI repo ID (default: 0 = no CI) --forge-url Forge base URL (default: http://localhost:3000) --backend Orchestration backend: docker (default) | nomad + --with (nomad) Deploy services: forgejo[,...] (S1.3) --empty (nomad) Bring up cluster only, no jobs (S0.4) --bare Skip compose generation (bare-metal setup) --build Use local docker build instead of registry images (dev mode) @@ -662,14 +663,20 @@ prompt_admin_password() { # init run); operators running without sudo-NOPASSWD should invoke # `sudo disinto init ...` directly. 
_disinto_init_nomad() { - local dry_run="${1:-false}" empty="${2:-false}" + local dry_run="${1:-false}" empty="${2:-false}" with_services="${3:-}" local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh" + local deploy_sh="${FACTORY_ROOT}/lib/init/nomad/deploy.sh" if [ ! -x "$cluster_up" ]; then echo "Error: ${cluster_up} not found or not executable" >&2 exit 1 fi + if [ -n "$with_services" ] && [ ! -x "$deploy_sh" ]; then + echo "Error: ${deploy_sh} not found or not executable" >&2 + exit 1 + fi + # --empty and default both invoke cluster-up today. Log the requested # mode so the dispatch is visible in factory bootstrap logs — Step 1 # will branch on $empty to gate the job-deployment path. @@ -679,31 +686,106 @@ _disinto_init_nomad() { echo "nomad backend: default (cluster-up; jobs deferred to Step 1)" fi - # Dry-run forwards straight through; cluster-up.sh prints its own step - # list and exits 0 without touching the box. - local -a cmd=("$cluster_up") + # Dry-run: print cluster-up plan + deploy.sh plan if [ "$dry_run" = "true" ]; then - cmd+=("--dry-run") - "${cmd[@]}" - exit $? + echo "" + echo "── Cluster-up dry-run ─────────────────────────────────" + local -a cmd=("$cluster_up" "--dry-run") + "${cmd[@]}" || true + echo "" + + if [ -n "$with_services" ]; then + echo "── Deploy services dry-run ────────────────────────────" + echo "[deploy] services to deploy: ${with_services}" + local IFS=',' + for svc in $with_services; do + svc=$(echo "$svc" | xargs) # trim whitespace + # Validate known services first + case "$svc" in + forgejo) ;; + *) + echo "Error: unknown service '${svc}' — known: forgejo" >&2 + exit 1 + ;; + esac + local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl" + if [ ! 
-f "$jobspec_path" ]; then + echo "Error: jobspec not found: ${jobspec_path}" >&2 + exit 1 + fi + echo "[deploy] [dry-run] nomad job validate ${jobspec_path}" + echo "[deploy] [dry-run] nomad job run -detach ${jobspec_path}" + done + echo "[deploy] dry-run complete" + fi + exit 0 fi - # Real run — needs root. Invoke via sudo if we're not already root so - # the command's exit code propagates directly. We don't distinguish - # "sudo denied" from "cluster-up.sh failed" here; both surface as a - # non-zero exit, and cluster-up.sh's own error messages cover the - # latter case. - local rc=0 + # Real run: cluster-up + deploy services + local -a cluster_cmd=("$cluster_up") if [ "$(id -u)" -eq 0 ]; then - "${cmd[@]}" || rc=$? + "${cluster_cmd[@]}" || exit $? else if ! command -v sudo >/dev/null 2>&1; then echo "Error: cluster-up.sh must run as root and sudo is not installed" >&2 exit 1 fi - sudo -n -- "${cmd[@]}" || rc=$? + sudo -n -- "${cluster_cmd[@]}" || exit $? fi - exit "$rc" + + # Deploy services if requested + if [ -n "$with_services" ]; then + echo "" + echo "── Deploying services ─────────────────────────────────" + local -a deploy_cmd=("$deploy_sh") + # Split comma-separated service list into positional args + local IFS=',' + for svc in $with_services; do + svc=$(echo "$svc" | xargs) # trim whitespace + if ! echo "$svc" | grep -qE '^[a-zA-Z0-9_-]+$'; then + echo "Error: invalid service name '${svc}' — must match ^[a-zA-Z0-9_-]+$" >&2 + exit 1 + fi + # Validate known services FIRST (before jobspec check) + case "$svc" in + forgejo) ;; + *) + echo "Error: unknown service '${svc}' — known: forgejo" >&2 + exit 1 + ;; + esac + # Check jobspec exists + local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl" + if [ ! -f "$jobspec_path" ]; then + echo "Error: jobspec not found: ${jobspec_path}" >&2 + exit 1 + fi + deploy_cmd+=("$svc") + done + # real run: do NOT pass --dry-run — appending it here made deploy.sh a no-op while the summary below claimed success + + if [ "$(id -u)" -eq 0 ]; then + "${deploy_cmd[@]}" || exit $? 
+ else + if ! command -v sudo >/dev/null 2>&1; then + echo "Error: deploy.sh must run as root and sudo is not installed" >&2 + exit 1 + fi + sudo -n -- "${deploy_cmd[@]}" || exit $? + fi + + # Print final summary + echo "" + echo "── Summary ────────────────────────────────────────────" + echo "Cluster: Nomad+Vault cluster is up" + echo "Deployed: ${with_services}" + if echo "$with_services" | grep -q "forgejo"; then + echo "Ports: forgejo: 3000" + fi + echo "────────────────────────────────────────────────────────" + fi + + exit 0 } disinto_init() { @@ -721,7 +803,7 @@ disinto_init() { fi # Parse flags - local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false + local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false with_services="" while [ $# -gt 0 ]; do case "$1" in --branch) branch="$2"; shift 2 ;; @@ -730,6 +812,8 @@ disinto_init() { --forge-url) forge_url_flag="$2"; shift 2 ;; --backend) backend="$2"; shift 2 ;; --backend=*) backend="${1#--backend=}"; shift ;; + --with) with_services="$2"; shift 2 ;; + --with=*) with_services="${1#--with=}"; shift ;; --bare) bare=true; shift ;; --build) use_build=true; shift ;; --empty) empty=true; shift ;; @@ -756,11 +840,15 @@ disinto_init() { exit 1 fi - # --empty is nomad-only today (the docker path has no concept of an - # "empty cluster"). Reject explicitly rather than letting it silently - # do nothing on --backend=docker. 
- if [ "$empty" = true ] && [ "$backend" != "nomad" ]; then - echo "Error: --empty is only valid with --backend=nomad" >&2 + # --with requires --backend=nomad + if [ -n "$with_services" ] && [ "$backend" != "nomad" ]; then + echo "Error: --with requires --backend=nomad" >&2 + exit 1 + fi + + # --empty and --with are mutually exclusive + if [ "$empty" = true ] && [ -n "$with_services" ]; then + echo "Error: --empty and --with are mutually exclusive" >&2 exit 1 fi @@ -768,7 +856,7 @@ disinto_init() { # (S0.4). The default and --empty variants are identical today; Step 1 # will branch on $empty to add job deployment to the default path. if [ "$backend" = "nomad" ]; then - _disinto_init_nomad "$dry_run" "$empty" + _disinto_init_nomad "$dry_run" "$empty" "$with_services" # shellcheck disable=SC2317 # _disinto_init_nomad always exits today; # `return` is defensive against future refactors. return diff --git a/nomad/jobs/forgejo.nomad.hcl b/nomad/jobs/forgejo.hcl similarity index 98% rename from nomad/jobs/forgejo.nomad.hcl rename to nomad/jobs/forgejo.hcl index c7a0326..b2c057f 100644 --- a/nomad/jobs/forgejo.nomad.hcl +++ b/nomad/jobs/forgejo.hcl @@ -1,5 +1,5 @@ # ============================================================================= -# nomad/jobs/forgejo.nomad.hcl — Forgejo git server (Nomad service job) +# nomad/jobs/forgejo.hcl — Forgejo git server (Nomad service job) # # Part of the Nomad+Vault migration (S1.1, issue #840). 
First jobspec to # land under nomad/jobs/ — proves the docker driver + host_volume plumbing diff --git a/tests/disinto-init-nomad.bats b/tests/disinto-init-nomad.bats index 5b2648b..8616e2d 100644 --- a/tests/disinto-init-nomad.bats +++ b/tests/disinto-init-nomad.bats @@ -143,3 +143,51 @@ setup_file() { [[ "$output" == *"repo URL required"* ]] [[ "$output" != *"Unknown option"* ]] } + +# ── --with flag tests ───────────────────────────────────────────────────────── + +@test "disinto init --backend=nomad --with forgejo --dry-run prints deploy plan" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"services to deploy: forgejo"* ]] + [[ "$output" == *"[deploy] [dry-run] nomad job validate"* ]] + [[ "$output" == *"[deploy] [dry-run] nomad job run -detach"* ]] + [[ "$output" == *"[deploy] dry-run complete"* ]] +} + +@test "disinto init --backend=nomad --with forgejo,forgejo --dry-run handles comma-separated services" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo,forgejo --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"services to deploy: forgejo,forgejo"* ]] +} + +@test "disinto init --backend=docker --with forgejo errors with '--with requires --backend=nomad'" { + run "$DISINTO_BIN" init placeholder/repo --backend=docker --with forgejo + [ "$status" -ne 0 ] + [[ "$output" == *"--with requires --backend=nomad"* ]] +} + +@test "disinto init --backend=nomad --empty --with forgejo errors with mutually exclusive" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --empty --with forgejo + [ "$status" -ne 0 ] + [[ "$output" == *"--empty and --with are mutually exclusive"* ]] +} + +@test "disinto init --backend=nomad --with unknown-service errors with unknown service" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with unknown-service --dry-run + [ "$status" -ne 0 ] + [[ "$output" == *"unknown service"* ]] + [[ "$output" == *"known: forgejo"* 
]] +} + +@test "disinto init --backend=nomad --with forgejo (flag=value syntax) works" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with=forgejo --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"services to deploy: forgejo"* ]] +} + +@test "disinto init --backend=nomad --with forgejo --empty --dry-run rejects in any order" { + run "$DISINTO_BIN" init placeholder/repo --with forgejo --backend=nomad --empty --dry-run + [ "$status" -ne 0 ] + [[ "$output" == *"--empty and --with are mutually exclusive"* ]] +} From 35f4f0e7c746300020bc45f63ee8fa2aa8dd0f19 Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 16 Apr 2026 10:59:52 +0000 Subject: [PATCH 5/5] fix: [nomad-validate] update glob to *.hcl for forgejo.hcl validation --- .woodpecker/nomad-validate.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.woodpecker/nomad-validate.yml b/.woodpecker/nomad-validate.yml index d5828e9..a66e1e7 100644 --- a/.woodpecker/nomad-validate.yml +++ b/.woodpecker/nomad-validate.yml @@ -68,15 +68,15 @@ steps: # # Validation is offline: no running Nomad server is required (exit 0 on # valid HCL, 1 on syntax/semantic error). The CLI takes a single path - # argument so we loop over every `*.nomad.hcl` file under nomad/jobs/ — + # argument so we loop over every `*.hcl` file under nomad/jobs/ — # that way a new jobspec PR gets CI coverage automatically (no separate - # "edit the pipeline" step to forget). The `.nomad.hcl` suffix is the - # naming convention documented in nomad/AGENTS.md; anything else in - # nomad/jobs/ is deliberately not validated by this step. + # "edit the pipeline" step to forget). The `.hcl` suffix is the naming + # convention: anything else in nomad/jobs/ is deliberately not validated + # by this step. # # `[ -f "$f" ]` guards against the no-match case: POSIX sh does not # nullglob, so an empty jobs/ directory would leave the literal glob in - # "$f" and fail. 
Today forgejo.nomad.hcl exists, but the guard keeps the + # "$f" and fail. Today forgejo.hcl exists, but the guard keeps the # step safe during any future transient empty state. # # Scope note: offline validate catches jobspec-level errors (unknown @@ -91,7 +91,7 @@ steps: commands: - | set -e - for f in nomad/jobs/*.nomad.hcl; do + for f in nomad/jobs/*.hcl; do [ -f "$f" ] || continue echo "validating jobspec: $f" nomad job validate "$f"