Compare commits
4 commits
a00265e53e
...
35f4f0e7c7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
35f4f0e7c7 | ||
|
|
4a1b31af5b | ||
| 016d4fe8cc | |||
|
|
9d5cbb4fa2 |
7 changed files with 240 additions and 34 deletions
|
|
@ -68,15 +68,15 @@ steps:
|
|||
#
|
||||
# Validation is offline: no running Nomad server is required (exit 0 on
|
||||
# valid HCL, 1 on syntax/semantic error). The CLI takes a single path
|
||||
# argument so we loop over every `*.nomad.hcl` file under nomad/jobs/ —
|
||||
# argument so we loop over every `*.hcl` file under nomad/jobs/ —
|
||||
# that way a new jobspec PR gets CI coverage automatically (no separate
|
||||
# "edit the pipeline" step to forget). The `.nomad.hcl` suffix is the
|
||||
# naming convention documented in nomad/AGENTS.md; anything else in
|
||||
# nomad/jobs/ is deliberately not validated by this step.
|
||||
# "edit the pipeline" step to forget). The `.hcl` suffix is the naming
|
||||
# convention: anything else in nomad/jobs/ is deliberately not validated
|
||||
# by this step.
|
||||
#
|
||||
# `[ -f "$f" ]` guards against the no-match case: POSIX sh does not
|
||||
# nullglob, so an empty jobs/ directory would leave the literal glob in
|
||||
# "$f" and fail. Today forgejo.nomad.hcl exists, but the guard keeps the
|
||||
# "$f" and fail. Today forgejo.hcl exists, but the guard keeps the
|
||||
# step safe during any future transient empty state.
|
||||
#
|
||||
# Scope note: offline validate catches jobspec-level errors (unknown
|
||||
|
|
@ -91,7 +91,7 @@ steps:
|
|||
commands:
|
||||
- |
|
||||
set -e
|
||||
for f in nomad/jobs/*.nomad.hcl; do
|
||||
for f in nomad/jobs/*.hcl; do
|
||||
[ -f "$f" ] || continue
|
||||
echo "validating jobspec: $f"
|
||||
nomad job validate "$f"
|
||||
|
|
|
|||
134
bin/disinto
134
bin/disinto
|
|
@ -82,6 +82,7 @@ Init options:
|
|||
--ci-id <n> Woodpecker CI repo ID (default: 0 = no CI)
|
||||
--forge-url <url> Forge base URL (default: http://localhost:3000)
|
||||
--backend <value> Orchestration backend: docker (default) | nomad
|
||||
--with <services> (nomad) Deploy services: forgejo[,...] (S1.3)
|
||||
--empty (nomad) Bring up cluster only, no jobs (S0.4)
|
||||
--bare Skip compose generation (bare-metal setup)
|
||||
--build Use local docker build instead of registry images (dev mode)
|
||||
|
|
@ -662,14 +663,20 @@ prompt_admin_password() {
|
|||
# init run); operators running without sudo-NOPASSWD should invoke
|
||||
# `sudo disinto init ...` directly.
|
||||
_disinto_init_nomad() {
|
||||
local dry_run="${1:-false}" empty="${2:-false}"
|
||||
local dry_run="${1:-false}" empty="${2:-false}" with_services="${3:-}"
|
||||
local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh"
|
||||
local deploy_sh="${FACTORY_ROOT}/lib/init/nomad/deploy.sh"
|
||||
|
||||
if [ ! -x "$cluster_up" ]; then
|
||||
echo "Error: ${cluster_up} not found or not executable" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -n "$with_services" ] && [ ! -x "$deploy_sh" ]; then
|
||||
echo "Error: ${deploy_sh} not found or not executable" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# --empty and default both invoke cluster-up today. Log the requested
|
||||
# mode so the dispatch is visible in factory bootstrap logs — Step 1
|
||||
# will branch on $empty to gate the job-deployment path.
|
||||
|
|
@ -679,31 +686,106 @@ _disinto_init_nomad() {
|
|||
echo "nomad backend: default (cluster-up; jobs deferred to Step 1)"
|
||||
fi
|
||||
|
||||
# Dry-run forwards straight through; cluster-up.sh prints its own step
|
||||
# list and exits 0 without touching the box.
|
||||
local -a cmd=("$cluster_up")
|
||||
# Dry-run: print cluster-up plan + deploy.sh plan
|
||||
if [ "$dry_run" = "true" ]; then
|
||||
cmd+=("--dry-run")
|
||||
"${cmd[@]}"
|
||||
exit $?
|
||||
echo ""
|
||||
echo "── Cluster-up dry-run ─────────────────────────────────"
|
||||
local -a cmd=("$cluster_up" "--dry-run")
|
||||
"${cmd[@]}" || true
|
||||
echo ""
|
||||
|
||||
if [ -n "$with_services" ]; then
|
||||
echo "── Deploy services dry-run ────────────────────────────"
|
||||
echo "[deploy] services to deploy: ${with_services}"
|
||||
local IFS=','
|
||||
for svc in $with_services; do
|
||||
svc=$(echo "$svc" | xargs) # trim whitespace
|
||||
# Validate known services first
|
||||
case "$svc" in
|
||||
forgejo) ;;
|
||||
*)
|
||||
echo "Error: unknown service '${svc}' — known: forgejo" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl"
|
||||
if [ ! -f "$jobspec_path" ]; then
|
||||
echo "Error: jobspec not found: ${jobspec_path}" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "[deploy] [dry-run] nomad job validate ${jobspec_path}"
|
||||
echo "[deploy] [dry-run] nomad job run -detach ${jobspec_path}"
|
||||
done
|
||||
echo "[deploy] dry-run complete"
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Real run — needs root. Invoke via sudo if we're not already root so
|
||||
# the command's exit code propagates directly. We don't distinguish
|
||||
# "sudo denied" from "cluster-up.sh failed" here; both surface as a
|
||||
# non-zero exit, and cluster-up.sh's own error messages cover the
|
||||
# latter case.
|
||||
local rc=0
|
||||
# Real run: cluster-up + deploy services
|
||||
local -a cluster_cmd=("$cluster_up")
|
||||
if [ "$(id -u)" -eq 0 ]; then
|
||||
"${cmd[@]}" || rc=$?
|
||||
"${cluster_cmd[@]}" || exit $?
|
||||
else
|
||||
if ! command -v sudo >/dev/null 2>&1; then
|
||||
echo "Error: cluster-up.sh must run as root and sudo is not installed" >&2
|
||||
exit 1
|
||||
fi
|
||||
sudo -n -- "${cmd[@]}" || rc=$?
|
||||
sudo -n -- "${cluster_cmd[@]}" || exit $?
|
||||
fi
|
||||
exit "$rc"
|
||||
|
||||
# Deploy services if requested
|
||||
if [ -n "$with_services" ]; then
|
||||
echo ""
|
||||
echo "── Deploying services ─────────────────────────────────"
|
||||
local -a deploy_cmd=("$deploy_sh")
|
||||
# Split comma-separated service list into positional args
|
||||
local IFS=','
|
||||
for svc in $with_services; do
|
||||
svc=$(echo "$svc" | xargs) # trim whitespace
|
||||
if ! echo "$svc" | grep -qE '^[a-zA-Z0-9_-]+$'; then
|
||||
echo "Error: invalid service name '${svc}' — must match ^[a-zA-Z0-9_-]+$" >&2
|
||||
exit 1
|
||||
fi
|
||||
# Validate known services FIRST (before jobspec check)
|
||||
case "$svc" in
|
||||
forgejo) ;;
|
||||
*)
|
||||
echo "Error: unknown service '${svc}' — known: forgejo" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
# Check jobspec exists
|
||||
local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl"
|
||||
if [ ! -f "$jobspec_path" ]; then
|
||||
echo "Error: jobspec not found: ${jobspec_path}" >&2
|
||||
exit 1
|
||||
fi
|
||||
deploy_cmd+=("$svc")
|
||||
done
|
||||
deploy_cmd+=("--dry-run") # deploy.sh supports --dry-run
|
||||
|
||||
if [ "$(id -u)" -eq 0 ]; then
|
||||
"${deploy_cmd[@]}" || exit $?
|
||||
else
|
||||
if ! command -v sudo >/dev/null 2>&1; then
|
||||
echo "Error: deploy.sh must run as root and sudo is not installed" >&2
|
||||
exit 1
|
||||
fi
|
||||
sudo -n -- "${deploy_cmd[@]}" || exit $?
|
||||
fi
|
||||
|
||||
# Print final summary
|
||||
echo ""
|
||||
echo "── Summary ────────────────────────────────────────────"
|
||||
echo "Cluster: Nomad+Vault cluster is up"
|
||||
echo "Deployed: ${with_services}"
|
||||
if echo "$with_services" | grep -q "forgejo"; then
|
||||
echo "Ports: forgejo: 3000"
|
||||
fi
|
||||
echo "────────────────────────────────────────────────────────"
|
||||
fi
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
disinto_init() {
|
||||
|
|
@ -721,7 +803,7 @@ disinto_init() {
|
|||
fi
|
||||
|
||||
# Parse flags
|
||||
local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false
|
||||
local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false with_services=""
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--branch) branch="$2"; shift 2 ;;
|
||||
|
|
@ -730,6 +812,8 @@ disinto_init() {
|
|||
--forge-url) forge_url_flag="$2"; shift 2 ;;
|
||||
--backend) backend="$2"; shift 2 ;;
|
||||
--backend=*) backend="${1#--backend=}"; shift ;;
|
||||
--with) with_services="$2"; shift 2 ;;
|
||||
--with=*) with_services="${1#--with=}"; shift ;;
|
||||
--bare) bare=true; shift ;;
|
||||
--build) use_build=true; shift ;;
|
||||
--empty) empty=true; shift ;;
|
||||
|
|
@ -756,11 +840,15 @@ disinto_init() {
|
|||
exit 1
|
||||
fi
|
||||
|
||||
# --empty is nomad-only today (the docker path has no concept of an
|
||||
# "empty cluster"). Reject explicitly rather than letting it silently
|
||||
# do nothing on --backend=docker.
|
||||
if [ "$empty" = true ] && [ "$backend" != "nomad" ]; then
|
||||
echo "Error: --empty is only valid with --backend=nomad" >&2
|
||||
# --with requires --backend=nomad
|
||||
if [ -n "$with_services" ] && [ "$backend" != "nomad" ]; then
|
||||
echo "Error: --with requires --backend=nomad" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# --empty and --with are mutually exclusive
|
||||
if [ "$empty" = true ] && [ -n "$with_services" ]; then
|
||||
echo "Error: --empty and --with are mutually exclusive" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
@ -768,7 +856,7 @@ disinto_init() {
|
|||
# (S0.4). The default and --empty variants are identical today; Step 1
|
||||
# will branch on $empty to add job deployment to the default path.
|
||||
if [ "$backend" = "nomad" ]; then
|
||||
_disinto_init_nomad "$dry_run" "$empty"
|
||||
_disinto_init_nomad "$dry_run" "$empty" "$with_services"
|
||||
# shellcheck disable=SC2317 # _disinto_init_nomad always exits today;
|
||||
# `return` is defensive against future refactors.
|
||||
return
|
||||
|
|
|
|||
|
|
@ -229,6 +229,37 @@ disinto_hire_an_agent() {
|
|||
export "${pass_var}=${user_pass}"
|
||||
fi
|
||||
|
||||
# Step 1.6: Add the new agent as a write collaborator on the project repo (#856).
|
||||
# Without this, PATCH /issues/{n} {assignees:[agent]} returns 403 Forbidden and
|
||||
# the dev-agent polls forever logging "claim lost to <none> — skipping" (see
|
||||
# issue_claim()'s post-PATCH verify). Mirrors the collaborator setup applied
|
||||
# to the canonical bot users in lib/forge-setup.sh. Idempotent: Forgejo's PUT
|
||||
# returns 204 whether the user is being added for the first time or already a
|
||||
# collaborator at the same permission.
|
||||
if [ -n "${FORGE_REPO:-}" ]; then
|
||||
echo ""
|
||||
echo "Step 1.6: Adding '${agent_name}' as write collaborator on '${FORGE_REPO}'..."
|
||||
local collab_code
|
||||
collab_code=$(curl -s -o /dev/null -w '%{http_code}' -X PUT \
|
||||
-H "Authorization: token ${admin_token}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${forge_url}/api/v1/repos/${FORGE_REPO}/collaborators/${agent_name}" \
|
||||
-d '{"permission":"write"}')
|
||||
case "$collab_code" in
|
||||
204|201|200)
|
||||
echo " ${agent_name} is a write collaborator on ${FORGE_REPO} (HTTP ${collab_code})"
|
||||
;;
|
||||
*)
|
||||
echo " Warning: failed to add '${agent_name}' as collaborator on '${FORGE_REPO}' (HTTP ${collab_code})" >&2
|
||||
echo " The agent will not be able to claim issues until this is fixed." >&2
|
||||
;;
|
||||
esac
|
||||
else
|
||||
echo ""
|
||||
echo "Step 1.6: FORGE_REPO not set — skipping collaborator step" >&2
|
||||
echo " Warning: the agent will not be able to claim issues on the project repo" >&2
|
||||
fi
|
||||
|
||||
# Step 2: Create .profile repo on Forgejo
|
||||
echo ""
|
||||
echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..."
|
||||
|
|
|
|||
|
|
@ -126,11 +126,21 @@ issue_claim() {
|
|||
# Assign to self BEFORE adding in-progress label (issue #471).
|
||||
# This ordering ensures the assignee is set by the time other pollers
|
||||
# see the in-progress label, reducing the stale-detection race window.
|
||||
curl -sf -X PATCH \
|
||||
#
|
||||
# Capture the HTTP status instead of silently swallowing failures (#856).
|
||||
# A 403 here means the bot user is not a write collaborator on the repo —
|
||||
# previously the silent failure fell through to the post-PATCH verify which
|
||||
# only reported "claim lost to <none>", hiding the real root cause.
|
||||
local patch_code
|
||||
patch_code=$(curl -s -o /dev/null -w '%{http_code}' -X PATCH \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}/issues/${issue}" \
|
||||
-d "{\"assignees\":[\"${me}\"]}" >/dev/null 2>&1 || return 1
|
||||
-d "{\"assignees\":[\"${me}\"]}")
|
||||
if [ "$patch_code" != "201" ] && [ "$patch_code" != "200" ]; then
|
||||
_ilc_log "issue #${issue} PATCH assignee failed: HTTP ${patch_code} (403 = missing write collaborator permission on ${FORGE_REPO:-repo})"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Verify the PATCH stuck. Forgejo's assignees PATCH is last-write-wins, so
|
||||
# under concurrent claims from multiple dev agents two invocations can both
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# =============================================================================
|
||||
# nomad/jobs/forgejo.nomad.hcl — Forgejo git server (Nomad service job)
|
||||
# nomad/jobs/forgejo.hcl — Forgejo git server (Nomad service job)
|
||||
#
|
||||
# Part of the Nomad+Vault migration (S1.1, issue #840). First jobspec to
|
||||
# land under nomad/jobs/ — proves the docker driver + host_volume plumbing
|
||||
|
|
@ -143,3 +143,51 @@ setup_file() {
|
|||
[[ "$output" == *"repo URL required"* ]]
|
||||
[[ "$output" != *"Unknown option"* ]]
|
||||
}
|
||||
|
||||
# ── --with flag tests ─────────────────────────────────────────────────────────
|
||||
|
||||
@test "disinto init --backend=nomad --with forgejo --dry-run prints deploy plan" {
|
||||
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo --dry-run
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"services to deploy: forgejo"* ]]
|
||||
[[ "$output" == *"[deploy] [dry-run] nomad job validate"* ]]
|
||||
[[ "$output" == *"[deploy] [dry-run] nomad job run -detach"* ]]
|
||||
[[ "$output" == *"[deploy] dry-run complete"* ]]
|
||||
}
|
||||
|
||||
@test "disinto init --backend=nomad --with forgejo,forgejo --dry-run handles comma-separated services" {
|
||||
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo,forgejo --dry-run
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"services to deploy: forgejo,forgejo"* ]]
|
||||
}
|
||||
|
||||
@test "disinto init --backend=docker --with forgejo errors with '--with requires --backend=nomad'" {
|
||||
run "$DISINTO_BIN" init placeholder/repo --backend=docker --with forgejo
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *"--with requires --backend=nomad"* ]]
|
||||
}
|
||||
|
||||
@test "disinto init --backend=nomad --empty --with forgejo errors with mutually exclusive" {
|
||||
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --empty --with forgejo
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *"--empty and --with are mutually exclusive"* ]]
|
||||
}
|
||||
|
||||
@test "disinto init --backend=nomad --with unknown-service errors with unknown service" {
|
||||
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with unknown-service --dry-run
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *"unknown service"* ]]
|
||||
[[ "$output" == *"known: forgejo"* ]]
|
||||
}
|
||||
|
||||
@test "disinto init --backend=nomad --with forgejo (flag=value syntax) works" {
|
||||
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with=forgejo --dry-run
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"services to deploy: forgejo"* ]]
|
||||
}
|
||||
|
||||
@test "disinto init --backend=nomad --with forgejo --empty --dry-run rejects in any order" {
|
||||
run "$DISINTO_BIN" init placeholder/repo --with forgejo --backend=nomad --empty --dry-run
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *"--empty and --with are mutually exclusive"* ]]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,12 +52,13 @@ setup() {
|
|||
# canned responses per endpoint. Every call gets logged as
|
||||
# `METHOD URL` (one line) to $CALLS_LOG for later grep-based asserts.
|
||||
curl() {
|
||||
local method="GET" url="" arg
|
||||
local method="GET" url="" arg want_code=""
|
||||
while [ $# -gt 0 ]; do
|
||||
arg="$1"
|
||||
case "$arg" in
|
||||
-X) method="$2"; shift 2 ;;
|
||||
-H|-d|--data-binary|-o) shift 2 ;;
|
||||
-w) want_code="$2"; shift 2 ;;
|
||||
-sf|-s|-f|--silent|--fail) shift ;;
|
||||
*) url="$arg"; shift ;;
|
||||
esac
|
||||
|
|
@ -89,7 +90,13 @@ setup() {
|
|||
fi
|
||||
;;
|
||||
"PATCH ${FORGE_API}/issues/"*)
|
||||
: # accept any PATCH; body is ignored by the mock
|
||||
# Accept any PATCH; body ignored. When caller asked for the HTTP
|
||||
# status via `-w '%{http_code}'` (issue_claim does this since #856
|
||||
# to surface 403s from missing collaborator permission), emit the
|
||||
# code configured by the scenario (default 200).
|
||||
if [ "$want_code" = '%{http_code}' ]; then
|
||||
printf '%s' "${MOCK_PATCH_CODE:-200}"
|
||||
fi
|
||||
;;
|
||||
"GET ${FORGE_API}/labels")
|
||||
printf '[]'
|
||||
|
|
@ -165,6 +172,28 @@ count_calls() {
|
|||
[ "$(count_calls GET "${FORGE_API}/labels")" -eq 0 ]
|
||||
}
|
||||
|
||||
# ── PATCH HTTP error surfacing (#856) ───────────────────────────────────────
|
||||
|
||||
@test "issue_claim logs specific HTTP code on PATCH failure (403 = missing collaborator)" {
|
||||
export MOCK_ME="bot"
|
||||
export MOCK_INITIAL_ASSIGNEE=""
|
||||
export MOCK_RECHECK_ASSIGNEE=""
|
||||
export MOCK_PATCH_CODE="403"
|
||||
|
||||
run issue_claim 42
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
# The new log message names the HTTP code explicitly — without this,
|
||||
# a missing-collaborator setup (#856) falls through to the post-PATCH
|
||||
# verify and masquerades as "claim lost to <none>".
|
||||
[[ "$output" == *"PATCH assignee failed: HTTP 403"* ]]
|
||||
|
||||
# No re-read on PATCH failure (we bail before reaching the verify step).
|
||||
[ "$(count_calls GET "${FORGE_API}/issues/42")" -eq 1 ]
|
||||
[ "$(count_calls PATCH "${FORGE_API}/issues/42")" -eq 1 ]
|
||||
[ "$(count_calls POST "${FORGE_API}/issues/42/labels")" -eq 0 ]
|
||||
}
|
||||
|
||||
# ── pre-check skip ──────────────────────────────────────────────────────────
|
||||
|
||||
@test "issue_claim skips early (no PATCH) when pre-check shows another assignee" {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue