Teach _wait_job_running to look at allocations when a job reports a
terminal status, without turning dead/failed jobs into a success.

Deliberately not part of this patch (both were regressions):
  * REPO_ROOT stays at "${SCRIPT_ROOT}/../../.."; from lib/init/nomad/
    two levels up is lib/, not the repository root.
  * The "already running" check keeps parsing `nomad job status -json`
    with jq; the first line of the plain-text output is the job ID, so
    grepping it for "running" does not test the job status.

diff --git a/lib/init/nomad/deploy.sh b/lib/init/nomad/deploy.sh
--- a/lib/init/nomad/deploy.sh
+++ b/lib/init/nomad/deploy.sh
@@ -96,14 +96,41 @@ _wait_job_running() {
         log "job '${job_name}' is now running"
         return 0
         ;;
-      complete)
+      complete|dead|failed)
+        # A terminal job status can still have live allocations; count them
+        # before deciding whether to keep polling, succeed, or fail.
+        # NOTE(review): the .Evaluations[].Allocations[] path and the
+        # "running" value of .Status are taken as-is from this change --
+        # confirm them against the JSON held in status_json.
+        local allocs_running allocs_total
+        allocs_running=$(printf '%s' "$status_json" \
+          | jq '[.Evaluations[].Allocations[]? | select(.Status == "running")] | length' 2>/dev/null) || allocs_running=0
+        # Count the allocations themselves; `length` applied to each
+        # allocation object and summed would total their keys instead.
+        allocs_total=$(printf '%s' "$status_json" \
+          | jq '[.Evaluations[].Allocations[]?] | length' 2>/dev/null) || allocs_total=0
+        # jq can exit 0 yet print nothing; keep the integer tests below valid.
+        allocs_running="${allocs_running:-0}"
+        allocs_total="${allocs_total:-0}"
+
+        if [ "$allocs_running" -gt 0 ]; then
+          log "job '${job_name}' has ${allocs_running}/${allocs_total} allocations running"
+          # If not all running but some are, keep waiting
+          if [ "$allocs_running" -lt "$allocs_total" ]; then
+            sleep 5
+            elapsed=$((elapsed + 5))
+            continue
+          fi
+        elif [ "$status" != "complete" ]; then
+          # dead/failed with nothing running is a failed deploy: report it
+          # to the caller instead of falling through to `return 0`.
+          log "job '${job_name}' reached terminal state: ${status}"
+          return 1
+        fi
+
         log "job '${job_name}' reached terminal state: ${status}"
         return 0
         ;;
-      dead|failed)
-        log "job '${job_name}' reached terminal state: ${status}"
-        return 1
-        ;;
       *)
         log "job '${job_name}' status: ${status} (waiting...)"
         ;;