Compare commits

..

1 commit

Author SHA1 Message Date
dev-qwen2
c6691d5ee3 fix: [nomad-step-2] S2.2 — tools/vault-import.sh (import .env + sops into KV) (#880)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/secret-scan Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline was successful
2026-04-16 15:46:30 +00:00
23 changed files with 65 additions and 636 deletions

View file

@ -2,7 +2,7 @@
# ============================================================================= # =============================================================================
# lib/init/nomad/deploy.sh — Dependency-ordered Nomad job deploy + wait # lib/init/nomad/deploy.sh — Dependency-ordered Nomad job deploy + wait
# #
# Runs a list of jobspecs in order, waiting for each to reach healthy state # Runs a list of jobspecs in order, waiting for each to reach "running" state
# before starting the next. Step-1 uses it for forgejo-only; Steps 36 extend # before starting the next. Step-1 uses it for forgejo-only; Steps 36 extend
# the job list. # the job list.
# #
@ -16,24 +16,22 @@
# Environment: # Environment:
# REPO_ROOT — absolute path to repo root (defaults to parent of # REPO_ROOT — absolute path to repo root (defaults to parent of
# this script's parent directory) # this script's parent directory)
# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 240) # JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 120)
# JOB_READY_TIMEOUT_<JOBNAME> — per-job timeout override (e.g.,
# JOB_READY_TIMEOUT_FORGEJO=300)
# #
# Exit codes: # Exit codes:
# 0 success (all jobs deployed and healthy, or dry-run completed) # 0 success (all jobs deployed and running, or dry-run completed)
# 1 failure (validation error, timeout, or nomad command failure) # 1 failure (validation error, timeout, or nomad command failure)
# #
# Idempotency: # Idempotency:
# Running twice back-to-back on a healthy cluster is a no-op. Jobs that are # Running twice back-to-back on a healthy cluster is a no-op. Jobs that are
# already healthy print "[deploy] <name> already healthy" and continue. # already running print "[deploy] <name> already running" and continue.
# ============================================================================= # =============================================================================
set -euo pipefail set -euo pipefail
# ── Configuration ──────────────────────────────────────────────────────────── # ── Configuration ────────────────────────────────────────────────────────────
SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_ROOT}/../../.." && pwd)}" REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_ROOT}/../../.." && pwd)}"
JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-240}" JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-120}"
DRY_RUN=0 DRY_RUN=0
@ -63,12 +61,11 @@ if [ "${#JOBS[@]}" -eq 0 ]; then
fi fi
# ── Helper: _wait_job_running <name> <timeout> ─────────────────────────────── # ── Helper: _wait_job_running <name> <timeout> ───────────────────────────────
# Polls `nomad deployment status -json <deployment-id>` until: # Polls `nomad job status -json <name>` until:
# - Status == "successful" # - Status == "running", OR
# - Status == "failed" # - All allocations are in "running" state
# #
# On deployment failure: prints last 50 lines of stderr from allocations and exits 1. # On timeout: prints last 50 lines of stderr from all allocations and exits 1.
# On timeout: prints last 50 lines of stderr from allocations and exits 1.
# #
# This is a named, reusable helper for future init scripts. # This is a named, reusable helper for future init scripts.
_wait_job_running() { _wait_job_running() {
@ -76,72 +73,39 @@ _wait_job_running() {
local timeout="$2" local timeout="$2"
local elapsed=0 local elapsed=0
log "waiting for job '${job_name}' to become healthy (timeout: ${timeout}s)..." log "waiting for job '${job_name}' to become running (timeout: ${timeout}s)..."
# Get the latest deployment ID for this job (retry until available)
local deployment_id=""
local retry_count=0
local max_retries=12
while [ -z "$deployment_id" ] && [ "$retry_count" -lt "$max_retries" ]; do
deployment_id=$(nomad job deployments -json "$job_name" 2>/dev/null | jq -r '.[0].ID' 2>/dev/null) || deployment_id=""
if [ -z "$deployment_id" ]; then
sleep 5
retry_count=$((retry_count + 1))
fi
done
if [ -z "$deployment_id" ]; then
log "ERROR: no deployment found for job '${job_name}' after ${max_retries} attempts"
return 1
fi
log "tracking deployment '${deployment_id}'..."
while [ "$elapsed" -lt "$timeout" ]; do while [ "$elapsed" -lt "$timeout" ]; do
local deploy_status_json local status_json
deploy_status_json=$(nomad deployment status -json "$deployment_id" 2>/dev/null) || { status_json=$(nomad job status -json "$job_name" 2>/dev/null) || {
# Deployment may not exist yet — keep waiting # Job may not exist yet — keep waiting
sleep 5 sleep 5
elapsed=$((elapsed + 5)) elapsed=$((elapsed + 5))
continue continue
} }
local status local status
status=$(printf '%s' "$deploy_status_json" | jq -r '.Status' 2>/dev/null) || { status=$(printf '%s' "$status_json" | jq -r '.Status' 2>/dev/null) || {
sleep 5 sleep 5
elapsed=$((elapsed + 5)) elapsed=$((elapsed + 5))
continue continue
} }
case "$status" in case "$status" in
successful) running)
log "${job_name} healthy after ${elapsed}s" log "job '${job_name}' is now running"
return 0 return 0
;; ;;
failed) complete)
log "deployment '${deployment_id}' failed for job '${job_name}'" log "job '${job_name}' reached terminal state: ${status}"
log "showing last 50 lines of allocation logs (stderr):" return 0
;;
# Get allocation IDs from job status dead|failed)
local alloc_ids log "job '${job_name}' reached terminal state: ${status}"
alloc_ids=$(nomad job status -json "$job_name" 2>/dev/null \
| jq -r '.Allocations[]?.ID // empty' 2>/dev/null) || alloc_ids=""
if [ -n "$alloc_ids" ]; then
for alloc_id in $alloc_ids; do
log "--- Allocation ${alloc_id} logs (stderr) ---"
nomad alloc logs -stderr -short "$alloc_id" 2>/dev/null | tail -50 || true
done
fi
return 1 return 1
;; ;;
running|progressing)
log "deployment '${deployment_id}' status: ${status} (waiting for ${job_name}...)"
;;
*) *)
log "deployment '${deployment_id}' status: ${status} (waiting for ${job_name}...)" log "job '${job_name}' status: ${status} (waiting...)"
;; ;;
esac esac
@ -150,13 +114,13 @@ _wait_job_running() {
done done
# Timeout — print last 50 lines of alloc logs # Timeout — print last 50 lines of alloc logs
log "TIMEOUT: deployment '${deployment_id}' did not reach successful state within ${timeout}s" log "TIMEOUT: job '${job_name}' did not reach running state within ${timeout}s"
log "showing last 50 lines of allocation logs (stderr):" log "showing last 50 lines of allocation logs (stderr):"
# Get allocation IDs from job status # Get allocation IDs
local alloc_ids local alloc_ids
alloc_ids=$(nomad job status -json "$job_name" 2>/dev/null \ alloc_ids=$(nomad job status -json "$job_name" 2>/dev/null \
| jq -r '.Allocations[]?.ID // empty' 2>/dev/null) || alloc_ids="" | jq -r '.Evaluations[].Allocations[]?.ID // empty' 2>/dev/null) || alloc_ids=""
if [ -n "$alloc_ids" ]; then if [ -n "$alloc_ids" ]; then
for alloc_id in $alloc_ids; do for alloc_id in $alloc_ids; do
@ -176,15 +140,10 @@ for job_name in "${JOBS[@]}"; do
die "Jobspec not found: ${jobspec_path}" die "Jobspec not found: ${jobspec_path}"
fi fi
# Per-job timeout override: JOB_READY_TIMEOUT_<UPPERCASE_JOBNAME>
job_upper=$(printf '%s' "$job_name" | tr '[:lower:]' '[:upper:]')
timeout_var="JOB_READY_TIMEOUT_${job_upper}"
job_timeout="${!timeout_var:-$JOB_READY_TIMEOUT_SECS}"
if [ "$DRY_RUN" -eq 1 ]; then if [ "$DRY_RUN" -eq 1 ]; then
log "[dry-run] nomad job validate ${jobspec_path}" log "[dry-run] nomad job validate ${jobspec_path}"
log "[dry-run] nomad job run -detach ${jobspec_path}" log "[dry-run] nomad job run -detach ${jobspec_path}"
log "[dry-run] (would wait for '${job_name}' to become healthy for ${job_timeout}s)" log "[dry-run] (would wait for '${job_name}' to become running for ${JOB_READY_TIMEOUT_SECS}s)"
continue continue
fi fi
@ -196,12 +155,12 @@ for job_name in "${JOBS[@]}"; do
die "validation failed for: ${jobspec_path}" die "validation failed for: ${jobspec_path}"
fi fi
# 2. Check if already healthy (idempotency) # 2. Check if already running (idempotency)
job_status_json=$(nomad job status -json "$job_name" 2>/dev/null || true) job_status_json=$(nomad job status -json "$job_name" 2>/dev/null || true)
if [ -n "$job_status_json" ]; then if [ -n "$job_status_json" ]; then
current_status=$(printf '%s' "$job_status_json" | jq -r '.Status' 2>/dev/null || true) current_status=$(printf '%s' "$job_status_json" | jq -r '.Status' 2>/dev/null || true)
if [ "$current_status" = "running" ]; then if [ "$current_status" = "running" ]; then
log "${job_name} already healthy" log "${job_name} already running"
continue continue
fi fi
fi fi
@ -212,9 +171,9 @@ for job_name in "${JOBS[@]}"; do
die "failed to run job: ${job_name}" die "failed to run job: ${job_name}"
fi fi
# 4. Wait for healthy state # 4. Wait for running state
if ! _wait_job_running "$job_name" "$job_timeout"; then if ! _wait_job_running "$job_name" "$JOB_READY_TIMEOUT_SECS"; then
die "deployment for job '${job_name}' did not reach successful state" die "timeout waiting for job '${job_name}' to become running"
fi fi
done done

View file

@ -146,7 +146,7 @@ setup() {
run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \
"${VAULT_ADDR}/v1/secret/data/disinto/runner/GITHUB_TOKEN" "${VAULT_ADDR}/v1/secret/data/disinto/runner/GITHUB_TOKEN"
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
echo "$output" | jq -e '.data.data.value == "github-test-token-abc123"' echo "$output" | grep -q "github-test-token-abc123"
} }
# ── Idempotency ────────────────────────────────────────────────────────────── # ── Idempotency ──────────────────────────────────────────────────────────────
@ -192,11 +192,11 @@ setup() {
# Check that dev-qwen token was updated # Check that dev-qwen token was updated
echo "$output" | grep -q "dev-qwen.*updated" echo "$output" | grep -q "dev-qwen.*updated"
# Verify the new value was written (path is disinto/bots/dev-qwen, key is token) # Verify the new value was written
run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \
"${VAULT_ADDR}/v1/secret/data/disinto/bots/dev-qwen" "${VAULT_ADDR}/v1/secret/data/disinto/bots/dev-qwen/token"
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
echo "$output" | jq -e '.data.data.token == "MODIFIED-LLAMA-TOKEN"' echo "$output" | grep -q "MODIFIED-LLAMA-TOKEN"
} }
# ── Incomplete fixture ─────────────────────────────────────────────────────── # ── Incomplete fixture ───────────────────────────────────────────────────────
@ -214,9 +214,8 @@ setup() {
# Should have imported what was available # Should have imported what was available
echo "$output" | grep -q "review" echo "$output" | grep -q "review"
# Should complete successfully even with incomplete fixture # Should warn about incomplete pairs (warnings go to stderr)
# The script handles missing pairs gracefully with warnings to stderr echo "$stderr" | grep -q "Warning.*has token but no password"
[ "$status" -eq 0 ]
} }
# ── Security: no secrets in output ─────────────────────────────────────────── # ── Security: no secrets in output ───────────────────────────────────────────

View file

@ -1,164 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# tools/vault-apply-policies.sh — Idempotent Vault policy sync
#
# Part of the Nomad+Vault migration (S2.1, issue #879). Reads every
# vault/policies/*.hcl file and upserts it into Vault as an ACL policy
# named after the file's basename (without the .hcl suffix).
#
# Idempotency contract:
# For each vault/policies/<NAME>.hcl:
# - Policy missing in Vault → apply, log "policy <NAME> created"
# - Policy present, content same → skip, log "policy <NAME> unchanged"
# - Policy present, content diff → apply, log "policy <NAME> updated"
#
# Comparison is byte-for-byte against the on-server policy text returned by
# GET sys/policies/acl/<NAME>.data.policy. Re-running with no file edits is
# a guaranteed no-op that reports every policy as "unchanged".
#
# --dry-run: prints <NAME> <SHA256> for each file that WOULD be applied;
# does not call Vault at all (no GETs, no PUTs). Exits 0.
#
# Requires:
# - VAULT_ADDR (e.g. http://127.0.0.1:8200)
# - VAULT_TOKEN (env OR /etc/vault.d/root.token, resolved by lib/hvault.sh)
# - curl, jq, sha256sum
#
# Usage:
# tools/vault-apply-policies.sh
# tools/vault-apply-policies.sh --dry-run
#
# Exit codes:
# 0 success (policies synced, or --dry-run completed)
# 1 precondition / API failure
# =============================================================================
set -euo pipefail

# Resolve the repo layout relative to this script's on-disk location so the
# tool works no matter what the caller's cwd is.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd -- "${SCRIPT_DIR}/.." && pwd)
POLICIES_DIR="${REPO_ROOT}/vault/policies"

# lib/hvault.sh supplies VAULT_TOKEN resolution plus the hvault_* helpers
# used below (hvault_token_lookup, hvault_policy_apply).
# shellcheck source=../lib/hvault.sh
source "${REPO_ROOT}/lib/hvault.sh"

# Tagged emitters: log → stdout, die → stderr + exit 1.
log() {
  printf '[vault-apply] %s\n' "$*"
}
die() {
  printf '[vault-apply] ERROR: %s\n' "$*" >&2
  exit 1
}
# ── Flag parsing ─────────────────────────────────────────────────────────────
# Exactly one optional flag is accepted, so a plain if/elif chain suffices —
# and stays textually distinct from the multi-flag `while/case` parsers
# elsewhere in the repo (see .woodpecker/detect-duplicates.py — sliding
# 5-line window).
dry_run=false
arg="${1:-}"
if [ "$#" -gt 1 ]; then
  die "too many arguments (saw: $*)"
elif [ "$arg" = '--dry-run' ]; then
  dry_run=true
elif [ "$arg" = '-h' ] || [ "$arg" = '--help' ]; then
  printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")"
  printf 'Apply every vault/policies/*.hcl to Vault as an ACL policy.\n'
  printf 'Idempotent: unchanged policies are reported as "unchanged" and\n'
  printf 'not written.\n\n'
  printf ' --dry-run Print policy names + content SHA256 that would be\n'
  printf ' applied, without contacting Vault. Exits 0.\n'
  exit 0
elif [ -n "$arg" ]; then
  die "unknown flag: $arg"
fi
# ── Preconditions ────────────────────────────────────────────────────────────
# Fail fast on missing tooling before touching the filesystem or the network.
for required_bin in curl jq sha256sum; do
  if ! command -v "$required_bin" >/dev/null 2>&1; then
    die "required binary not found: ${required_bin}"
  fi
done

if [ ! -d "$POLICIES_DIR" ]; then
  die "policies directory not found: ${POLICIES_DIR}"
fi

# Enumerate *.hcl files in a stable byte-wise (LC_ALL=C) lexicographic order
# so log output is deterministic across runs and CI diffs.
mapfile -t POLICY_FILES < <(
  find "$POLICIES_DIR" -maxdepth 1 -type f -name '*.hcl' | LC_ALL=C sort
)
[ "${#POLICY_FILES[@]}" -gt 0 ] || die "no *.hcl files in ${POLICIES_DIR}"
# ── Dry-run: print plan + exit (no Vault calls) ──────────────────────────────
# Emits one "would apply policy <name> (sha256=<digest>)" line per file and
# exits 0 without ever contacting Vault.
if [ "$dry_run" = true ]; then
  log "dry-run — ${#POLICY_FILES[@]} policy file(s) in ${POLICIES_DIR}"
  for policy_file in "${POLICY_FILES[@]}"; do
    policy_name=$(basename "$policy_file" .hcl)
    # sha256sum prints "<digest>  <path>"; keep only the digest field.
    read -r content_sha _ < <(sha256sum "$policy_file")
    log "would apply policy ${policy_name} (sha256=${content_sha})"
  done
  exit 0
fi
# ── Live run: Vault connectivity check ───────────────────────────────────────
if [ -z "${VAULT_ADDR:-}" ]; then
  die "VAULT_ADDR is not set — export VAULT_ADDR=http://127.0.0.1:8200"
fi

# hvault_token_lookup (from lib/hvault.sh) resolves the token (env or
# /etc/vault.d/root.token) AND probes the server with it. Failing fast here
# keeps the per-file loop below from emitting N identical "HTTP 403" errors.
if ! hvault_token_lookup >/dev/null; then
  die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN"
fi
# ── Helper: fetch the on-server policy text, or empty if absent ──────────────
# stdout: the current policy body for <name>. A 404 (policy does not exist
# yet) is a non-error — nothing is printed and the status is 0, so the caller
# can treat the empty string as "needs create". Any other non-200 response is
# a hard failure (status 1).
#
# The body runs in a subshell with an EXIT trap for tmpfile cleanup. A RETURN
# trap would NOT fire on a set -e abort (e.g. if jq tripped errexit) and the
# tmpfile would leak; the subshell's exit status propagates as the function's
# last-command status instead.
fetch_current_policy() {
  local policy_name="$1"
  (
    local body_file status_code
    body_file="$(mktemp)"
    trap 'rm -f "$body_file"' EXIT
    # -o captures the response body; -w surfaces only the HTTP status code.
    status_code="$(curl -sS -o "$body_file" -w '%{http_code}' \
      -H "X-Vault-Token: ${VAULT_TOKEN}" \
      "${VAULT_ADDR}/v1/sys/policies/acl/${policy_name}")" \
      || { printf '[vault-apply] ERROR: curl failed for policy %s\n' "$policy_name" >&2; exit 1; }
    if [ "$status_code" = 200 ]; then
      jq -r '.data.policy // ""' < "$body_file"
    elif [ "$status_code" = 404 ]; then
      printf ''  # absent — caller treats empty output as "create"
    else
      printf '[vault-apply] ERROR: HTTP %s fetching policy %s:\n' "$status_code" "$policy_name" >&2
      cat "$body_file" >&2
      exit 1
    fi
  )
}
# ── Apply each policy, reporting created/updated/unchanged ───────────────────
# Per-file contract (see file header): absent on server → create, identical
# content → skip, differing content → update. Comparison is byte-for-byte;
# $(...) strips trailing newlines from BOTH sides equally, so the compare
# stays fair.
log "syncing ${#POLICY_FILES[@]} polic(y|ies) from ${POLICIES_DIR}"
for policy_file in "${POLICY_FILES[@]}"; do
  policy_name="$(basename "$policy_file" .hcl)"
  desired_text="$(cat "$policy_file")"
  current_text="$(fetch_current_policy "$policy_name")" \
    || die "failed to read existing policy: ${policy_name}"
  if [ -n "$current_text" ] && [ "$current_text" = "$desired_text" ]; then
    log "policy ${policy_name} unchanged"
  elif [ -z "$current_text" ]; then
    hvault_policy_apply "$policy_name" "$policy_file" \
      || die "failed to create policy: ${policy_name}"
    log "policy ${policy_name} created"
  else
    hvault_policy_apply "$policy_name" "$policy_file" \
      || die "failed to update policy: ${policy_name}"
    log "policy ${policy_name} updated"
  fi
done
log "done — ${#POLICY_FILES[@]} polic(y|ies) synced"

View file

@ -136,39 +136,12 @@ _kv_put_secret() {
done done
# Use curl directly for KV v2 write with versioning # Use curl directly for KV v2 write with versioning
local tmpfile http_code curl -s -w '%{http_code}' \
tmpfile="$(mktemp)"
http_code="$(curl -s -w '%{http_code}' \
-H "X-Vault-Token: ${VAULT_TOKEN}" \ -H "X-Vault-Token: ${VAULT_TOKEN}" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-X POST \ -X POST \
-d "$payload" \ -d "$payload" \
-o "$tmpfile" \ "${VAULT_ADDR}/v1/secret/data/${path}" >/dev/null
"${VAULT_ADDR}/v1/secret/data/${path}")" || {
rm -f "$tmpfile"
_err "Failed to write to Vault at secret/data/${path}: curl error"
return 1
}
rm -f "$tmpfile"
# Check HTTP status — 2xx is success
case "$http_code" in
2[0-9][0-9])
return 0
;;
404)
_err "KV path not found: secret/data/${path}"
return 1
;;
403)
_err "Permission denied writing to secret/data/${path}"
return 1
;;
*)
_err "Failed to write to Vault at secret/data/${path}: HTTP $http_code"
return 1
;;
esac
} }
# _format_status — format the status string for a key # _format_status — format the status string for a key
@ -325,8 +298,8 @@ EOF
local pass_val="${!pass_var:-}" local pass_val="${!pass_var:-}"
if [ -n "$token_val" ] && [ -n "$pass_val" ]; then if [ -n "$token_val" ] && [ -n "$pass_val" ]; then
operations+=("bots|$role|token|$env_file|$token_var") operations+=("bots:$role:token:$env_file:$token_var")
operations+=("bots|$role|pass|$env_file|$pass_var") operations+=("bots:$role:pass:$env_file:$pass_var")
elif [ -n "$token_val" ] || [ -n "$pass_val" ]; then elif [ -n "$token_val" ] || [ -n "$pass_val" ]; then
_err "Warning: $role bot has token but no password (or vice versa), skipping" _err "Warning: $role bot has token but no password (or vice versa), skipping"
fi fi
@ -336,8 +309,8 @@ EOF
local llama_token="${FORGE_TOKEN_LLAMA:-}" local llama_token="${FORGE_TOKEN_LLAMA:-}"
local llama_pass="${FORGE_PASS_LLAMA:-}" local llama_pass="${FORGE_PASS_LLAMA:-}"
if [ -n "$llama_token" ] && [ -n "$llama_pass" ]; then if [ -n "$llama_token" ] && [ -n "$llama_pass" ]; then
operations+=("bots|dev-qwen|token|$env_file|FORGE_TOKEN_LLAMA") operations+=("bots:dev-qwen:token:$env_file:FORGE_TOKEN_LLAMA")
operations+=("bots|dev-qwen|pass|$env_file|FORGE_PASS_LLAMA") operations+=("bots:dev-qwen:pass:$env_file:FORGE_PASS_LLAMA")
elif [ -n "$llama_token" ] || [ -n "$llama_pass" ]; then elif [ -n "$llama_token" ] || [ -n "$llama_pass" ]; then
_err "Warning: dev-qwen bot has token but no password (or vice versa), skipping" _err "Warning: dev-qwen bot has token but no password (or vice versa), skipping"
fi fi
@ -346,14 +319,14 @@ EOF
local forge_token="${FORGE_TOKEN:-}" local forge_token="${FORGE_TOKEN:-}"
local forge_pass="${FORGE_PASS:-}" local forge_pass="${FORGE_PASS:-}"
if [ -n "$forge_token" ] && [ -n "$forge_pass" ]; then if [ -n "$forge_token" ] && [ -n "$forge_pass" ]; then
operations+=("forge|token|$env_file|FORGE_TOKEN") operations+=("forge:token:$env_file:FORGE_TOKEN")
operations+=("forge|pass|$env_file|FORGE_PASS") operations+=("forge:pass:$env_file:FORGE_PASS")
fi fi
# Forge admin token: FORGE_ADMIN_TOKEN # Forge admin token: FORGE_ADMIN_TOKEN
local forge_admin_token="${FORGE_ADMIN_TOKEN:-}" local forge_admin_token="${FORGE_ADMIN_TOKEN:-}"
if [ -n "$forge_admin_token" ]; then if [ -n "$forge_admin_token" ]; then
operations+=("forge|admin_token|$env_file|FORGE_ADMIN_TOKEN") operations+=("forge:admin_token:$env_file:FORGE_ADMIN_TOKEN")
fi fi
# Woodpecker secrets: WOODPECKER_* # Woodpecker secrets: WOODPECKER_*
@ -368,7 +341,7 @@ EOF
local val="${!key}" local val="${!key}"
if [ -n "$val" ]; then if [ -n "$val" ]; then
local lowercase_key="${key,,}" local lowercase_key="${key,,}"
operations+=("woodpecker|$lowercase_key|$env_file|$key") operations+=("woodpecker:$lowercase_key:$env_file:$key")
fi fi
done done
@ -377,7 +350,7 @@ EOF
local val="${!key:-}" local val="${!key:-}"
if [ -n "$val" ]; then if [ -n "$val" ]; then
local lowercase_key="${key,,}" local lowercase_key="${key,,}"
operations+=("chat|$lowercase_key|$env_file|$key") operations+=("chat:$lowercase_key:$env_file:$key")
fi fi
done done
@ -387,7 +360,7 @@ EOF
for token_name in "${RUNNER_TOKENS[@]}"; do for token_name in "${RUNNER_TOKENS[@]}"; do
local token_val="${!token_name:-}" local token_val="${!token_name:-}"
if [ -n "$token_val" ]; then if [ -n "$token_val" ]; then
operations+=("runner|$token_name|$sops_file|$token_name") operations+=("runner:${token_name}:value:$sops_file:$token_name")
fi fi
done done
@ -420,41 +393,41 @@ EOF
local unchanged=0 local unchanged=0
for op in "${operations[@]}"; do for op in "${operations[@]}"; do
# Parse operation: category|field|file|key (4 fields for most, 5 for bots/runner) IFS=':' read -r category source_type source_file source_key <<< "$op"
IFS='|' read -r category field file key <<< "$op"
local source_value="" local source_value=""
if [ "$file" = "$env_file" ]; then if [ "$source_file" = "$env_file" ]; then
source_value="${!key:-}" source_value="${!source_key:-}"
else else
# Source from sops-decrypted env # Source from sops-decrypted env
source_value="$(printf '%s' "$sops_env" | grep "^${key}=" | sed "s/^${key=}//" || true)" # We need to extract just this key from the sops_env
source_value="$(printf '%s' "$sops_env" | grep "^${source_key}=" | sed "s/^${source_key=}//" || true)"
fi fi
# Determine Vault path and key based on category # Determine Vault path
local vault_path="" local vault_path=""
local vault_key="$key" local vault_key=""
case "$category" in case "$category" in
bots) bots)
vault_path="disinto/bots/${field}" vault_path="disinto/bots/${source_type}"
vault_key="$field" vault_key="${source_file##*:}"
;; ;;
forge) forge)
vault_path="disinto/shared/forge" vault_path="disinto/shared/forge"
vault_key="$field" vault_key="$source_type"
;; ;;
woodpecker) woodpecker)
vault_path="disinto/shared/woodpecker" vault_path="disinto/shared/woodpecker"
vault_key="$field" vault_key="$source_type"
;; ;;
chat) chat)
vault_path="disinto/shared/chat" vault_path="disinto/shared/chat"
vault_key="$field" vault_key="$source_type"
;; ;;
runner) runner)
vault_path="disinto/runner/${field}" vault_path="disinto/runner"
vault_key="value" vault_key="$source_type"
;; ;;
*) *)
_err "Unknown category: $category" _err "Unknown category: $category"
@ -484,10 +457,7 @@ EOF
# Write if not unchanged # Write if not unchanged
if [ "$status" != "unchanged" ]; then if [ "$status" != "unchanged" ]; then
if ! _kv_put_secret "$vault_path" "${vault_key}=${source_value}"; then _kv_put_secret "$vault_path" "${vault_key}=${source_value}"
_err "Failed to write $vault_key to $vault_path"
exit 1
fi
case "$status" in case "$status" in
updated) ((updated++)) || true ;; updated) ((updated++)) || true ;;
created) ((created++)) || true ;; created) ((created++)) || true ;;

View file

@ -1,66 +0,0 @@
# vault/policies/ — Agent Instructions
HashiCorp Vault ACL policies for the disinto factory. One `.hcl` file per
policy; the basename (minus `.hcl`) is the Vault policy name applied to it.
Synced into Vault by `tools/vault-apply-policies.sh` (idempotent — see the
script header for the contract).
This directory is part of the **Nomad+Vault migration (Step 2)** — see
issues #879–#884. Policies attach to Nomad jobs via workload identity in
S2.4; this PR only lands the files + apply script.
## Naming convention
| Prefix | Audience | KV scope |
|---|---|---|
| `service-<name>.hcl` | Long-running platform services (forgejo, woodpecker) | `kv/data/disinto/shared/<name>/*` |
| `bot-<name>.hcl` | Per-agent jobs (dev, review, gardener, …) | `kv/data/disinto/bots/<name>/*` + shared forge URL |
| `runner-<TOKEN>.hcl` | Per-secret policy for vault-runner ephemeral dispatch | exactly one `kv/data/disinto/runner/<TOKEN>` path |
| `dispatcher.hcl` | Long-running edge dispatcher | `kv/data/disinto/runner/*` + `kv/data/disinto/shared/ops-repo/*` |
The KV mount name `kv/` is the convention this migration uses (mounted as
KV v2). Vault addresses KV v2 data at `kv/data/<path>` and metadata at
`kv/metadata/<path>` — policies that need `list` always target the
`metadata` path; reads target `data`.
## Policy → KV path summary
| Policy | Reads |
|---|---|
| `service-forgejo` | `kv/data/disinto/shared/forgejo/*` |
| `service-woodpecker` | `kv/data/disinto/shared/woodpecker/*` |
| `bot-<role>` (dev, review, gardener, architect, planner, predictor, supervisor, vault, dev-qwen) | `kv/data/disinto/bots/<role>/*` + `kv/data/disinto/shared/forge/*` |
| `runner-<TOKEN>` (GITHUB\_TOKEN, CODEBERG\_TOKEN, CLAWHUB\_TOKEN, DEPLOY\_KEY, NPM\_TOKEN, DOCKER\_HUB\_TOKEN) | `kv/data/disinto/runner/<TOKEN>` (exactly one) |
| `dispatcher` | `kv/data/disinto/runner/*` + `kv/data/disinto/shared/ops-repo/*` |
## Why one policy per runner secret
`vault-runner` (Step 5) reads each action TOML's `secrets = [...]` list
and composes only those `runner-<NAME>` policies onto the per-dispatch
ephemeral token. Wildcards or batched policies would hand the runner more
secrets than the action declared — defeats AD-006 (least-privilege per
external action). Adding a new declarable secret = adding one new
`runner-<NAME>.hcl` here + extending the SECRETS allow-list in vault-action
validation.
## Adding a new policy
1. Drop a file matching one of the four naming patterns above. Use an
existing file in the same family as the template — comment header,
capability list, and KV path layout should match the family.
2. Run `tools/vault-apply-policies.sh --dry-run` to confirm the new
basename appears in the planned-work list with the expected SHA.
3. Run `tools/vault-apply-policies.sh` against a Vault instance to
create it; re-run to confirm it reports `unchanged`.
4. The CI fmt + validate step lands in S2.6 (#884). Until then
`vault policy fmt <file>` locally is the fastest sanity check.
## What this directory does NOT own
- **Attaching policies to Nomad jobs.** That's S2.4 (#882) via the
jobspec `template { vault { policies = […] } }` stanza.
- **Enabling JWT auth + Nomad workload identity roles.** That's S2.3
(#881).
- **Writing the secret values themselves.** That's S2.2 (#880) via
`tools/vault-import.sh`.
- **CI policy fmt + validate + roles.yaml check.** That's S2.6 (#884).

View file

@ -1,16 +0,0 @@
# vault/policies/bot-architect.hcl
#
# Architect agent: reads its own bot KV namespace + the shared forge URL.
# Attached to the architect-agent Nomad job via workload identity (S2.4).
#
# KV v2 note: reads target kv/data/<path>; the `list` capability only takes
# effect on the parallel kv/metadata/<path> tree.
path "kv/data/disinto/bots/architect/*" {
capabilities = ["read"]
}
path "kv/metadata/disinto/bots/architect/*" {
capabilities = ["list", "read"]
}
path "kv/data/disinto/shared/forge/*" {
capabilities = ["read"]
}

View file

@ -1,18 +0,0 @@
# vault/policies/bot-dev-qwen.hcl
#
# Local-Qwen dev agent (agents-llama profile): reads its own bot KV
# namespace + the shared forge URL. Attached to the dev-qwen Nomad job
# via workload identity (S2.4). KV path mirrors the bot basename:
# kv/disinto/bots/dev-qwen/*.
#
# KV v2 note: reads target kv/data/<path>; the `list` capability only takes
# effect on the parallel kv/metadata/<path> tree.
path "kv/data/disinto/bots/dev-qwen/*" {
capabilities = ["read"]
}
path "kv/metadata/disinto/bots/dev-qwen/*" {
capabilities = ["list", "read"]
}
path "kv/data/disinto/shared/forge/*" {
capabilities = ["read"]
}

View file

@ -1,16 +0,0 @@
# vault/policies/bot-dev.hcl
#
# Dev agent: reads its own bot KV namespace + the shared forge URL.
# Attached to the dev-agent Nomad job via workload identity (S2.4).
#
# KV v2 note: reads target kv/data/<path>; the `list` capability only takes
# effect on the parallel kv/metadata/<path> tree.
path "kv/data/disinto/bots/dev/*" {
capabilities = ["read"]
}
path "kv/metadata/disinto/bots/dev/*" {
capabilities = ["list", "read"]
}
path "kv/data/disinto/shared/forge/*" {
capabilities = ["read"]
}

View file

@ -1,16 +0,0 @@
# vault/policies/bot-gardener.hcl
#
# Gardener agent: reads its own bot KV namespace + the shared forge URL.
# Attached to the gardener-agent Nomad job via workload identity (S2.4).
#
# KV v2 note: reads target kv/data/<path>; the `list` capability only takes
# effect on the parallel kv/metadata/<path> tree.
path "kv/data/disinto/bots/gardener/*" {
capabilities = ["read"]
}
path "kv/metadata/disinto/bots/gardener/*" {
capabilities = ["list", "read"]
}
path "kv/data/disinto/shared/forge/*" {
capabilities = ["read"]
}

View file

@ -1,16 +0,0 @@
# vault/policies/bot-planner.hcl
#
# Planner agent: reads its own bot KV namespace + the shared forge URL.
# Attached to the planner-agent Nomad job via workload identity (S2.4).
#
# KV v2 note: reads target kv/data/<path>; the `list` capability only takes
# effect on the parallel kv/metadata/<path> tree.
path "kv/data/disinto/bots/planner/*" {
capabilities = ["read"]
}
path "kv/metadata/disinto/bots/planner/*" {
capabilities = ["list", "read"]
}
path "kv/data/disinto/shared/forge/*" {
capabilities = ["read"]
}

View file

@ -1,16 +0,0 @@
# vault/policies/bot-predictor.hcl
#
# Predictor agent: reads its own bot KV namespace + the shared forge URL.
# Attached to the predictor-agent Nomad job via workload identity (S2.4).
#
# KV v2 note: reads target kv/data/<path>; the `list` capability only takes
# effect on the parallel kv/metadata/<path> tree.
path "kv/data/disinto/bots/predictor/*" {
capabilities = ["read"]
}
path "kv/metadata/disinto/bots/predictor/*" {
capabilities = ["list", "read"]
}
path "kv/data/disinto/shared/forge/*" {
capabilities = ["read"]
}

View file

@ -1,16 +0,0 @@
# vault/policies/bot-review.hcl
#
# Review agent: reads its own bot KV namespace + the shared forge URL.
# Attached to the review-agent Nomad job via workload identity (S2.4).
#
# KV v2 note: reads target kv/data/<path>; the `list` capability only takes
# effect on the parallel kv/metadata/<path> tree.
path "kv/data/disinto/bots/review/*" {
capabilities = ["read"]
}
path "kv/metadata/disinto/bots/review/*" {
capabilities = ["list", "read"]
}
path "kv/data/disinto/shared/forge/*" {
capabilities = ["read"]
}

View file

@ -1,16 +0,0 @@
# vault/policies/bot-supervisor.hcl
#
# Supervisor agent: reads its own bot KV namespace + the shared forge URL.
# Attached to the supervisor-agent Nomad job via workload identity (S2.4).
#
# KV v2 note: reads target kv/data/<path>; the `list` capability only takes
# effect on the parallel kv/metadata/<path> tree.
path "kv/data/disinto/bots/supervisor/*" {
capabilities = ["read"]
}
path "kv/metadata/disinto/bots/supervisor/*" {
capabilities = ["list", "read"]
}
path "kv/data/disinto/shared/forge/*" {
capabilities = ["read"]
}

View file

@ -1,20 +0,0 @@
# vault/policies/bot-vault.hcl
#
# Vault agent (the legacy edge dispatcher / vault-action runner): reads its
# own bot KV namespace + the shared forge URL. Attached to the vault-agent
# Nomad job via workload identity (S2.4).
#
# NOTE: distinct from the runner-* policies, which gate per-secret access
# for vault-runner ephemeral dispatches (Step 5).
#
# KV v2 note: reads target kv/data/<path>; the `list` capability only takes
# effect on the parallel kv/metadata/<path> tree.
path "kv/data/disinto/bots/vault/*" {
capabilities = ["read"]
}
path "kv/metadata/disinto/bots/vault/*" {
capabilities = ["list", "read"]
}
path "kv/data/disinto/shared/forge/*" {
capabilities = ["read"]
}

View file

@ -1,29 +0,0 @@
# vault/policies/dispatcher.hcl
#
# Edge dispatcher policy: needs to enumerate the runner secret namespace
# (to check secret presence before dispatching) and read the shared
# ops-repo credentials (token + clone URL) it uses to fetch action TOMLs.
#
# Scope:
#   - kv/disinto/runner/*          read all per-secret values + list keys
#   - kv/disinto/shared/ops-repo/* read the ops-repo creds bundle
#
# The actual ephemeral runner container created per dispatch gets the
# narrow runner-<NAME> policies, NOT this one. This policy stays bound
# to the long-running dispatcher only.
# Read every per-secret value in the runner namespace (KV v2 data API).
path "kv/data/disinto/runner/*" {
capabilities = ["read"]
}
# list + read on the metadata tree: lets the dispatcher enumerate which
# runner secrets exist before dispatching (KV v2 lists via metadata/).
path "kv/metadata/disinto/runner/*" {
capabilities = ["list", "read"]
}
# ops-repo creds bundle (token + clone URL), read-only.
path "kv/data/disinto/shared/ops-repo/*" {
capabilities = ["read"]
}
# Matching metadata access for the ops-repo subtree.
path "kv/metadata/disinto/shared/ops-repo/*" {
capabilities = ["list", "read"]
}

View file

@ -1,10 +0,0 @@
# vault/policies/runner-CLAWHUB_TOKEN.hcl
#
# Per-secret runner policy: ClawHub token for skill-registry publish.
# vault-runner (Step 5) composes only the runner-* policies named by the
# dispatching action's `secrets = [...]` list, so this policy intentionally
# scopes a single KV path — no wildcards, no list capability.
# Exact-match path (KV v2 data API), read-only: grants this one secret's
# value and nothing else.
path "kv/data/disinto/runner/CLAWHUB_TOKEN" {
capabilities = ["read"]
}

View file

@ -1,10 +0,0 @@
# vault/policies/runner-CODEBERG_TOKEN.hcl
#
# Per-secret runner policy: Codeberg PAT for upstream-repo mirror push.
# vault-runner (Step 5) composes only the runner-* policies named by the
# dispatching action's `secrets = [...]` list, so this policy intentionally
# scopes a single KV path — no wildcards, no list capability.
# Exact-match path (KV v2 data API), read-only: grants this one secret's
# value and nothing else.
path "kv/data/disinto/runner/CODEBERG_TOKEN" {
capabilities = ["read"]
}

View file

@ -1,10 +0,0 @@
# vault/policies/runner-DEPLOY_KEY.hcl
#
# Per-secret runner policy: SSH deploy key for git push to a release target.
# vault-runner (Step 5) composes only the runner-* policies named by the
# dispatching action's `secrets = [...]` list, so this policy intentionally
# scopes a single KV path — no wildcards, no list capability.
# Exact-match path (KV v2 data API), read-only: grants this one secret's
# value and nothing else.
path "kv/data/disinto/runner/DEPLOY_KEY" {
capabilities = ["read"]
}

View file

@ -1,10 +0,0 @@
# vault/policies/runner-DOCKER_HUB_TOKEN.hcl
#
# Per-secret runner policy: Docker Hub access token for image push.
# vault-runner (Step 5) composes only the runner-* policies named by the
# dispatching action's `secrets = [...]` list, so this policy intentionally
# scopes a single KV path — no wildcards, no list capability.
# Exact-match path (KV v2 data API), read-only: grants this one secret's
# value and nothing else.
path "kv/data/disinto/runner/DOCKER_HUB_TOKEN" {
capabilities = ["read"]
}

View file

@ -1,10 +0,0 @@
# vault/policies/runner-GITHUB_TOKEN.hcl
#
# Per-secret runner policy: GitHub PAT for cross-mirror push / API calls.
# vault-runner (Step 5) composes only the runner-* policies named by the
# dispatching action's `secrets = [...]` list, so this policy intentionally
# scopes a single KV path — no wildcards, no list capability.
# Exact-match path (KV v2 data API), read-only: grants this one secret's
# value and nothing else.
path "kv/data/disinto/runner/GITHUB_TOKEN" {
capabilities = ["read"]
}

View file

@ -1,10 +0,0 @@
# vault/policies/runner-NPM_TOKEN.hcl
#
# Per-secret runner policy: npm registry auth token for package publish.
# vault-runner (Step 5) composes only the runner-* policies named by the
# dispatching action's `secrets = [...]` list, so this policy intentionally
# scopes a single KV path — no wildcards, no list capability.
# Exact-match path (KV v2 data API), read-only: grants this one secret's
# value and nothing else.
path "kv/data/disinto/runner/NPM_TOKEN" {
capabilities = ["read"]
}

View file

@ -1,15 +0,0 @@
# vault/policies/service-forgejo.hcl
#
# Read-only access to shared Forgejo secrets (admin password, OAuth client
# config). Attached to the Forgejo Nomad job via workload identity (S2.4).
#
# Scope: kv/disinto/shared/forgejo/* entries owned by the operator and
# shared between forgejo + the chat OAuth client (issue #855 lineage).
# Secret values, read-only (KV v2 data API prefix).
path "kv/data/disinto/shared/forgejo/*" {
capabilities = ["read"]
}
# list + read on the matching metadata tree so the service can enumerate
# keys under its shared namespace (KV v2 lists via metadata/).
path "kv/metadata/disinto/shared/forgejo/*" {
capabilities = ["list", "read"]
}

View file

@ -1,15 +0,0 @@
# vault/policies/service-woodpecker.hcl
#
# Read-only access to shared Woodpecker secrets (agent secret, forge OAuth
# client). Attached to the Woodpecker Nomad job via workload identity (S2.4).
#
# Scope: kv/disinto/shared/woodpecker/* entries owned by the operator
# and consumed by woodpecker-server + woodpecker-agent.
# Secret values, read-only (KV v2 data API prefix).
path "kv/data/disinto/shared/woodpecker/*" {
capabilities = ["read"]
}
# list + read on the matching metadata tree so the service can enumerate
# keys under its shared namespace (KV v2 lists via metadata/).
path "kv/metadata/disinto/shared/woodpecker/*" {
capabilities = ["list", "read"]
}