diff --git a/lib/init/nomad/deploy.sh b/lib/init/nomad/deploy.sh index a1724c5..7a58a5a 100755 --- a/lib/init/nomad/deploy.sh +++ b/lib/init/nomad/deploy.sh @@ -2,7 +2,7 @@ # ============================================================================= # lib/init/nomad/deploy.sh — Dependency-ordered Nomad job deploy + wait # -# Runs a list of jobspecs in order, waiting for each to reach healthy state +# Runs a list of jobspecs in order, waiting for each to reach "running" state # before starting the next. Step-1 uses it for forgejo-only; Steps 3–6 extend # the job list. # @@ -16,24 +16,22 @@ # Environment: # REPO_ROOT — absolute path to repo root (defaults to parent of # this script's parent directory) -# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 240) -# JOB_READY_TIMEOUT_ — per-job timeout override (e.g., -# JOB_READY_TIMEOUT_FORGEJO=300) +# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 120) # # Exit codes: -# 0 success (all jobs deployed and healthy, or dry-run completed) +# 0 success (all jobs deployed and running, or dry-run completed) # 1 failure (validation error, timeout, or nomad command failure) # # Idempotency: # Running twice back-to-back on a healthy cluster is a no-op. Jobs that are -# already healthy print "[deploy] already healthy" and continue. +# already running print "[deploy] already running" and continue. # ============================================================================= set -euo pipefail # ── Configuration ──────────────────────────────────────────────────────────── SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_ROOT}/../../.." 
&& pwd)}" -JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-240}" +JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-120}" DRY_RUN=0 @@ -63,12 +61,11 @@ if [ "${#JOBS[@]}" -eq 0 ]; then fi # ── Helper: _wait_job_running ─────────────────────────────── -# Polls `nomad deployment status -json ` until: -# - Status == "successful" -# - Status == "failed" +# Polls `nomad job status -json ` until: +# - Status == "running", OR +# - All allocations are in "running" state # -# On deployment failure: prints last 50 lines of stderr from allocations and exits 1. -# On timeout: prints last 50 lines of stderr from allocations and exits 1. +# On timeout: prints last 50 lines of stderr from all allocations and exits 1. # # This is a named, reusable helper for future init scripts. _wait_job_running() { @@ -76,72 +73,39 @@ _wait_job_running() { local timeout="$2" local elapsed=0 - log "waiting for job '${job_name}' to become healthy (timeout: ${timeout}s)..." - - # Get the latest deployment ID for this job (retry until available) - local deployment_id="" - local retry_count=0 - local max_retries=12 - - while [ -z "$deployment_id" ] && [ "$retry_count" -lt "$max_retries" ]; do - deployment_id=$(nomad job deployments -json "$job_name" 2>/dev/null | jq -r '.[0].ID' 2>/dev/null) || deployment_id="" - if [ -z "$deployment_id" ]; then - sleep 5 - retry_count=$((retry_count + 1)) - fi - done - - if [ -z "$deployment_id" ]; then - log "ERROR: no deployment found for job '${job_name}' after ${max_retries} attempts" - return 1 - fi - - log "tracking deployment '${deployment_id}'..." + log "waiting for job '${job_name}' to become running (timeout: ${timeout}s)..." 
while [ "$elapsed" -lt "$timeout" ]; do - local deploy_status_json - deploy_status_json=$(nomad deployment status -json "$deployment_id" 2>/dev/null) || { - # Deployment may not exist yet — keep waiting + local status_json + status_json=$(nomad job status -json "$job_name" 2>/dev/null) || { + # Job may not exist yet — keep waiting sleep 5 elapsed=$((elapsed + 5)) continue } local status - status=$(printf '%s' "$deploy_status_json" | jq -r '.Status' 2>/dev/null) || { + status=$(printf '%s' "$status_json" | jq -r '.Status' 2>/dev/null) || { sleep 5 elapsed=$((elapsed + 5)) continue } case "$status" in - successful) - log "${job_name} healthy after ${elapsed}s" + running) + log "job '${job_name}' is now running" return 0 ;; - failed) - log "deployment '${deployment_id}' failed for job '${job_name}'" - log "showing last 50 lines of allocation logs (stderr):" - - # Get allocation IDs from job status - local alloc_ids - alloc_ids=$(nomad job status -json "$job_name" 2>/dev/null \ - | jq -r '.Allocations[]?.ID // empty' 2>/dev/null) || alloc_ids="" - - if [ -n "$alloc_ids" ]; then - for alloc_id in $alloc_ids; do - log "--- Allocation ${alloc_id} logs (stderr) ---" - nomad alloc logs -stderr -short "$alloc_id" 2>/dev/null | tail -50 || true - done - fi - + complete) + log "job '${job_name}' reached terminal state: ${status}" + return 0 + ;; + dead|failed) + log "job '${job_name}' reached terminal state: ${status}" return 1 ;; - running|progressing) - log "deployment '${deployment_id}' status: ${status} (waiting for ${job_name}...)" - ;; *) - log "deployment '${deployment_id}' status: ${status} (waiting for ${job_name}...)" + log "job '${job_name}' status: ${status} (waiting...)" ;; esac @@ -150,13 +114,13 @@ _wait_job_running() { done # Timeout — print last 50 lines of alloc logs - log "TIMEOUT: deployment '${deployment_id}' did not reach successful state within ${timeout}s" + log "TIMEOUT: job '${job_name}' did not reach running state within ${timeout}s" log "showing 
last 50 lines of allocation logs (stderr):" - # Get allocation IDs from job status + # Get allocation IDs local alloc_ids alloc_ids=$(nomad job status -json "$job_name" 2>/dev/null \ - | jq -r '.Allocations[]?.ID // empty' 2>/dev/null) || alloc_ids="" + | jq -r '.Evaluations[].Allocations[]?.ID // empty' 2>/dev/null) || alloc_ids="" if [ -n "$alloc_ids" ]; then for alloc_id in $alloc_ids; do @@ -176,15 +140,10 @@ for job_name in "${JOBS[@]}"; do die "Jobspec not found: ${jobspec_path}" fi - # Per-job timeout override: JOB_READY_TIMEOUT_<JOBNAME> - job_upper=$(printf '%s' "$job_name" | tr '[:lower:]' '[:upper:]') - timeout_var="JOB_READY_TIMEOUT_${job_upper}" - job_timeout="${!timeout_var:-$JOB_READY_TIMEOUT_SECS}" - if [ "$DRY_RUN" -eq 1 ]; then log "[dry-run] nomad job validate ${jobspec_path}" log "[dry-run] nomad job run -detach ${jobspec_path}" - log "[dry-run] (would wait for '${job_name}' to become healthy for ${job_timeout}s)" + log "[dry-run] (would wait for '${job_name}' to become running for ${JOB_READY_TIMEOUT_SECS}s)" continue fi @@ -196,12 +155,12 @@ for job_name in "${JOBS[@]}"; do die "validation failed for: ${jobspec_path}" fi - # 2. Check if already healthy (idempotency) + # 2. Check if already running (idempotency) job_status_json=$(nomad job status -json "$job_name" 2>/dev/null || true) if [ -n "$job_status_json" ]; then current_status=$(printf '%s' "$job_status_json" | jq -r '.Status' 2>/dev/null || true) if [ "$current_status" = "running" ]; then - log "${job_name} already healthy" + log "${job_name} already running" continue fi fi @@ -212,9 +171,9 @@ for job_name in "${JOBS[@]}"; do die "failed to run job: ${job_name}" fi - # 4. Wait for healthy state - if ! _wait_job_running "$job_name" "$job_timeout"; then - die "deployment for job '${job_name}' did not reach successful state" + # 4. Wait for running state + if ! 
_wait_job_running "$job_name" "$JOB_READY_TIMEOUT_SECS"; then + die "timeout waiting for job '${job_name}' to become running" fi done diff --git a/tests/vault-import.bats b/tests/vault-import.bats index 16994b9..131d90e 100644 --- a/tests/vault-import.bats +++ b/tests/vault-import.bats @@ -146,7 +146,7 @@ setup() { run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ "${VAULT_ADDR}/v1/secret/data/disinto/runner/GITHUB_TOKEN" [ "$status" -eq 0 ] - echo "$output" | jq -e '.data.data.value == "github-test-token-abc123"' + echo "$output" | grep -q "github-test-token-abc123" } # ── Idempotency ────────────────────────────────────────────────────────────── @@ -192,11 +192,11 @@ setup() { # Check that dev-qwen token was updated echo "$output" | grep -q "dev-qwen.*updated" - # Verify the new value was written (path is disinto/bots/dev-qwen, key is token) + # Verify the new value was written run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ - "${VAULT_ADDR}/v1/secret/data/disinto/bots/dev-qwen" + "${VAULT_ADDR}/v1/secret/data/disinto/bots/dev-qwen/token" [ "$status" -eq 0 ] - echo "$output" | jq -e '.data.data.token == "MODIFIED-LLAMA-TOKEN"' + echo "$output" | grep -q "MODIFIED-LLAMA-TOKEN" } # ── Incomplete fixture ─────────────────────────────────────────────────────── @@ -214,9 +214,8 @@ setup() { # Should have imported what was available echo "$output" | grep -q "review" - # Should complete successfully even with incomplete fixture - # The script handles missing pairs gracefully with warnings to stderr - [ "$status" -eq 0 ] + # Should warn about incomplete pairs (warnings go to stderr) + echo "$stderr" | grep -q "Warning.*has token but no password" } # ── Security: no secrets in output ─────────────────────────────────────────── diff --git a/tools/vault-apply-policies.sh b/tools/vault-apply-policies.sh deleted file mode 100755 index 222f04f..0000000 --- a/tools/vault-apply-policies.sh +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env bash -# 
============================================================================= -# tools/vault-apply-policies.sh — Idempotent Vault policy sync -# -# Part of the Nomad+Vault migration (S2.1, issue #879). Reads every -# vault/policies/*.hcl file and upserts it into Vault as an ACL policy -# named after the file's basename (without the .hcl suffix). -# -# Idempotency contract: -# For each vault/policies/.hcl: -# - Policy missing in Vault → apply, log "policy created" -# - Policy present, content same → skip, log "policy unchanged" -# - Policy present, content diff → apply, log "policy updated" -# -# Comparison is byte-for-byte against the on-server policy text returned by -# GET sys/policies/acl/.data.policy. Re-running with no file edits is -# a guaranteed no-op that reports every policy as "unchanged". -# -# --dry-run: prints for each file that WOULD be applied; -# does not call Vault at all (no GETs, no PUTs). Exits 0. -# -# Requires: -# - VAULT_ADDR (e.g. http://127.0.0.1:8200) -# - VAULT_TOKEN (env OR /etc/vault.d/root.token, resolved by lib/hvault.sh) -# - curl, jq, sha256sum -# -# Usage: -# tools/vault-apply-policies.sh -# tools/vault-apply-policies.sh --dry-run -# -# Exit codes: -# 0 success (policies synced, or --dry-run completed) -# 1 precondition / API failure -# ============================================================================= -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" -POLICIES_DIR="${REPO_ROOT}/vault/policies" - -# shellcheck source=../lib/hvault.sh -source "${REPO_ROOT}/lib/hvault.sh" - -log() { printf '[vault-apply] %s\n' "$*"; } -die() { printf '[vault-apply] ERROR: %s\n' "$*" >&2; exit 1; } - -# ── Flag parsing ───────────────────────────────────────────────────────────── -# Single optional flag — no loop needed. 
Keeps this block textually distinct -# from the multi-flag `while/case` parsers elsewhere in the repo (see -# .woodpecker/detect-duplicates.py — sliding 5-line window). -dry_run=false -[ "$#" -le 1 ] || die "too many arguments (saw: $*)" -case "${1:-}" in - '') ;; - --dry-run) dry_run=true ;; - -h|--help) printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")" - printf 'Apply every vault/policies/*.hcl to Vault as an ACL policy.\n' - printf 'Idempotent: unchanged policies are reported as "unchanged" and\n' - printf 'not written.\n\n' - printf ' --dry-run Print policy names + content SHA256 that would be\n' - printf ' applied, without contacting Vault. Exits 0.\n' - exit 0 ;; - *) die "unknown flag: $1" ;; -esac - -# ── Preconditions ──────────────────────────────────────────────────────────── -for bin in curl jq sha256sum; do - command -v "$bin" >/dev/null 2>&1 \ - || die "required binary not found: ${bin}" -done - -[ -d "$POLICIES_DIR" ] \ - || die "policies directory not found: ${POLICIES_DIR}" - -# Collect policy files in a stable (lexicographic) order so log output is -# deterministic across runs and CI diffs. 
-mapfile -t POLICY_FILES < <( - find "$POLICIES_DIR" -maxdepth 1 -type f -name '*.hcl' | LC_ALL=C sort -) - -if [ "${#POLICY_FILES[@]}" -eq 0 ]; then - die "no *.hcl files in ${POLICIES_DIR}" -fi - -# ── Dry-run: print plan + exit (no Vault calls) ────────────────────────────── -if [ "$dry_run" = true ]; then - log "dry-run — ${#POLICY_FILES[@]} policy file(s) in ${POLICIES_DIR}" - for f in "${POLICY_FILES[@]}"; do - name="$(basename "$f" .hcl)" - sha="$(sha256sum "$f" | awk '{print $1}')" - printf '[vault-apply] would apply policy %s (sha256=%s)\n' "$name" "$sha" - done - exit 0 -fi - -# ── Live run: Vault connectivity check ─────────────────────────────────────── -[ -n "${VAULT_ADDR:-}" ] \ - || die "VAULT_ADDR is not set — export VAULT_ADDR=http://127.0.0.1:8200" - -# hvault_token_lookup both resolves the token (env or /etc/vault.d/root.token) -# and confirms the server is reachable with a valid token. Fail fast here so -# the per-file loop below doesn't emit N identical "HTTP 403" errors. -hvault_token_lookup >/dev/null \ - || die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN" - -# ── Helper: fetch the on-server policy text, or empty if absent ────────────── -# Echoes the current policy content on stdout. A 404 (policy does not exist -# yet) is a non-error — we print nothing and exit 0 so the caller can treat -# the empty string as "needs create". Any other non-2xx is a hard failure. -# -# Uses a subshell + EXIT trap (not RETURN) for tmpfile cleanup: the RETURN -# trap does NOT fire on set-e abort, so if jq below tripped errexit the -# tmpfile would leak. Subshell exit propagates via the function's last- -# command exit status. 
-fetch_current_policy() { - local name="$1" - ( - local tmp http_code - tmp="$(mktemp)" - trap 'rm -f "$tmp"' EXIT - http_code="$(curl -sS -o "$tmp" -w '%{http_code}' \ - -H "X-Vault-Token: ${VAULT_TOKEN}" \ - "${VAULT_ADDR}/v1/sys/policies/acl/${name}")" \ - || { printf '[vault-apply] ERROR: curl failed for policy %s\n' "$name" >&2; exit 1; } - case "$http_code" in - 200) jq -r '.data.policy // ""' < "$tmp" ;; - 404) printf '' ;; # absent — caller treats as "create" - *) - printf '[vault-apply] ERROR: HTTP %s fetching policy %s:\n' "$http_code" "$name" >&2 - cat "$tmp" >&2 - exit 1 - ;; - esac - ) -} - -# ── Apply each policy, reporting created/updated/unchanged ─────────────────── -log "syncing ${#POLICY_FILES[@]} polic(y|ies) from ${POLICIES_DIR}" - -for f in "${POLICY_FILES[@]}"; do - name="$(basename "$f" .hcl)" - - desired="$(cat "$f")" - current="$(fetch_current_policy "$name")" \ - || die "failed to read existing policy: ${name}" - - if [ -z "$current" ]; then - hvault_policy_apply "$name" "$f" \ - || die "failed to create policy: ${name}" - log "policy ${name} created" - continue - fi - - if [ "$current" = "$desired" ]; then - log "policy ${name} unchanged" - continue - fi - - hvault_policy_apply "$name" "$f" \ - || die "failed to update policy: ${name}" - log "policy ${name} updated" -done - -log "done — ${#POLICY_FILES[@]} polic(y|ies) synced" diff --git a/tools/vault-import.sh b/tools/vault-import.sh index 4a3d3ab..ebbb98a 100755 --- a/tools/vault-import.sh +++ b/tools/vault-import.sh @@ -136,39 +136,12 @@ _kv_put_secret() { done # Use curl directly for KV v2 write with versioning - local tmpfile http_code - tmpfile="$(mktemp)" - http_code="$(curl -s -w '%{http_code}' \ + curl -s -w '%{http_code}' \ -H "X-Vault-Token: ${VAULT_TOKEN}" \ -H "Content-Type: application/json" \ -X POST \ -d "$payload" \ - -o "$tmpfile" \ - "${VAULT_ADDR}/v1/secret/data/${path}")" || { - rm -f "$tmpfile" - _err "Failed to write to Vault at secret/data/${path}: curl error" - 
return 1 - } - rm -f "$tmpfile" - - # Check HTTP status — 2xx is success - case "$http_code" in - 2[0-9][0-9]) - return 0 - ;; - 404) - _err "KV path not found: secret/data/${path}" - return 1 - ;; - 403) - _err "Permission denied writing to secret/data/${path}" - return 1 - ;; - *) - _err "Failed to write to Vault at secret/data/${path}: HTTP $http_code" - return 1 - ;; - esac + "${VAULT_ADDR}/v1/secret/data/${path}" >/dev/null } # _format_status — format the status string for a key @@ -325,8 +298,8 @@ EOF local pass_val="${!pass_var:-}" if [ -n "$token_val" ] && [ -n "$pass_val" ]; then - operations+=("bots|$role|token|$env_file|$token_var") - operations+=("bots|$role|pass|$env_file|$pass_var") + operations+=("bots:$role:token:$env_file:$token_var") + operations+=("bots:$role:pass:$env_file:$pass_var") elif [ -n "$token_val" ] || [ -n "$pass_val" ]; then _err "Warning: $role bot has token but no password (or vice versa), skipping" fi @@ -336,8 +309,8 @@ EOF local llama_token="${FORGE_TOKEN_LLAMA:-}" local llama_pass="${FORGE_PASS_LLAMA:-}" if [ -n "$llama_token" ] && [ -n "$llama_pass" ]; then - operations+=("bots|dev-qwen|token|$env_file|FORGE_TOKEN_LLAMA") - operations+=("bots|dev-qwen|pass|$env_file|FORGE_PASS_LLAMA") + operations+=("bots:dev-qwen:token:$env_file:FORGE_TOKEN_LLAMA") + operations+=("bots:dev-qwen:pass:$env_file:FORGE_PASS_LLAMA") elif [ -n "$llama_token" ] || [ -n "$llama_pass" ]; then _err "Warning: dev-qwen bot has token but no password (or vice versa), skipping" fi @@ -346,14 +319,14 @@ EOF local forge_token="${FORGE_TOKEN:-}" local forge_pass="${FORGE_PASS:-}" if [ -n "$forge_token" ] && [ -n "$forge_pass" ]; then - operations+=("forge|token|$env_file|FORGE_TOKEN") - operations+=("forge|pass|$env_file|FORGE_PASS") + operations+=("forge:token:$env_file:FORGE_TOKEN") + operations+=("forge:pass:$env_file:FORGE_PASS") fi # Forge admin token: FORGE_ADMIN_TOKEN local forge_admin_token="${FORGE_ADMIN_TOKEN:-}" if [ -n "$forge_admin_token" ]; then 
- operations+=("forge|admin_token|$env_file|FORGE_ADMIN_TOKEN") + operations+=("forge:admin_token:$env_file:FORGE_ADMIN_TOKEN") fi # Woodpecker secrets: WOODPECKER_* @@ -368,7 +341,7 @@ EOF local val="${!key}" if [ -n "$val" ]; then local lowercase_key="${key,,}" - operations+=("woodpecker|$lowercase_key|$env_file|$key") + operations+=("woodpecker:$lowercase_key:$env_file:$key") fi done @@ -377,7 +350,7 @@ EOF local val="${!key:-}" if [ -n "$val" ]; then local lowercase_key="${key,,}" - operations+=("chat|$lowercase_key|$env_file|$key") + operations+=("chat:$lowercase_key:$env_file:$key") fi done @@ -387,7 +360,7 @@ EOF for token_name in "${RUNNER_TOKENS[@]}"; do local token_val="${!token_name:-}" if [ -n "$token_val" ]; then - operations+=("runner|$token_name|$sops_file|$token_name") + operations+=("runner:${token_name}:value:$sops_file:$token_name") fi done @@ -420,41 +393,41 @@ EOF local unchanged=0 for op in "${operations[@]}"; do - # Parse operation: category|field|file|key (4 fields for most, 5 for bots/runner) - IFS='|' read -r category field file key <<< "$op" + IFS=':' read -r category source_type source_file source_key <<< "$op" local source_value="" - if [ "$file" = "$env_file" ]; then - source_value="${!key:-}" + if [ "$source_file" = "$env_file" ]; then + source_value="${!source_key:-}" else # Source from sops-decrypted env - source_value="$(printf '%s' "$sops_env" | grep "^${key}=" | sed "s/^${key=}//" || true)" + # We need to extract just this key from the sops_env + source_value="$(printf '%s' "$sops_env" | grep "^${source_key}=" | sed "s/^${source_key=}//" || true)" fi - # Determine Vault path and key based on category + # Determine Vault path local vault_path="" - local vault_key="$key" + local vault_key="" case "$category" in bots) - vault_path="disinto/bots/${field}" - vault_key="$field" + vault_path="disinto/bots/${source_type}" + vault_key="${source_file##*:}" ;; forge) vault_path="disinto/shared/forge" - vault_key="$field" + 
vault_key="$source_type" ;; woodpecker) vault_path="disinto/shared/woodpecker" - vault_key="$field" + vault_key="$source_type" ;; chat) vault_path="disinto/shared/chat" - vault_key="$field" + vault_key="$source_type" ;; runner) - vault_path="disinto/runner/${field}" - vault_key="value" + vault_path="disinto/runner" + vault_key="$source_type" ;; *) _err "Unknown category: $category" @@ -484,10 +457,7 @@ EOF # Write if not unchanged if [ "$status" != "unchanged" ]; then - if ! _kv_put_secret "$vault_path" "${vault_key}=${source_value}"; then - _err "Failed to write $vault_key to $vault_path" - exit 1 - fi + _kv_put_secret "$vault_path" "${vault_key}=${source_value}" case "$status" in updated) ((updated++)) || true ;; created) ((created++)) || true ;; diff --git a/vault/policies/AGENTS.md b/vault/policies/AGENTS.md deleted file mode 100644 index 981a84f..0000000 --- a/vault/policies/AGENTS.md +++ /dev/null @@ -1,66 +0,0 @@ -# vault/policies/ — Agent Instructions - -HashiCorp Vault ACL policies for the disinto factory. One `.hcl` file per -policy; the basename (minus `.hcl`) is the Vault policy name applied to it. -Synced into Vault by `tools/vault-apply-policies.sh` (idempotent — see the -script header for the contract). - -This directory is part of the **Nomad+Vault migration (Step 2)** — see -issues #879–#884. Policies attach to Nomad jobs via workload identity in -S2.4; this PR only lands the files + apply script. 
- -## Naming convention - -| Prefix | Audience | KV scope | -|---|---|---| -| `service-<name>.hcl` | Long-running platform services (forgejo, woodpecker) | `kv/data/disinto/shared/<name>/*` | -| `bot-<role>.hcl` | Per-agent jobs (dev, review, gardener, …) | `kv/data/disinto/bots/<role>/*` + shared forge URL | -| `runner-<TOKEN>.hcl` | Per-secret policy for vault-runner ephemeral dispatch | exactly one `kv/data/disinto/runner/<TOKEN>` path | -| `dispatcher.hcl` | Long-running edge dispatcher | `kv/data/disinto/runner/*` + `kv/data/disinto/shared/ops-repo/*` | - -The KV mount name `kv/` is the convention this migration uses (mounted as -KV v2). Vault addresses KV v2 data at `kv/data/<path>` and metadata at -`kv/metadata/<path>` — policies that need `list` always target the -`metadata` path; reads target `data`. - -## Policy → KV path summary - -| Policy | Reads | -|---|---| -| `service-forgejo` | `kv/data/disinto/shared/forgejo/*` | -| `service-woodpecker` | `kv/data/disinto/shared/woodpecker/*` | -| `bot-<role>` (dev, review, gardener, architect, planner, predictor, supervisor, vault, dev-qwen) | `kv/data/disinto/bots/<role>/*` + `kv/data/disinto/shared/forge/*` | -| `runner-<TOKEN>` (GITHUB\_TOKEN, CODEBERG\_TOKEN, CLAWHUB\_TOKEN, DEPLOY\_KEY, NPM\_TOKEN, DOCKER\_HUB\_TOKEN) | `kv/data/disinto/runner/<TOKEN>` (exactly one) | -| `dispatcher` | `kv/data/disinto/runner/*` + `kv/data/disinto/shared/ops-repo/*` | - -## Why one policy per runner secret - -`vault-runner` (Step 5) reads each action TOML's `secrets = [...]` list -and composes only those `runner-<TOKEN>` policies onto the per-dispatch -ephemeral token. Wildcards or batched policies would hand the runner more -secrets than the action declared — defeats AD-006 (least-privilege per -external action). Adding a new declarable secret = adding one new -`runner-<TOKEN>.hcl` here + extending the SECRETS allow-list in vault-action -validation. - -## Adding a new policy - -1. Drop a file matching one of the four naming patterns above. 
Use an - existing file in the same family as the template — comment header, - capability list, and KV path layout should match the family. -2. Run `tools/vault-apply-policies.sh --dry-run` to confirm the new - basename appears in the planned-work list with the expected SHA. -3. Run `tools/vault-apply-policies.sh` against a Vault instance to - create it; re-run to confirm it reports `unchanged`. -4. The CI fmt + validate step lands in S2.6 (#884). Until then - `vault policy fmt <file>` locally is the fastest sanity check. - -## What this directory does NOT own - -- **Attaching policies to Nomad jobs.** That's S2.4 (#882) via the - jobspec `template { vault { policies = […] } }` stanza. -- **Enabling JWT auth + Nomad workload identity roles.** That's S2.3 - (#881). -- **Writing the secret values themselves.** That's S2.2 (#880) via - `tools/vault-import.sh`. -- **CI policy fmt + validate + roles.yaml check.** That's S2.6 (#884). diff --git a/vault/policies/bot-architect.hcl b/vault/policies/bot-architect.hcl deleted file mode 100644 index 9381b61..0000000 --- a/vault/policies/bot-architect.hcl +++ /dev/null @@ -1,16 +0,0 @@ -# vault/policies/bot-architect.hcl -# -# Architect agent: reads its own bot KV namespace + the shared forge URL. -# Attached to the architect-agent Nomad job via workload identity (S2.4). - -path "kv/data/disinto/bots/architect/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/bots/architect/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/forge/*" { - capabilities = ["read"] -} diff --git a/vault/policies/bot-dev-qwen.hcl b/vault/policies/bot-dev-qwen.hcl deleted file mode 100644 index b71283d..0000000 --- a/vault/policies/bot-dev-qwen.hcl +++ /dev/null @@ -1,18 +0,0 @@ -# vault/policies/bot-dev-qwen.hcl -# -# Local-Qwen dev agent (agents-llama profile): reads its own bot KV -# namespace + the shared forge URL. Attached to the dev-qwen Nomad job -# via workload identity (S2.4). 
KV path mirrors the bot basename: -# kv/disinto/bots/dev-qwen/*. - -path "kv/data/disinto/bots/dev-qwen/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/bots/dev-qwen/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/forge/*" { - capabilities = ["read"] -} diff --git a/vault/policies/bot-dev.hcl b/vault/policies/bot-dev.hcl deleted file mode 100644 index 3771288..0000000 --- a/vault/policies/bot-dev.hcl +++ /dev/null @@ -1,16 +0,0 @@ -# vault/policies/bot-dev.hcl -# -# Dev agent: reads its own bot KV namespace + the shared forge URL. -# Attached to the dev-agent Nomad job via workload identity (S2.4). - -path "kv/data/disinto/bots/dev/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/bots/dev/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/forge/*" { - capabilities = ["read"] -} diff --git a/vault/policies/bot-gardener.hcl b/vault/policies/bot-gardener.hcl deleted file mode 100644 index f5ef230..0000000 --- a/vault/policies/bot-gardener.hcl +++ /dev/null @@ -1,16 +0,0 @@ -# vault/policies/bot-gardener.hcl -# -# Gardener agent: reads its own bot KV namespace + the shared forge URL. -# Attached to the gardener-agent Nomad job via workload identity (S2.4). - -path "kv/data/disinto/bots/gardener/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/bots/gardener/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/forge/*" { - capabilities = ["read"] -} diff --git a/vault/policies/bot-planner.hcl b/vault/policies/bot-planner.hcl deleted file mode 100644 index 440f6aa..0000000 --- a/vault/policies/bot-planner.hcl +++ /dev/null @@ -1,16 +0,0 @@ -# vault/policies/bot-planner.hcl -# -# Planner agent: reads its own bot KV namespace + the shared forge URL. -# Attached to the planner-agent Nomad job via workload identity (S2.4). 
- -path "kv/data/disinto/bots/planner/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/bots/planner/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/forge/*" { - capabilities = ["read"] -} diff --git a/vault/policies/bot-predictor.hcl b/vault/policies/bot-predictor.hcl deleted file mode 100644 index 3a3b6b2..0000000 --- a/vault/policies/bot-predictor.hcl +++ /dev/null @@ -1,16 +0,0 @@ -# vault/policies/bot-predictor.hcl -# -# Predictor agent: reads its own bot KV namespace + the shared forge URL. -# Attached to the predictor-agent Nomad job via workload identity (S2.4). - -path "kv/data/disinto/bots/predictor/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/bots/predictor/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/forge/*" { - capabilities = ["read"] -} diff --git a/vault/policies/bot-review.hcl b/vault/policies/bot-review.hcl deleted file mode 100644 index 04c7668..0000000 --- a/vault/policies/bot-review.hcl +++ /dev/null @@ -1,16 +0,0 @@ -# vault/policies/bot-review.hcl -# -# Review agent: reads its own bot KV namespace + the shared forge URL. -# Attached to the review-agent Nomad job via workload identity (S2.4). - -path "kv/data/disinto/bots/review/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/bots/review/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/forge/*" { - capabilities = ["read"] -} diff --git a/vault/policies/bot-supervisor.hcl b/vault/policies/bot-supervisor.hcl deleted file mode 100644 index 36ecc90..0000000 --- a/vault/policies/bot-supervisor.hcl +++ /dev/null @@ -1,16 +0,0 @@ -# vault/policies/bot-supervisor.hcl -# -# Supervisor agent: reads its own bot KV namespace + the shared forge URL. -# Attached to the supervisor-agent Nomad job via workload identity (S2.4). 
- -path "kv/data/disinto/bots/supervisor/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/bots/supervisor/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/forge/*" { - capabilities = ["read"] -} diff --git a/vault/policies/bot-vault.hcl b/vault/policies/bot-vault.hcl deleted file mode 100644 index 0a088dd..0000000 --- a/vault/policies/bot-vault.hcl +++ /dev/null @@ -1,20 +0,0 @@ -# vault/policies/bot-vault.hcl -# -# Vault agent (the legacy edge dispatcher / vault-action runner): reads its -# own bot KV namespace + the shared forge URL. Attached to the vault-agent -# Nomad job via workload identity (S2.4). -# -# NOTE: distinct from the runner-* policies, which gate per-secret access -# for vault-runner ephemeral dispatches (Step 5). - -path "kv/data/disinto/bots/vault/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/bots/vault/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/forge/*" { - capabilities = ["read"] -} diff --git a/vault/policies/dispatcher.hcl b/vault/policies/dispatcher.hcl deleted file mode 100644 index 6383ae7..0000000 --- a/vault/policies/dispatcher.hcl +++ /dev/null @@ -1,29 +0,0 @@ -# vault/policies/dispatcher.hcl -# -# Edge dispatcher policy: needs to enumerate the runner secret namespace -# (to check secret presence before dispatching) and read the shared -# ops-repo credentials (token + clone URL) it uses to fetch action TOMLs. -# -# Scope: -# - kv/disinto/runner/* — read all per-secret values + list keys -# - kv/disinto/shared/ops-repo/* — read the ops-repo creds bundle -# -# The actual ephemeral runner container created per dispatch gets the -# narrow runner- policies, NOT this one. This policy stays bound -# to the long-running dispatcher only. 
- -path "kv/data/disinto/runner/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/runner/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/ops-repo/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/shared/ops-repo/*" { - capabilities = ["list", "read"] -} diff --git a/vault/policies/runner-CLAWHUB_TOKEN.hcl b/vault/policies/runner-CLAWHUB_TOKEN.hcl deleted file mode 100644 index 5de32e9..0000000 --- a/vault/policies/runner-CLAWHUB_TOKEN.hcl +++ /dev/null @@ -1,10 +0,0 @@ -# vault/policies/runner-CLAWHUB_TOKEN.hcl -# -# Per-secret runner policy: ClawHub token for skill-registry publish. -# vault-runner (Step 5) composes only the runner-* policies named by the -# dispatching action's `secrets = [...]` list, so this policy intentionally -# scopes a single KV path — no wildcards, no list capability. - -path "kv/data/disinto/runner/CLAWHUB_TOKEN" { - capabilities = ["read"] -} diff --git a/vault/policies/runner-CODEBERG_TOKEN.hcl b/vault/policies/runner-CODEBERG_TOKEN.hcl deleted file mode 100644 index 5de534b..0000000 --- a/vault/policies/runner-CODEBERG_TOKEN.hcl +++ /dev/null @@ -1,10 +0,0 @@ -# vault/policies/runner-CODEBERG_TOKEN.hcl -# -# Per-secret runner policy: Codeberg PAT for upstream-repo mirror push. -# vault-runner (Step 5) composes only the runner-* policies named by the -# dispatching action's `secrets = [...]` list, so this policy intentionally -# scopes a single KV path — no wildcards, no list capability. - -path "kv/data/disinto/runner/CODEBERG_TOKEN" { - capabilities = ["read"] -} diff --git a/vault/policies/runner-DEPLOY_KEY.hcl b/vault/policies/runner-DEPLOY_KEY.hcl deleted file mode 100644 index ac711f9..0000000 --- a/vault/policies/runner-DEPLOY_KEY.hcl +++ /dev/null @@ -1,10 +0,0 @@ -# vault/policies/runner-DEPLOY_KEY.hcl -# -# Per-secret runner policy: SSH deploy key for git push to a release target. 
-# vault-runner (Step 5) composes only the runner-* policies named by the -# dispatching action's `secrets = [...]` list, so this policy intentionally -# scopes a single KV path — no wildcards, no list capability. - -path "kv/data/disinto/runner/DEPLOY_KEY" { - capabilities = ["read"] -} diff --git a/vault/policies/runner-DOCKER_HUB_TOKEN.hcl b/vault/policies/runner-DOCKER_HUB_TOKEN.hcl deleted file mode 100644 index 7d93a65..0000000 --- a/vault/policies/runner-DOCKER_HUB_TOKEN.hcl +++ /dev/null @@ -1,10 +0,0 @@ -# vault/policies/runner-DOCKER_HUB_TOKEN.hcl -# -# Per-secret runner policy: Docker Hub access token for image push. -# vault-runner (Step 5) composes only the runner-* policies named by the -# dispatching action's `secrets = [...]` list, so this policy intentionally -# scopes a single KV path — no wildcards, no list capability. - -path "kv/data/disinto/runner/DOCKER_HUB_TOKEN" { - capabilities = ["read"] -} diff --git a/vault/policies/runner-GITHUB_TOKEN.hcl b/vault/policies/runner-GITHUB_TOKEN.hcl deleted file mode 100644 index 7914c92..0000000 --- a/vault/policies/runner-GITHUB_TOKEN.hcl +++ /dev/null @@ -1,10 +0,0 @@ -# vault/policies/runner-GITHUB_TOKEN.hcl -# -# Per-secret runner policy: GitHub PAT for cross-mirror push / API calls. -# vault-runner (Step 5) composes only the runner-* policies named by the -# dispatching action's `secrets = [...]` list, so this policy intentionally -# scopes a single KV path — no wildcards, no list capability. - -path "kv/data/disinto/runner/GITHUB_TOKEN" { - capabilities = ["read"] -} diff --git a/vault/policies/runner-NPM_TOKEN.hcl b/vault/policies/runner-NPM_TOKEN.hcl deleted file mode 100644 index 27c77ee..0000000 --- a/vault/policies/runner-NPM_TOKEN.hcl +++ /dev/null @@ -1,10 +0,0 @@ -# vault/policies/runner-NPM_TOKEN.hcl -# -# Per-secret runner policy: npm registry auth token for package publish. 
-# vault-runner (Step 5) composes only the runner-* policies named by the -# dispatching action's `secrets = [...]` list, so this policy intentionally -# scopes a single KV path — no wildcards, no list capability. - -path "kv/data/disinto/runner/NPM_TOKEN" { - capabilities = ["read"] -} diff --git a/vault/policies/service-forgejo.hcl b/vault/policies/service-forgejo.hcl deleted file mode 100644 index 8470a23..0000000 --- a/vault/policies/service-forgejo.hcl +++ /dev/null @@ -1,15 +0,0 @@ -# vault/policies/service-forgejo.hcl -# -# Read-only access to shared Forgejo secrets (admin password, OAuth client -# config). Attached to the Forgejo Nomad job via workload identity (S2.4). -# -# Scope: kv/disinto/shared/forgejo/* — entries owned by the operator and -# shared between forgejo + the chat OAuth client (issue #855 lineage). - -path "kv/data/disinto/shared/forgejo/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/shared/forgejo/*" { - capabilities = ["list", "read"] -} diff --git a/vault/policies/service-woodpecker.hcl b/vault/policies/service-woodpecker.hcl deleted file mode 100644 index 19c9726..0000000 --- a/vault/policies/service-woodpecker.hcl +++ /dev/null @@ -1,15 +0,0 @@ -# vault/policies/service-woodpecker.hcl -# -# Read-only access to shared Woodpecker secrets (agent secret, forge OAuth -# client). Attached to the Woodpecker Nomad job via workload identity (S2.4). -# -# Scope: kv/disinto/shared/woodpecker/* — entries owned by the operator -# and consumed by woodpecker-server + woodpecker-agent. - -path "kv/data/disinto/shared/woodpecker/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/shared/woodpecker/*" { - capabilities = ["list", "read"] -}