diff --git a/lib/init/nomad/deploy.sh b/lib/init/nomad/deploy.sh index 7a58a5a..a1724c5 100755 --- a/lib/init/nomad/deploy.sh +++ b/lib/init/nomad/deploy.sh @@ -2,7 +2,7 @@ # ============================================================================= # lib/init/nomad/deploy.sh — Dependency-ordered Nomad job deploy + wait # -# Runs a list of jobspecs in order, waiting for each to reach "running" state +# Runs a list of jobspecs in order, waiting for each to reach healthy state # before starting the next. Step-1 uses it for forgejo-only; Steps 3–6 extend # the job list. # @@ -16,22 +16,24 @@ # Environment: # REPO_ROOT — absolute path to repo root (defaults to parent of # this script's parent directory) -# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 120) +# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 240) +# JOB_READY_TIMEOUT_ — per-job timeout override (e.g., +# JOB_READY_TIMEOUT_FORGEJO=300) # # Exit codes: -# 0 success (all jobs deployed and running, or dry-run completed) +# 0 success (all jobs deployed and healthy, or dry-run completed) # 1 failure (validation error, timeout, or nomad command failure) # # Idempotency: # Running twice back-to-back on a healthy cluster is a no-op. Jobs that are -# already running print "[deploy] already running" and continue. +# already healthy print "[deploy] already healthy" and continue. # ============================================================================= set -euo pipefail # ── Configuration ──────────────────────────────────────────────────────────── SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_ROOT}/../../.." 
&& pwd)}" -JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-120}" +JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-240}" DRY_RUN=0 @@ -61,11 +63,12 @@ if [ "${#JOBS[@]}" -eq 0 ]; then fi # ── Helper: _wait_job_running ─────────────────────────────── -# Polls `nomad job status -json ` until: -# - Status == "running", OR -# - All allocations are in "running" state +# Polls `nomad deployment status -json ` until: +# - Status == "successful" +# - Status == "failed" # -# On timeout: prints last 50 lines of stderr from all allocations and exits 1. +# On deployment failure: prints last 50 lines of stderr from allocations and exits 1. +# On timeout: prints last 50 lines of stderr from allocations and exits 1. # # This is a named, reusable helper for future init scripts. _wait_job_running() { @@ -73,39 +76,72 @@ _wait_job_running() { local timeout="$2" local elapsed=0 - log "waiting for job '${job_name}' to become running (timeout: ${timeout}s)..." + log "waiting for job '${job_name}' to become healthy (timeout: ${timeout}s)..." + + # Get the latest deployment ID for this job (retry until available) + local deployment_id="" + local retry_count=0 + local max_retries=12 + + while [ -z "$deployment_id" ] && [ "$retry_count" -lt "$max_retries" ]; do + deployment_id=$(nomad job deployments -json "$job_name" 2>/dev/null | jq -r '.[0].ID' 2>/dev/null) || deployment_id="" + if [ -z "$deployment_id" ]; then + sleep 5 + retry_count=$((retry_count + 1)) + fi + done + + if [ -z "$deployment_id" ]; then + log "ERROR: no deployment found for job '${job_name}' after ${max_retries} attempts" + return 1 + fi + + log "tracking deployment '${deployment_id}'..." 
while [ "$elapsed" -lt "$timeout" ]; do - local status_json - status_json=$(nomad job status -json "$job_name" 2>/dev/null) || { - # Job may not exist yet — keep waiting + local deploy_status_json + deploy_status_json=$(nomad deployment status -json "$deployment_id" 2>/dev/null) || { + # Deployment may not exist yet — keep waiting sleep 5 elapsed=$((elapsed + 5)) continue } local status - status=$(printf '%s' "$status_json" | jq -r '.Status' 2>/dev/null) || { + status=$(printf '%s' "$deploy_status_json" | jq -r '.Status' 2>/dev/null) || { sleep 5 elapsed=$((elapsed + 5)) continue } case "$status" in - running) - log "job '${job_name}' is now running" + successful) + log "${job_name} healthy after ${elapsed}s" return 0 ;; - complete) - log "job '${job_name}' reached terminal state: ${status}" - return 0 - ;; - dead|failed) - log "job '${job_name}' reached terminal state: ${status}" + failed) + log "deployment '${deployment_id}' failed for job '${job_name}'" + log "showing last 50 lines of allocation logs (stderr):" + + # Get allocation IDs from job status + local alloc_ids + alloc_ids=$(nomad job status -json "$job_name" 2>/dev/null \ + | jq -r '.Allocations[]?.ID // empty' 2>/dev/null) || alloc_ids="" + + if [ -n "$alloc_ids" ]; then + for alloc_id in $alloc_ids; do + log "--- Allocation ${alloc_id} logs (stderr) ---" + nomad alloc logs -stderr -short "$alloc_id" 2>/dev/null | tail -50 || true + done + fi + return 1 ;; + running|progressing) + log "deployment '${deployment_id}' status: ${status} (waiting for ${job_name}...)" + ;; *) - log "job '${job_name}' status: ${status} (waiting...)" + log "deployment '${deployment_id}' status: ${status} (waiting for ${job_name}...)" ;; esac @@ -114,13 +150,13 @@ _wait_job_running() { done # Timeout — print last 50 lines of alloc logs - log "TIMEOUT: job '${job_name}' did not reach running state within ${timeout}s" + log "TIMEOUT: deployment '${deployment_id}' did not reach successful state within ${timeout}s" log "showing 
last 50 lines of allocation logs (stderr):" - # Get allocation IDs + # Get allocation IDs from job status local alloc_ids alloc_ids=$(nomad job status -json "$job_name" 2>/dev/null \ - | jq -r '.Evaluations[].Allocations[]?.ID // empty' 2>/dev/null) || alloc_ids="" + | jq -r '.Allocations[]?.ID // empty' 2>/dev/null) || alloc_ids="" if [ -n "$alloc_ids" ]; then for alloc_id in $alloc_ids; do @@ -140,10 +176,15 @@ for job_name in "${JOBS[@]}"; do die "Jobspec not found: ${jobspec_path}" fi + # Per-job timeout override: JOB_READY_TIMEOUT_ + job_upper=$(printf '%s' "$job_name" | tr '[:lower:]' '[:upper:]') + timeout_var="JOB_READY_TIMEOUT_${job_upper}" + job_timeout="${!timeout_var:-$JOB_READY_TIMEOUT_SECS}" + if [ "$DRY_RUN" -eq 1 ]; then log "[dry-run] nomad job validate ${jobspec_path}" log "[dry-run] nomad job run -detach ${jobspec_path}" - log "[dry-run] (would wait for '${job_name}' to become running for ${JOB_READY_TIMEOUT_SECS}s)" + log "[dry-run] (would wait for '${job_name}' to become healthy for ${job_timeout}s)" continue fi @@ -155,12 +196,12 @@ for job_name in "${JOBS[@]}"; do die "validation failed for: ${jobspec_path}" fi - # 2. Check if already running (idempotency) + # 2. Check if already healthy (idempotency) job_status_json=$(nomad job status -json "$job_name" 2>/dev/null || true) if [ -n "$job_status_json" ]; then current_status=$(printf '%s' "$job_status_json" | jq -r '.Status' 2>/dev/null || true) if [ "$current_status" = "running" ]; then - log "${job_name} already running" + log "${job_name} already healthy" continue fi fi @@ -171,9 +212,9 @@ for job_name in "${JOBS[@]}"; do die "failed to run job: ${job_name}" fi - # 4. Wait for running state - if ! _wait_job_running "$job_name" "$JOB_READY_TIMEOUT_SECS"; then - die "timeout waiting for job '${job_name}' to become running" + # 4. Wait for healthy state + if ! 
_wait_job_running "$job_name" "$job_timeout"; then + die "deployment for job '${job_name}' did not reach successful state" fi done diff --git a/tests/fixtures/.env.vault.enc b/tests/fixtures/.env.vault.enc new file mode 100644 index 0000000..2924dc9 --- /dev/null +++ b/tests/fixtures/.env.vault.enc @@ -0,0 +1,20 @@ +{ + "data": "ENC[AES256_GCM,data:SsLdIiZDVkkV1bbKeHQ8A1K/4vgXQFJF8y4J87GGwsGa13lNnPoqRaCmPAtuQr3hR5JNqARUhFp8aEusyzwi/lZLU2Reo32YjE26ObVOHf47EGmmHM/tEgh6u0fa1AmFtuqJVQzhG2eZhJmZJFgdRH36+bhdBwI1mkORmsRNtBPHHjtQJDbsgN47maDhuP4B7WvB4/TdnJ++GNMlMbyrbr0pEf2uqqOVO55cJ3I4v/Jcg8tq0clPuW1k5dNFsmFSMbbjE5N25EGrc7oEH5GVZ6I6L6p0Fzyj/MV4hKacboFHiZmBZgRQ,iv:UnXTa800G3PW4IaErkPBIZKjPHAU3LmiCvAqDdhFE/Q=,tag:kdWpHQ8fEPGFlmfVoTMskA==,type:str]", + "sops": { + "kms": null, + "gcp_kms": null, + "azure_kv": null, + "hc_vault": null, + "age": [ + { + "recipient": "age1ztkm8yvdk42m2cn4dj2v9ptfknq8wpgr3ry9dpmtmlaeas6p7yyqft0ldg", + "enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBrVUlmaEdTNU1iMGg4dFA4\nNFNOSzlBc1NER1U3SHlwVFU1dm5tR1kyeldzCjZ2NXI3MjR4Zkd1RVBKNzJoQ1Jm\nQWpEZU5VMkNuYnhTTVJNc0RpTXlIZE0KLS0tIDFpQ2tlN0MzL1NuS2hKZU5JTG9B\nNWxXMzE0bGZpQkVBTnhWRXZBQlhrc1EKG76DM98cCuqIwUkbfJWHhJdYV77O9r8Q\nRJrq6jH59Gcp9W8iHg/aeShPHZFEOLg1q9azV9Wt9FjJn3SxyTmgvA==\n-----END AGE ENCRYPTED FILE-----\n" + } + ], + "lastmodified": "2026-04-16T15:43:34Z", + "mac": "ENC[AES256_GCM,data:jVRr2TxSZH2paD2doIX4JwCqo5wiPYfTowpj189w1IVlS0EY/XQoqxiWbunX/LmIDdQlTPCSe/vTp1EJA0cx6vzN2xENrwsfzCP6dwDGaRlZhH3V0CVhtfHIkMTEKWrAUx5hFtiwJPkLYUUYi5aRWRxhZQM1eBeRvuGKdlwvmHA=,iv:H57a61AfVNLrlg+4aMl9mwXI5O38O5ZoRhpxe2PTTkY=,tag:2jwH1855VNYlKseTE/XtTg==,type:str]", + "pgp": null, + "unencrypted_suffix": "_unencrypted", + "version": "3.9.4" + } +} \ No newline at end of file diff --git a/tests/fixtures/age-keys.txt b/tests/fixtures/age-keys.txt new file mode 100644 index 0000000..081f2af --- /dev/null +++ b/tests/fixtures/age-keys.txt @@ -0,0 +1,5 @@ +# Test age key for sops +# 
Generated: 2026-04-16 +# Public key: age1ztkm8yvdk42m2cn4dj2v9ptfknq8wpgr3ry9dpmtmlaeas6p7yyqft0ldg + +AGE-SECRET-KEY-1PCQQX37MTZDGES76H9TGQN5XTG2ZZX2UUR87KR784NZ4MQ3NJ56S0Z23SF diff --git a/tests/fixtures/dot-env-complete b/tests/fixtures/dot-env-complete new file mode 100644 index 0000000..828b9a3 --- /dev/null +++ b/tests/fixtures/dot-env-complete @@ -0,0 +1,40 @@ +# Test fixture .env file for vault-import.sh +# This file contains all expected keys for the import test + +# Generic forge creds +FORGE_TOKEN=generic-forge-token +FORGE_PASS=generic-forge-pass +FORGE_ADMIN_TOKEN=generic-admin-token + +# Bot tokens (review, dev, gardener, architect, planner, predictor, supervisor, vault) +FORGE_REVIEW_TOKEN=review-token +FORGE_REVIEW_PASS=review-pass +FORGE_DEV_TOKEN=dev-token +FORGE_DEV_PASS=dev-pass +FORGE_GARDENER_TOKEN=gardener-token +FORGE_GARDENER_PASS=gardener-pass +FORGE_ARCHITECT_TOKEN=architect-token +FORGE_ARCHITECT_PASS=architect-pass +FORGE_PLANNER_TOKEN=planner-token +FORGE_PLANNER_PASS=planner-pass +FORGE_PREDICTOR_TOKEN=predictor-token +FORGE_PREDICTOR_PASS=predictor-pass +FORGE_SUPERVISOR_TOKEN=supervisor-token +FORGE_SUPERVISOR_PASS=supervisor-pass +FORGE_VAULT_TOKEN=vault-token +FORGE_VAULT_PASS=vault-pass + +# Llama bot +FORGE_TOKEN_LLAMA=llama-token +FORGE_PASS_LLAMA=llama-pass + +# Woodpecker secrets +WOODPECKER_AGENT_SECRET=wp-agent-secret +WP_FORGEJO_CLIENT=wp-forgejo-client +WP_FORGEJO_SECRET=wp-forgejo-secret +WOODPECKER_TOKEN=wp-token + +# Chat secrets +FORWARD_AUTH_SECRET=forward-auth-secret +CHAT_OAUTH_CLIENT_ID=chat-client-id +CHAT_OAUTH_CLIENT_SECRET=chat-client-secret diff --git a/tests/fixtures/dot-env-incomplete b/tests/fixtures/dot-env-incomplete new file mode 100644 index 0000000..9869944 --- /dev/null +++ b/tests/fixtures/dot-env-incomplete @@ -0,0 +1,27 @@ +# Test fixture .env file with missing required keys +# This file is intentionally missing some keys to test error handling + +# Generic forge creds - missing FORGE_ADMIN_TOKEN 
+FORGE_TOKEN=generic-forge-token +FORGE_PASS=generic-forge-pass + +# Bot tokens - missing several roles +FORGE_REVIEW_TOKEN=review-token +FORGE_REVIEW_PASS=review-pass +FORGE_DEV_TOKEN=dev-token +FORGE_DEV_PASS=dev-pass + +# Llama bot - missing (only token, no pass) +FORGE_TOKEN_LLAMA=llama-token +# FORGE_PASS_LLAMA=llama-pass + +# Woodpecker secrets - missing some +WOODPECKER_AGENT_SECRET=wp-agent-secret +# WP_FORGEJO_CLIENT=wp-forgejo-client +# WP_FORGEJO_SECRET=wp-forgejo-secret +# WOODPECKER_TOKEN=wp-token + +# Chat secrets - missing some +FORWARD_AUTH_SECRET=forward-auth-secret +# CHAT_OAUTH_CLIENT_ID=chat-client-id +# CHAT_OAUTH_CLIENT_SECRET=chat-client-secret diff --git a/tests/fixtures/dot-env.vault.plain b/tests/fixtures/dot-env.vault.plain new file mode 100644 index 0000000..e4b60c1 --- /dev/null +++ b/tests/fixtures/dot-env.vault.plain @@ -0,0 +1,6 @@ +GITHUB_TOKEN=github-test-token-abc123 +CODEBERG_TOKEN=codeberg-test-token-def456 +CLAWHUB_TOKEN=clawhub-test-token-ghi789 +DEPLOY_KEY=deploy-key-test-jkl012 +NPM_TOKEN=npm-test-token-mno345 +DOCKER_HUB_TOKEN=dockerhub-test-token-pqr678 diff --git a/tests/vault-import.bats b/tests/vault-import.bats new file mode 100644 index 0000000..16994b9 --- /dev/null +++ b/tests/vault-import.bats @@ -0,0 +1,313 @@ +#!/usr/bin/env bats +# tests/vault-import.bats — Tests for tools/vault-import.sh +# +# Runs against a dev-mode Vault server (single binary, no LXC needed). +# CI launches vault server -dev inline before running these tests. 
+ +VAULT_BIN="${VAULT_BIN:-vault}" +IMPORT_SCRIPT="${BATS_TEST_DIRNAME}/../tools/vault-import.sh" +FIXTURES_DIR="${BATS_TEST_DIRNAME}/fixtures" + +setup_file() { + # Start dev-mode vault on a random port + export VAULT_DEV_PORT + VAULT_DEV_PORT="$(shuf -i 18200-18299 -n 1)" + export VAULT_ADDR="http://127.0.0.1:${VAULT_DEV_PORT}" + + "$VAULT_BIN" server -dev \ + -dev-listen-address="127.0.0.1:${VAULT_DEV_PORT}" \ + -dev-root-token-id="test-root-token" \ + -dev-no-store-token \ + &>"${BATS_FILE_TMPDIR}/vault.log" & + export VAULT_PID=$! + + export VAULT_TOKEN="test-root-token" + + # Wait for vault to be ready (up to 10s) + local i=0 + while ! curl -sf "${VAULT_ADDR}/v1/sys/health" >/dev/null 2>&1; do + sleep 0.5 + i=$((i + 1)) + if [ "$i" -ge 20 ]; then + echo "Vault failed to start. Log:" >&2 + cat "${BATS_FILE_TMPDIR}/vault.log" >&2 + return 1 + fi + done +} + +teardown_file() { + if [ -n "${VAULT_PID:-}" ]; then + kill "$VAULT_PID" 2>/dev/null || true + wait "$VAULT_PID" 2>/dev/null || true + fi +} + +setup() { + # Source the module under test for hvault functions + source "${BATS_TEST_DIRNAME}/../lib/hvault.sh" + export VAULT_ADDR VAULT_TOKEN +} + +# ── Security checks ────────────────────────────────────────────────────────── + +@test "refuses to run if VAULT_ADDR is not localhost" { + export VAULT_ADDR="http://prod-vault.example.com:8200" + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -ne 0 ] + echo "$output" | grep -q "Security check failed" +} + +@test "refuses if age key file permissions are not 0400" { + # Create a temp file with wrong permissions + local bad_key="${BATS_TEST_TMPDIR}/bad-ages.txt" + echo "AGE-SECRET-KEY-1TEST" > "$bad_key" + chmod 644 "$bad_key" + + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$bad_key" + [ "$status" -ne 0 ] + echo "$output" | grep 
-q "permissions" +} + +# ── Dry-run mode ───────────────────────────────────────────────────────────── + +@test "--dry-run prints plan without writing to Vault" { + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" \ + --dry-run + [ "$status" -eq 0 ] + echo "$output" | grep -q "DRY-RUN" + echo "$output" | grep -q "Import plan" + echo "$output" | grep -q "Planned operations" + + # Verify nothing was written to Vault + run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ + "${VAULT_ADDR}/v1/secret/data/disinto/bots/review" + [ "$status" -ne 0 ] +} + +# ── Complete fixture import ───────────────────────────────────────────────── + +@test "imports all keys from complete fixture" { + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -eq 0 ] + + # Check bots/review + run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ + "${VAULT_ADDR}/v1/secret/data/disinto/bots/review" + [ "$status" -eq 0 ] + echo "$output" | grep -q "review-token" + echo "$output" | grep -q "review-pass" + + # Check bots/dev-qwen + run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ + "${VAULT_ADDR}/v1/secret/data/disinto/bots/dev-qwen" + [ "$status" -eq 0 ] + echo "$output" | grep -q "llama-token" + echo "$output" | grep -q "llama-pass" + + # Check forge + run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ + "${VAULT_ADDR}/v1/secret/data/disinto/shared/forge" + [ "$status" -eq 0 ] + echo "$output" | grep -q "generic-forge-token" + echo "$output" | grep -q "generic-forge-pass" + echo "$output" | grep -q "generic-admin-token" + + # Check woodpecker + run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ + "${VAULT_ADDR}/v1/secret/data/disinto/shared/woodpecker" + [ "$status" -eq 0 ] + echo "$output" | grep -q "wp-agent-secret" + echo "$output" | grep -q "wp-forgejo-client" + echo "$output" | grep -q 
"wp-forgejo-secret" + echo "$output" | grep -q "wp-token" + + # Check chat + run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ + "${VAULT_ADDR}/v1/secret/data/disinto/shared/chat" + [ "$status" -eq 0 ] + echo "$output" | grep -q "forward-auth-secret" + echo "$output" | grep -q "chat-client-id" + echo "$output" | grep -q "chat-client-secret" + + # Check runner tokens from sops + run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ + "${VAULT_ADDR}/v1/secret/data/disinto/runner/GITHUB_TOKEN" + [ "$status" -eq 0 ] + echo "$output" | jq -e '.data.data.value == "github-test-token-abc123"' +} + +# ── Idempotency ────────────────────────────────────────────────────────────── + +@test "re-run with unchanged fixtures reports all unchanged" { + # First run + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -eq 0 ] + + # Second run - should report unchanged + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -eq 0 ] + + # Check that all keys report unchanged + echo "$output" | grep -q "unchanged" + # Count unchanged occurrences (should be many) + local unchanged_count + unchanged_count=$(echo "$output" | grep -c "unchanged" || true) + [ "$unchanged_count" -gt 10 ] +} + +@test "re-run with modified value reports only that key as updated" { + # Create a modified fixture + local modified_env="${BATS_TEST_TMPDIR}/dot-env-modified" + cp "$FIXTURES_DIR/dot-env-complete" "$modified_env" + + # Modify one value + sed -i 's/llama-token/MODIFIED-LLAMA-TOKEN/' "$modified_env" + + # Run with modified fixture + run "$IMPORT_SCRIPT" \ + --env "$modified_env" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -eq 0 ] + + # Check that dev-qwen token was updated + echo "$output" | grep -q "dev-qwen.*updated" + + # Verify the 
new value was written (path is disinto/bots/dev-qwen, key is token) + run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \ + "${VAULT_ADDR}/v1/secret/data/disinto/bots/dev-qwen" + [ "$status" -eq 0 ] + echo "$output" | jq -e '.data.data.token == "MODIFIED-LLAMA-TOKEN"' +} + +# ── Incomplete fixture ─────────────────────────────────────────────────────── + +@test "handles incomplete fixture gracefully" { + # The incomplete fixture is missing some keys, but that should be OK + # - it should only import what exists + # - it should warn about missing pairs + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-incomplete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -eq 0 ] + + # Should have imported what was available + echo "$output" | grep -q "review" + + # Should complete successfully even with incomplete fixture + # The script handles missing pairs gracefully with warnings to stderr + [ "$status" -eq 0 ] +} + +# ── Security: no secrets in output ─────────────────────────────────────────── + +@test "never logs secret values in stdout" { + # Run the import + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -eq 0 ] + + # Check that no actual secret values appear in output + # (only key names and status messages) + local secret_patterns=( + "generic-forge-token" + "generic-forge-pass" + "generic-admin-token" + "review-token" + "review-pass" + "llama-token" + "llama-pass" + "wp-agent-secret" + "forward-auth-secret" + "github-test-token" + "codeberg-test-token" + "clawhub-test-token" + "deploy-key-test" + "npm-test-token" + "dockerhub-test-token" + ) + + for pattern in "${secret_patterns[@]}"; do + if echo "$output" | grep -q "$pattern"; then + echo "FAIL: Found secret pattern '$pattern' in output" >&2 + echo "Output was:" >&2 + echo "$output" >&2 + return 1 + fi + done +} + +# ── Error handling 
─────────────────────────────────────────────────────────── + +@test "fails with missing --env argument" { + run "$IMPORT_SCRIPT" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -ne 0 ] + echo "$output" | grep -q "Missing required argument" +} + +@test "fails with missing --sops argument" { + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -ne 0 ] + echo "$output" | grep -q "Missing required argument" +} + +@test "fails with missing --age-key argument" { + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" + [ "$status" -ne 0 ] + echo "$output" | grep -q "Missing required argument" +} + +@test "fails with non-existent env file" { + run "$IMPORT_SCRIPT" \ + --env "/nonexistent/.env" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -ne 0 ] + echo "$output" | grep -q "not found" +} + +@test "fails with non-existent sops file" { + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "/nonexistent/.env.vault.enc" \ + --age-key "$FIXTURES_DIR/age-keys.txt" + [ "$status" -ne 0 ] + echo "$output" | grep -q "not found" +} + +@test "fails with non-existent age key file" { + run "$IMPORT_SCRIPT" \ + --env "$FIXTURES_DIR/dot-env-complete" \ + --sops "$FIXTURES_DIR/.env.vault.enc" \ + --age-key "/nonexistent/age-keys.txt" + [ "$status" -ne 0 ] + echo "$output" | grep -q "not found" +} diff --git a/tools/vault-apply-policies.sh b/tools/vault-apply-policies.sh new file mode 100755 index 0000000..222f04f --- /dev/null +++ b/tools/vault-apply-policies.sh @@ -0,0 +1,164 @@ +#!/usr/bin/env bash +# ============================================================================= +# tools/vault-apply-policies.sh — Idempotent Vault policy sync +# +# Part of the Nomad+Vault migration (S2.1, issue #879). 
Reads every +# vault/policies/*.hcl file and upserts it into Vault as an ACL policy +# named after the file's basename (without the .hcl suffix). +# +# Idempotency contract: +# For each vault/policies/.hcl: +# - Policy missing in Vault → apply, log "policy created" +# - Policy present, content same → skip, log "policy unchanged" +# - Policy present, content diff → apply, log "policy updated" +# +# Comparison is byte-for-byte against the on-server policy text returned by +# GET sys/policies/acl/.data.policy. Re-running with no file edits is +# a guaranteed no-op that reports every policy as "unchanged". +# +# --dry-run: prints for each file that WOULD be applied; +# does not call Vault at all (no GETs, no PUTs). Exits 0. +# +# Requires: +# - VAULT_ADDR (e.g. http://127.0.0.1:8200) +# - VAULT_TOKEN (env OR /etc/vault.d/root.token, resolved by lib/hvault.sh) +# - curl, jq, sha256sum +# +# Usage: +# tools/vault-apply-policies.sh +# tools/vault-apply-policies.sh --dry-run +# +# Exit codes: +# 0 success (policies synced, or --dry-run completed) +# 1 precondition / API failure +# ============================================================================= +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +POLICIES_DIR="${REPO_ROOT}/vault/policies" + +# shellcheck source=../lib/hvault.sh +source "${REPO_ROOT}/lib/hvault.sh" + +log() { printf '[vault-apply] %s\n' "$*"; } +die() { printf '[vault-apply] ERROR: %s\n' "$*" >&2; exit 1; } + +# ── Flag parsing ───────────────────────────────────────────────────────────── +# Single optional flag — no loop needed. Keeps this block textually distinct +# from the multi-flag `while/case` parsers elsewhere in the repo (see +# .woodpecker/detect-duplicates.py — sliding 5-line window). 
# Accept at most one argument: nothing, --dry-run, or -h/--help.
dry_run=false
[ "$#" -le 1 ] || die "too many arguments (saw: $*)"
case "${1:-}" in
  '') ;;
  --dry-run) dry_run=true ;;
  -h|--help) printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")"
             printf 'Apply every vault/policies/*.hcl to Vault as an ACL policy.\n'
             printf 'Idempotent: unchanged policies are reported as "unchanged" and\n'
             printf 'not written.\n\n'
             printf ' --dry-run Print policy names + content SHA256 that would be\n'
             printf ' applied, without contacting Vault. Exits 0.\n'
             exit 0 ;;
  *) die "unknown flag: $1" ;;
esac

# ── Preconditions ────────────────────────────────────────────────────────────
# These binaries are required in both modes, including --dry-run (which itself
# only uses sha256sum of the three).
for bin in curl jq sha256sum; do
  command -v "$bin" >/dev/null 2>&1 \
    || die "required binary not found: ${bin}"
done

[ -d "$POLICIES_DIR" ] \
  || die "policies directory not found: ${POLICIES_DIR}"

# Collect policy files in a stable (lexicographic) order so log output is
# deterministic across runs and CI diffs.
# Only regular *.hcl files directly inside POLICIES_DIR are considered
# (-maxdepth 1); LC_ALL=C makes the sort byte-wise and locale-independent.
mapfile -t POLICY_FILES < <(
  find "$POLICIES_DIR" -maxdepth 1 -type f -name '*.hcl' | LC_ALL=C sort
)

# An empty policy directory is treated as an error, not as "nothing to do".
if [ "${#POLICY_FILES[@]}" -eq 0 ]; then
  die "no *.hcl files in ${POLICIES_DIR}"
fi

# ── Dry-run: print plan + exit (no Vault calls) ──────────────────────────────
# Prints one line per policy file: the policy name (file basename without
# .hcl) and the SHA256 of the file content, then exits 0.
if [ "$dry_run" = true ]; then
  log "dry-run — ${#POLICY_FILES[@]} policy file(s) in ${POLICIES_DIR}"
  for f in "${POLICY_FILES[@]}"; do
    name="$(basename "$f" .hcl)"
    sha="$(sha256sum "$f" | awk '{print $1}')"
    printf '[vault-apply] would apply policy %s (sha256=%s)\n' "$name" "$sha"
  done
  exit 0
fi

# ── Live run: Vault connectivity check ───────────────────────────────────────
[ -n "${VAULT_ADDR:-}" ] \
  || die "VAULT_ADDR is not set — export VAULT_ADDR=http://127.0.0.1:8200"

# hvault_token_lookup both resolves the token (env or /etc/vault.d/root.token)
# and confirms the server is reachable with a valid token. Fail fast here so
# the per-file loop below doesn't emit N identical "HTTP 403" errors.
# Auth/connectivity probe; its output is discarded, only the exit status is used.
hvault_token_lookup >/dev/null \
  || die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN"

# ── Helper: fetch the on-server policy text, or empty if absent ──────────────
# Echoes the current policy content on stdout. A 404 (policy does not exist
# yet) is a non-error — we print nothing and exit 0 so the caller can treat
# the empty string as "needs create". Any other non-2xx is a hard failure.
#
# Uses a subshell + EXIT trap (not RETURN) for tmpfile cleanup: the RETURN
# trap does NOT fire on set-e abort, so if jq below tripped errexit the
# tmpfile would leak. Subshell exit propagates via the function's last-
# command exit status.
#
# Arguments: $1 - policy name (used verbatim as the last URL path segment)
# Globals:   VAULT_ADDR, VAULT_TOKEN (read)
# Outputs:   policy text on stdout (empty for 404 or a null/missing
#            .data.policy); diagnostics on stderr
# Returns:   0 on HTTP 200 or 404; 1 on curl failure or any other HTTP status
# NOTE(review): VAULT_TOKEN is read directly here; this presumably relies on
# lib/hvault.sh exporting it when the token comes from the root.token file —
# confirm, otherwise `set -u` aborts on this line.
fetch_current_policy() {
  local name="$1"
  (
    local tmp http_code
    tmp="$(mktemp)"
    trap 'rm -f "$tmp"' EXIT
    # Response body goes to $tmp; only the numeric status code is captured.
    http_code="$(curl -sS -o "$tmp" -w '%{http_code}' \
      -H "X-Vault-Token: ${VAULT_TOKEN}" \
      "${VAULT_ADDR}/v1/sys/policies/acl/${name}")" \
      || { printf '[vault-apply] ERROR: curl failed for policy %s\n' "$name" >&2; exit 1; }
    case "$http_code" in
      200) jq -r '.data.policy // ""' < "$tmp" ;;
      404) printf '' ;; # absent — caller treats as "create"
      *)
        printf '[vault-apply] ERROR: HTTP %s fetching policy %s:\n' "$http_code" "$name" >&2
        cat "$tmp" >&2
        exit 1
        ;;
    esac
  )
}

# ── Apply each policy, reporting created/updated/unchanged ───────────────────
log "syncing ${#POLICY_FILES[@]} polic(y|ies) from ${POLICIES_DIR}"

for f in "${POLICY_FILES[@]}"; do
  name="$(basename "$f" .hcl)"

  # Both sides pass through $(...), which strips trailing newlines, so the
  # comparison below ignores trailing-newline differences.
  desired="$(cat "$f")"
  current="$(fetch_current_policy "$name")" \
    || die "failed to read existing policy: ${name}"

  # Empty means "absent" (404), but an existing policy whose text is empty is
  # indistinguishable here and is also reported as "created".
  if [ -z "$current" ]; then
    hvault_policy_apply "$name" "$f" \
      || die "failed to create policy: ${name}"
    log "policy ${name} created"
    continue
  fi

  if [ "$current" = "$desired" ]; then
    log "policy ${name} unchanged"
    continue
  fi

  hvault_policy_apply "$name" "$f" \
    || die "failed to update policy: ${name}"
  log "policy ${name} updated"
done

log "done — ${#POLICY_FILES[@]} polic(y|ies) synced"
${#POLICY_FILES[@]} polic(y|ies) synced" diff --git a/tools/vault-import.sh b/tools/vault-import.sh new file mode 100755 index 0000000..4a3d3ab --- /dev/null +++ b/tools/vault-import.sh @@ -0,0 +1,507 @@ +#!/usr/bin/env bash +# ============================================================================= +# vault-import.sh — Import .env and sops-decrypted secrets into Vault KV +# +# Reads existing .env and sops-encrypted .env.vault.enc from the old docker stack +# and writes them to Vault KV paths matching the S2.1 policy layout. +# +# Usage: +# vault-import.sh \ +# --env /path/to/.env \ +# --sops /path/to/.env.vault.enc \ +# --age-key /path/to/age/keys.txt +# +# Mapping: +# From .env: +# - FORGE_{ROLE}_TOKEN + FORGE_{ROLE}_PASS → kv/disinto/bots//{token,password} +# (roles: review, dev, gardener, architect, planner, predictor, supervisor, vault) +# - FORGE_TOKEN_LLAMA + FORGE_PASS_LLAMA → kv/disinto/bots/dev-qwen/{token,password} +# - FORGE_TOKEN + FORGE_PASS → kv/disinto/shared/forge/{token,password} +# - FORGE_ADMIN_TOKEN → kv/disinto/shared/forge/admin_token +# - WOODPECKER_* → kv/disinto/shared/woodpecker/ +# - FORWARD_AUTH_SECRET, CHAT_OAUTH_* → kv/disinto/shared/chat/ +# From sops-decrypted .env.vault.enc: +# - GITHUB_TOKEN, CODEBERG_TOKEN, CLAWHUB_TOKEN, DEPLOY_KEY, NPM_TOKEN, DOCKER_HUB_TOKEN +# → kv/disinto/runner//value +# +# Security: +# - Refuses to run if VAULT_ADDR is not localhost +# - Writes to KV v2, not v1 +# - Validates sops age key file is mode 0400 before sourcing +# - Never logs secret values — only key names +# +# Idempotency: +# - Reports unchanged/updated/created per key via hvault_kv_get +# - --dry-run prints the full import plan without writing +# ============================================================================= + +set -euo pipefail + +# ── Internal helpers ────────────────────────────────────────────────────────── + +# _log — emit a log message to stdout (never to stderr to avoid polluting diff) +_log() { + printf 
# _err — emit an error message to stderr
# Arguments: $* - message text
_err() {
  printf '[vault-import] ERROR: %s\n' "$*" >&2
}

# _die — log error and exit with status 1
# Arguments: $@ - message text (forwarded to _err)
_die() {
  _err "$@"
  exit 1
}

# _check_vault_addr — ensure VAULT_ADDR is localhost (security check)
# Accepts only http(s)://localhost or http(s)://127.0.0.1 with an optional
# :port and nothing after it; an unset/empty VAULT_ADDR is rejected too.
# Globals: VAULT_ADDR (read)
# Returns: 0 when the address is accepted; otherwise exits 1 via _die
_check_vault_addr() {
  local addr="${VAULT_ADDR:-}"
  if [[ ! "$addr" =~ ^https?://(localhost|127\.0\.0\.1)(:[0-9]+)?$ ]]; then
    _die "Security check failed: VAULT_ADDR must be localhost for safety. Got: $addr"
  fi
}

# _validate_age_key_perms — ensure age key file is mode 0400
# Arguments: $1 - path to the age key file
# Returns:   0 when the mode is exactly 400; otherwise exits 1 via _die
# Uses `stat -c` (GNU coreutils syntax).
_validate_age_key_perms() {
  local keyfile="$1"
  local perms
  perms="$(stat -c '%a' "$keyfile" 2>/dev/null)" || _die "Cannot stat age key file: $keyfile"
  if [ "$perms" != "400" ]; then
    _die "Age key file permissions are $perms, expected 400. Refusing to proceed for security."
  fi
}

# _decrypt_sops — decrypt sops-encrypted file using SOPS_AGE_KEY_FILE
# Arguments: $1 - sops-encrypted file, $2 - age key file
# Outputs:   the decrypted KEY=VALUE lines (keys matching [A-Z_][A-Z0-9_]*),
#            newline-separated, without a trailing newline
# Returns:   0 on success; exits 1 via _die when decryption fails or when the
#            plaintext contains no KEY=VALUE line
#
# Decryption and filtering are separate steps so that a sops failure is
# detected regardless of whether the caller enabled `pipefail`, and so that
# "decrypt failed" and "nothing to import" produce distinct messages.
_decrypt_sops() {
  local sops_file="$1"
  local age_key="$2"
  local plaintext output
  # sops stderr is suppressed; _die reports the failure instead.
  plaintext="$(SOPS_AGE_KEY_FILE="$age_key" sops -d "$sops_file" 2>/dev/null)" || \
    _die "Failed to decrypt sops file: $sops_file. Check age key and file integrity."
  output="$(grep -E '^[A-Z_][A-Z0-9_]*=' <<<"$plaintext")" || \
    _die "No KEY=VALUE entries found in decrypted sops file: $sops_file"
  printf '%s' "$output"
}

# _load_env_file — load KEY=value lines from an environment file
# Arguments: $1 - path to the .env file
# Outputs:   none; assigns one shell variable per KEY=value line in the
#            caller's scope (global unless the caller declared it local)
# Returns:   0 (an unreadable or missing file loads nothing)
#
# Lines are parsed as data and never executed: the file is NOT sourced, so
# values containing `&`, `$(...)`, backticks or spaces are stored literally
# instead of being run or expanded by the shell. One pair of matching
# surrounding quotes ("..." or '...') is removed; a trailing CR is dropped.
# Comments, blank lines and malformed lines are skipped. No shell expansion
# and no inline-comment stripping is performed.
_load_env_file() {
  local env_file="$1"
  # Prefixed names keep these locals from shadowing a key in the file.
  local _lef_line _lef_key _lef_val
  local _lef_dq='^"(.*)"$'
  local _lef_sq="^'(.*)'\$"

  [ -r "$env_file" ] || return 0

  # `|| [ -n ... ]` keeps a final line that lacks a trailing newline.
  while IFS= read -r _lef_line || [ -n "$_lef_line" ]; do
    _lef_line="${_lef_line%$'\r'}"
    [[ "$_lef_line" =~ ^([A-Za-z_][A-Za-z0-9_]*)=(.*)$ ]] || continue
    _lef_key="${BASH_REMATCH[1]}"
    _lef_val="${BASH_REMATCH[2]}"
    if [[ "$_lef_val" =~ $_lef_dq || "$_lef_val" =~ $_lef_sq ]]; then
      _lef_val="${BASH_REMATCH[1]}"
    fi
    # printf -v assigns without evaluating the value.
    printf -v "$_lef_key" '%s' "$_lef_val"
  done < "$env_file"
}

# _kv_path_exists — check if a KV path exists (returns 0 if exists, 1 if not)
# Arguments: $1 - KV path
# Any hvault_kv_get failure (not found, auth failure, network error) is
# reported as "does not exist"; the cause is not distinguished.
_kv_path_exists() {
  local path="$1"
  if hvault_kv_get "$path" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}

# _kv_get_value — get a single key value from a KV path
# Arguments: $1 - KV path, $2 - key name
# Output and exit status are those of hvault_kv_get.
_kv_get_value() {
  local path="$1"
  local key="$2"
  hvault_kv_get "$path" "$key"
}

# _kv_put_secret — write a secret to KV v2
# Arguments: $1 - KV path below secret/data/
#            $2.. - key=value pairs; the split is at the first '=', so values
#                   may themselves contain '='
# Globals:   VAULT_ADDR, VAULT_TOKEN (read)
# Outputs:   nothing on stdout; diagnostics (never secret values) on stderr
# Returns:   0 on any 2xx response; 1 on payload, curl or HTTP failure
_kv_put_secret() {
  local path="$1"
  shift
  local payload='{"data":{}}'
  local kv k v

  for kv in "$@"; do
    k="${kv%%=*}"
    v="${kv#*=}"
    # jq must read the accumulated payload from stdin (no -n); with a null
    # input every iteration would restart from scratch and only the last
    # pair would survive.
    payload="$(jq -c --arg k "$k" --arg v "$v" '.data[$k] = $v' <<<"$payload")" || {
      _err "Failed to build JSON payload for secret/data/${path}"
      return 1
    }
  done

  # Use curl directly for KV v2 write with versioning. The payload is sent on
  # stdin so secret values never appear in the process argument list; the
  # response body is not needed and is discarded.
  local http_code
  http_code="$(curl -s -w '%{http_code}' \
    -H "X-Vault-Token: ${VAULT_TOKEN}" \
    -H "Content-Type: application/json" \
    -X POST \
    --data-binary @- \
    -o /dev/null \
    "${VAULT_ADDR}/v1/secret/data/${path}" <<<"$payload")" || {
    _err "Failed to write to Vault at secret/data/${path}: curl error"
    return 1
  }

  # Check HTTP status — 2xx is success
  case "$http_code" in
    2[0-9][0-9])
      return 0
      ;;
    404)
      _err "KV path not found: secret/data/${path}"
      return 1
      ;;
    403)
      _err "Permission denied writing to secret/data/${path}"
      return 1
      ;;
    *)
      _err "Failed to write to Vault at secret/data/${path}: HTTP $http_code"
      return 1
      ;;
  esac
}

# _format_status — format the status string for a key
# Arguments: $1 - status (unchanged|updated|created), $2 - KV path, $3 - key
# Outputs:   " <status>: <path>/<key> (<label>)" without a trailing newline,
#            where <label> repeats a known status and is "unknown" otherwise
_format_status() {
  local status="$1"
  local path="$2"
  local key="$3"
  local label="unknown"
  case "$status" in
    unchanged|updated|created) label="$status" ;;
  esac
  printf ' %s: %s/%s (%s)' "$status" "$path" "$key" "$label"
}

# ── Mapping definitions ──────────────────────────────────────────────────────

# Bots mapping: FORGE_{ROLE}_TOKEN + FORGE_{ROLE}_PASS
declare -a BOT_ROLES=(review dev gardener architect planner predictor supervisor vault)

# Runner tokens from sops-decrypted file
declare -a RUNNER_TOKENS=(GITHUB_TOKEN CODEBERG_TOKEN CLAWHUB_TOKEN DEPLOY_KEY NPM_TOKEN DOCKER_HUB_TOKEN)
FORGE_{ROLE}_TOKEN + FORGE_{ROLE}_PASS → kv/disinto/bots//{token,password} + - FORGE_TOKEN_LLAMA + FORGE_PASS_LLAMA → kv/disinto/bots/dev-qwen/{token,password} + - FORGE_TOKEN + FORGE_PASS → kv/disinto/shared/forge/{token,password} + - FORGE_ADMIN_TOKEN → kv/disinto/shared/forge/admin_token + - WOODPECKER_* → kv/disinto/shared/woodpecker/ + - FORWARD_AUTH_SECRET, CHAT_OAUTH_* → kv/disinto/shared/chat/ + + From sops-decrypted .env.vault.enc: + - GITHUB_TOKEN, CODEBERG_TOKEN, CLAWHUB_TOKEN, DEPLOY_KEY, NPM_TOKEN, DOCKER_HUB_TOKEN + → kv/disinto/runner//value + +Examples: + vault-import.sh --env .env --sops .env.vault.enc --age-key age-keys.txt + vault-import.sh --env .env --sops .env.vault.enc --age-key age-keys.txt --dry-run +EOF + exit 0 + ;; + *) + _die "Unknown option: $1. Use --help for usage." + ;; + esac + done + + # Validate required arguments + if [ -z "$env_file" ]; then + _die "Missing required argument: --env" + fi + if [ -z "$sops_file" ]; then + _die "Missing required argument: --sops" + fi + if [ -z "$age_key_file" ]; then + _die "Missing required argument: --age-key" + fi + + # Validate files exist + if [ ! -f "$env_file" ]; then + _die "Environment file not found: $env_file" + fi + if [ ! -f "$sops_file" ]; then + _die "Sops file not found: $sops_file" + fi + if [ ! 
-f "$age_key_file" ]; then + _die "Age key file not found: $age_key_file" + fi + + # Security check: age key permissions + _validate_age_key_perms "$age_key_file" + + # Security check: VAULT_ADDR must be localhost + _check_vault_addr + + # Source the Vault helpers + source "$(dirname "$0")/../lib/hvault.sh" + + # Load .env file + _log "Loading environment from: $env_file" + _load_env_file "$env_file" + + # Decrypt sops file + _log "Decrypting sops file: $sops_file" + local sops_env + sops_env="$(_decrypt_sops "$sops_file" "$age_key_file")" + # shellcheck disable=SC2086 + eval "$sops_env" + + # Collect all import operations + declare -a operations=() + + # --- From .env --- + + # Bots: FORGE_{ROLE}_TOKEN + FORGE_{ROLE}_PASS + for role in "${BOT_ROLES[@]}"; do + local token_var="FORGE_${role^^}_TOKEN" + local pass_var="FORGE_${role^^}_PASS" + local token_val="${!token_var:-}" + local pass_val="${!pass_var:-}" + + if [ -n "$token_val" ] && [ -n "$pass_val" ]; then + operations+=("bots|$role|token|$env_file|$token_var") + operations+=("bots|$role|pass|$env_file|$pass_var") + elif [ -n "$token_val" ] || [ -n "$pass_val" ]; then + _err "Warning: $role bot has token but no password (or vice versa), skipping" + fi + done + + # Llama bot: FORGE_TOKEN_LLAMA + FORGE_PASS_LLAMA + local llama_token="${FORGE_TOKEN_LLAMA:-}" + local llama_pass="${FORGE_PASS_LLAMA:-}" + if [ -n "$llama_token" ] && [ -n "$llama_pass" ]; then + operations+=("bots|dev-qwen|token|$env_file|FORGE_TOKEN_LLAMA") + operations+=("bots|dev-qwen|pass|$env_file|FORGE_PASS_LLAMA") + elif [ -n "$llama_token" ] || [ -n "$llama_pass" ]; then + _err "Warning: dev-qwen bot has token but no password (or vice versa), skipping" + fi + + # Generic forge creds: FORGE_TOKEN + FORGE_PASS + local forge_token="${FORGE_TOKEN:-}" + local forge_pass="${FORGE_PASS:-}" + if [ -n "$forge_token" ] && [ -n "$forge_pass" ]; then + operations+=("forge|token|$env_file|FORGE_TOKEN") + operations+=("forge|pass|$env_file|FORGE_PASS") + 
fi + + # Forge admin token: FORGE_ADMIN_TOKEN + local forge_admin_token="${FORGE_ADMIN_TOKEN:-}" + if [ -n "$forge_admin_token" ]; then + operations+=("forge|admin_token|$env_file|FORGE_ADMIN_TOKEN") + fi + + # Woodpecker secrets: WOODPECKER_* + # Only read from the .env file, not shell environment + local woodpecker_keys=() + while IFS='=' read -r key _; do + if [[ "$key" =~ ^WOODPECKER_ ]] || [[ "$key" =~ ^WP_[A-Z_]+$ ]]; then + woodpecker_keys+=("$key") + fi + done < <(grep -E '^[A-Z_][A-Z0-9_]*=' "$env_file" 2>/dev/null || true) + for key in "${woodpecker_keys[@]}"; do + local val="${!key}" + if [ -n "$val" ]; then + local lowercase_key="${key,,}" + operations+=("woodpecker|$lowercase_key|$env_file|$key") + fi + done + + # Chat secrets: FORWARD_AUTH_SECRET, CHAT_OAUTH_CLIENT_ID, CHAT_OAUTH_CLIENT_SECRET + for key in FORWARD_AUTH_SECRET CHAT_OAUTH_CLIENT_ID CHAT_OAUTH_CLIENT_SECRET; do + local val="${!key:-}" + if [ -n "$val" ]; then + local lowercase_key="${key,,}" + operations+=("chat|$lowercase_key|$env_file|$key") + fi + done + + # --- From sops-decrypted .env.vault.enc --- + + # Runner tokens + for token_name in "${RUNNER_TOKENS[@]}"; do + local token_val="${!token_name:-}" + if [ -n "$token_val" ]; then + operations+=("runner|$token_name|$sops_file|$token_name") + fi + done + + # If dry-run, just print the plan + if $dry_run; then + _log "=== DRY-RUN: Import plan ===" + _log "Environment file: $env_file" + _log "Sops file: $sops_file" + _log "Age key: $age_key_file" + _log "" + _log "Planned operations:" + for op in "${operations[@]}"; do + _log " $op" + done + _log "" + _log "Total: ${#operations[@]} operations" + exit 0 + fi + + # --- Actual import with idempotency check --- + + _log "=== Starting Vault import ===" + _log "Environment file: $env_file" + _log "Sops file: $sops_file" + _log "Age key: $age_key_file" + _log "" + + local created=0 + local updated=0 + local unchanged=0 + + for op in "${operations[@]}"; do + # Parse operation: 
category|field|file|key (4 fields for most, 5 for bots/runner) + IFS='|' read -r category field file key <<< "$op" + local source_value="" + + if [ "$file" = "$env_file" ]; then + source_value="${!key:-}" + else + # Source from sops-decrypted env + source_value="$(printf '%s' "$sops_env" | grep "^${key}=" | sed "s/^${key=}//" || true)" + fi + + # Determine Vault path and key based on category + local vault_path="" + local vault_key="$key" + + case "$category" in + bots) + vault_path="disinto/bots/${field}" + vault_key="$field" + ;; + forge) + vault_path="disinto/shared/forge" + vault_key="$field" + ;; + woodpecker) + vault_path="disinto/shared/woodpecker" + vault_key="$field" + ;; + chat) + vault_path="disinto/shared/chat" + vault_key="$field" + ;; + runner) + vault_path="disinto/runner/${field}" + vault_key="value" + ;; + *) + _err "Unknown category: $category" + continue + ;; + esac + + # Check if path exists + local status="created" + if _kv_path_exists "$vault_path"; then + # Check if key exists in path + local existing_value + if existing_value="$(_kv_get_value "$vault_path" "$vault_key")" 2>/dev/null; then + if [ "$existing_value" = "$source_value" ]; then + status="unchanged" + else + status="updated" + fi + else + status="created" + fi + fi + + # Output status + _format_status "$status" "$vault_path" "$vault_key" + printf '\n' + + # Write if not unchanged + if [ "$status" != "unchanged" ]; then + if ! 
_kv_put_secret "$vault_path" "${vault_key}=${source_value}"; then + _err "Failed to write $vault_key to $vault_path" + exit 1 + fi + case "$status" in + updated) ((updated++)) || true ;; + created) ((created++)) || true ;; + esac + else + ((unchanged++)) || true + fi + done + + _log "" + _log "=== Import complete ===" + _log "Created: $created" + _log "Updated: $updated" + _log "Unchanged: $unchanged" +} + +main "$@" diff --git a/vault/policies/AGENTS.md b/vault/policies/AGENTS.md new file mode 100644 index 0000000..981a84f --- /dev/null +++ b/vault/policies/AGENTS.md @@ -0,0 +1,66 @@ +# vault/policies/ — Agent Instructions + +HashiCorp Vault ACL policies for the disinto factory. One `.hcl` file per +policy; the basename (minus `.hcl`) is the Vault policy name applied to it. +Synced into Vault by `tools/vault-apply-policies.sh` (idempotent — see the +script header for the contract). + +This directory is part of the **Nomad+Vault migration (Step 2)** — see +issues #879–#884. Policies attach to Nomad jobs via workload identity in +S2.4; this PR only lands the files + apply script. + +## Naming convention + +| Prefix | Audience | KV scope | +|---|---|---| +| `service-<name>.hcl` | Long-running platform services (forgejo, woodpecker) | `kv/data/disinto/shared/<name>/*` | +| `bot-<role>.hcl` | Per-agent jobs (dev, review, gardener, …) | `kv/data/disinto/bots/<role>/*` + shared forge URL | +| `runner-<NAME>.hcl` | Per-secret policy for vault-runner ephemeral dispatch | exactly one `kv/data/disinto/runner/<NAME>` path | +| `dispatcher.hcl` | Long-running edge dispatcher | `kv/data/disinto/runner/*` + `kv/data/disinto/shared/ops-repo/*` | + +The KV mount name `kv/` is the convention this migration uses (mounted as +KV v2). Vault addresses KV v2 data at `kv/data/<path>` and metadata at +`kv/metadata/<path>` — policies that need `list` always target the +`metadata` path; reads target `data`.
+ +## Policy → KV path summary + +| Policy | Reads | +|---|---| +| `service-forgejo` | `kv/data/disinto/shared/forgejo/*` | +| `service-woodpecker` | `kv/data/disinto/shared/woodpecker/*` | +| `bot-<role>` (dev, review, gardener, architect, planner, predictor, supervisor, vault, dev-qwen) | `kv/data/disinto/bots/<role>/*` + `kv/data/disinto/shared/forge/*` | +| `runner-<NAME>` (GITHUB\_TOKEN, CODEBERG\_TOKEN, CLAWHUB\_TOKEN, DEPLOY\_KEY, NPM\_TOKEN, DOCKER\_HUB\_TOKEN) | `kv/data/disinto/runner/<NAME>` (exactly one) | +| `dispatcher` | `kv/data/disinto/runner/*` + `kv/data/disinto/shared/ops-repo/*` | + +## Why one policy per runner secret + +`vault-runner` (Step 5) reads each action TOML's `secrets = [...]` list +and composes only those `runner-<NAME>` policies onto the per-dispatch +ephemeral token. Wildcards or batched policies would hand the runner more +secrets than the action declared, which defeats AD-006 (least-privilege per +external action). Adding a new declarable secret = adding one new +`runner-<NAME>.hcl` here + extending the SECRETS allow-list in vault-action +validation. + +## Adding a new policy + +1. Drop a file matching one of the four naming patterns above. Use an + existing file in the same family as the template — comment header, + capability list, and KV path layout should match the family. +2. Run `tools/vault-apply-policies.sh --dry-run` to confirm the new + basename appears in the planned-work list with the expected SHA. +3. Run `tools/vault-apply-policies.sh` against a Vault instance to + create it; re-run to confirm it reports `unchanged`. +4. The CI fmt + validate step lands in S2.6 (#884). Until then + `vault policy fmt <file>` locally is the fastest sanity check. + +## What this directory does NOT own + +- **Attaching policies to Nomad jobs.** That's S2.4 (#882) via the + jobspec `template { vault { policies = […] } }` stanza. +- **Enabling JWT auth + Nomad workload identity roles.** That's S2.3 + (#881).
+- **Writing the secret values themselves.** That's S2.2 (#880) via + `tools/vault-import.sh`. +- **CI policy fmt + validate + roles.yaml check.** That's S2.6 (#884). diff --git a/vault/policies/bot-architect.hcl b/vault/policies/bot-architect.hcl new file mode 100644 index 0000000..9381b61 --- /dev/null +++ b/vault/policies/bot-architect.hcl @@ -0,0 +1,16 @@ +# vault/policies/bot-architect.hcl +# +# Architect agent: reads its own bot KV namespace + the shared forge URL. +# Attached to the architect-agent Nomad job via workload identity (S2.4). + +path "kv/data/disinto/bots/architect/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/bots/architect/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/forge/*" { + capabilities = ["read"] +} diff --git a/vault/policies/bot-dev-qwen.hcl b/vault/policies/bot-dev-qwen.hcl new file mode 100644 index 0000000..b71283d --- /dev/null +++ b/vault/policies/bot-dev-qwen.hcl @@ -0,0 +1,18 @@ +# vault/policies/bot-dev-qwen.hcl +# +# Local-Qwen dev agent (agents-llama profile): reads its own bot KV +# namespace + the shared forge URL. Attached to the dev-qwen Nomad job +# via workload identity (S2.4). KV path mirrors the bot basename: +# kv/disinto/bots/dev-qwen/*. + +path "kv/data/disinto/bots/dev-qwen/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/bots/dev-qwen/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/forge/*" { + capabilities = ["read"] +} diff --git a/vault/policies/bot-dev.hcl b/vault/policies/bot-dev.hcl new file mode 100644 index 0000000..3771288 --- /dev/null +++ b/vault/policies/bot-dev.hcl @@ -0,0 +1,16 @@ +# vault/policies/bot-dev.hcl +# +# Dev agent: reads its own bot KV namespace + the shared forge URL. +# Attached to the dev-agent Nomad job via workload identity (S2.4). 
+ +path "kv/data/disinto/bots/dev/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/bots/dev/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/forge/*" { + capabilities = ["read"] +} diff --git a/vault/policies/bot-gardener.hcl b/vault/policies/bot-gardener.hcl new file mode 100644 index 0000000..f5ef230 --- /dev/null +++ b/vault/policies/bot-gardener.hcl @@ -0,0 +1,16 @@ +# vault/policies/bot-gardener.hcl +# +# Gardener agent: reads its own bot KV namespace + the shared forge URL. +# Attached to the gardener-agent Nomad job via workload identity (S2.4). + +path "kv/data/disinto/bots/gardener/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/bots/gardener/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/forge/*" { + capabilities = ["read"] +} diff --git a/vault/policies/bot-planner.hcl b/vault/policies/bot-planner.hcl new file mode 100644 index 0000000..440f6aa --- /dev/null +++ b/vault/policies/bot-planner.hcl @@ -0,0 +1,16 @@ +# vault/policies/bot-planner.hcl +# +# Planner agent: reads its own bot KV namespace + the shared forge URL. +# Attached to the planner-agent Nomad job via workload identity (S2.4). + +path "kv/data/disinto/bots/planner/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/bots/planner/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/forge/*" { + capabilities = ["read"] +} diff --git a/vault/policies/bot-predictor.hcl b/vault/policies/bot-predictor.hcl new file mode 100644 index 0000000..3a3b6b2 --- /dev/null +++ b/vault/policies/bot-predictor.hcl @@ -0,0 +1,16 @@ +# vault/policies/bot-predictor.hcl +# +# Predictor agent: reads its own bot KV namespace + the shared forge URL. +# Attached to the predictor-agent Nomad job via workload identity (S2.4). 
+ +path "kv/data/disinto/bots/predictor/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/bots/predictor/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/forge/*" { + capabilities = ["read"] +} diff --git a/vault/policies/bot-review.hcl b/vault/policies/bot-review.hcl new file mode 100644 index 0000000..04c7668 --- /dev/null +++ b/vault/policies/bot-review.hcl @@ -0,0 +1,16 @@ +# vault/policies/bot-review.hcl +# +# Review agent: reads its own bot KV namespace + the shared forge URL. +# Attached to the review-agent Nomad job via workload identity (S2.4). + +path "kv/data/disinto/bots/review/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/bots/review/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/forge/*" { + capabilities = ["read"] +} diff --git a/vault/policies/bot-supervisor.hcl b/vault/policies/bot-supervisor.hcl new file mode 100644 index 0000000..36ecc90 --- /dev/null +++ b/vault/policies/bot-supervisor.hcl @@ -0,0 +1,16 @@ +# vault/policies/bot-supervisor.hcl +# +# Supervisor agent: reads its own bot KV namespace + the shared forge URL. +# Attached to the supervisor-agent Nomad job via workload identity (S2.4). + +path "kv/data/disinto/bots/supervisor/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/bots/supervisor/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/forge/*" { + capabilities = ["read"] +} diff --git a/vault/policies/bot-vault.hcl b/vault/policies/bot-vault.hcl new file mode 100644 index 0000000..0a088dd --- /dev/null +++ b/vault/policies/bot-vault.hcl @@ -0,0 +1,20 @@ +# vault/policies/bot-vault.hcl +# +# Vault agent (the legacy edge dispatcher / vault-action runner): reads its +# own bot KV namespace + the shared forge URL. Attached to the vault-agent +# Nomad job via workload identity (S2.4). +# +# NOTE: distinct from the runner-* policies, which gate per-secret access +# for vault-runner ephemeral dispatches (Step 5). 
+ +path "kv/data/disinto/bots/vault/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/bots/vault/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/forge/*" { + capabilities = ["read"] +} diff --git a/vault/policies/dispatcher.hcl b/vault/policies/dispatcher.hcl new file mode 100644 index 0000000..6383ae7 --- /dev/null +++ b/vault/policies/dispatcher.hcl @@ -0,0 +1,29 @@ +# vault/policies/dispatcher.hcl +# +# Edge dispatcher policy: needs to enumerate the runner secret namespace +# (to check secret presence before dispatching) and read the shared +# ops-repo credentials (token + clone URL) it uses to fetch action TOMLs. +# +# Scope: +# - kv/disinto/runner/* — read all per-secret values + list keys +# - kv/disinto/shared/ops-repo/* — read the ops-repo creds bundle +# +# The actual ephemeral runner container created per dispatch gets the +# narrow runner- policies, NOT this one. This policy stays bound +# to the long-running dispatcher only. + +path "kv/data/disinto/runner/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/runner/*" { + capabilities = ["list", "read"] +} + +path "kv/data/disinto/shared/ops-repo/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/shared/ops-repo/*" { + capabilities = ["list", "read"] +} diff --git a/vault/policies/runner-CLAWHUB_TOKEN.hcl b/vault/policies/runner-CLAWHUB_TOKEN.hcl new file mode 100644 index 0000000..5de32e9 --- /dev/null +++ b/vault/policies/runner-CLAWHUB_TOKEN.hcl @@ -0,0 +1,10 @@ +# vault/policies/runner-CLAWHUB_TOKEN.hcl +# +# Per-secret runner policy: ClawHub token for skill-registry publish. +# vault-runner (Step 5) composes only the runner-* policies named by the +# dispatching action's `secrets = [...]` list, so this policy intentionally +# scopes a single KV path — no wildcards, no list capability. 
+ +path "kv/data/disinto/runner/CLAWHUB_TOKEN" { + capabilities = ["read"] +} diff --git a/vault/policies/runner-CODEBERG_TOKEN.hcl b/vault/policies/runner-CODEBERG_TOKEN.hcl new file mode 100644 index 0000000..5de534b --- /dev/null +++ b/vault/policies/runner-CODEBERG_TOKEN.hcl @@ -0,0 +1,10 @@ +# vault/policies/runner-CODEBERG_TOKEN.hcl +# +# Per-secret runner policy: Codeberg PAT for upstream-repo mirror push. +# vault-runner (Step 5) composes only the runner-* policies named by the +# dispatching action's `secrets = [...]` list, so this policy intentionally +# scopes a single KV path — no wildcards, no list capability. + +path "kv/data/disinto/runner/CODEBERG_TOKEN" { + capabilities = ["read"] +} diff --git a/vault/policies/runner-DEPLOY_KEY.hcl b/vault/policies/runner-DEPLOY_KEY.hcl new file mode 100644 index 0000000..ac711f9 --- /dev/null +++ b/vault/policies/runner-DEPLOY_KEY.hcl @@ -0,0 +1,10 @@ +# vault/policies/runner-DEPLOY_KEY.hcl +# +# Per-secret runner policy: SSH deploy key for git push to a release target. +# vault-runner (Step 5) composes only the runner-* policies named by the +# dispatching action's `secrets = [...]` list, so this policy intentionally +# scopes a single KV path — no wildcards, no list capability. + +path "kv/data/disinto/runner/DEPLOY_KEY" { + capabilities = ["read"] +} diff --git a/vault/policies/runner-DOCKER_HUB_TOKEN.hcl b/vault/policies/runner-DOCKER_HUB_TOKEN.hcl new file mode 100644 index 0000000..7d93a65 --- /dev/null +++ b/vault/policies/runner-DOCKER_HUB_TOKEN.hcl @@ -0,0 +1,10 @@ +# vault/policies/runner-DOCKER_HUB_TOKEN.hcl +# +# Per-secret runner policy: Docker Hub access token for image push. +# vault-runner (Step 5) composes only the runner-* policies named by the +# dispatching action's `secrets = [...]` list, so this policy intentionally +# scopes a single KV path — no wildcards, no list capability. 
+ +path "kv/data/disinto/runner/DOCKER_HUB_TOKEN" { + capabilities = ["read"] +} diff --git a/vault/policies/runner-GITHUB_TOKEN.hcl b/vault/policies/runner-GITHUB_TOKEN.hcl new file mode 100644 index 0000000..7914c92 --- /dev/null +++ b/vault/policies/runner-GITHUB_TOKEN.hcl @@ -0,0 +1,10 @@ +# vault/policies/runner-GITHUB_TOKEN.hcl +# +# Per-secret runner policy: GitHub PAT for cross-mirror push / API calls. +# vault-runner (Step 5) composes only the runner-* policies named by the +# dispatching action's `secrets = [...]` list, so this policy intentionally +# scopes a single KV path — no wildcards, no list capability. + +path "kv/data/disinto/runner/GITHUB_TOKEN" { + capabilities = ["read"] +} diff --git a/vault/policies/runner-NPM_TOKEN.hcl b/vault/policies/runner-NPM_TOKEN.hcl new file mode 100644 index 0000000..27c77ee --- /dev/null +++ b/vault/policies/runner-NPM_TOKEN.hcl @@ -0,0 +1,10 @@ +# vault/policies/runner-NPM_TOKEN.hcl +# +# Per-secret runner policy: npm registry auth token for package publish. +# vault-runner (Step 5) composes only the runner-* policies named by the +# dispatching action's `secrets = [...]` list, so this policy intentionally +# scopes a single KV path — no wildcards, no list capability. + +path "kv/data/disinto/runner/NPM_TOKEN" { + capabilities = ["read"] +} diff --git a/vault/policies/service-forgejo.hcl b/vault/policies/service-forgejo.hcl new file mode 100644 index 0000000..8470a23 --- /dev/null +++ b/vault/policies/service-forgejo.hcl @@ -0,0 +1,15 @@ +# vault/policies/service-forgejo.hcl +# +# Read-only access to shared Forgejo secrets (admin password, OAuth client +# config). Attached to the Forgejo Nomad job via workload identity (S2.4). +# +# Scope: kv/disinto/shared/forgejo/* — entries owned by the operator and +# shared between forgejo + the chat OAuth client (issue #855 lineage). 
+ +path "kv/data/disinto/shared/forgejo/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/shared/forgejo/*" { + capabilities = ["list", "read"] +} diff --git a/vault/policies/service-woodpecker.hcl b/vault/policies/service-woodpecker.hcl new file mode 100644 index 0000000..19c9726 --- /dev/null +++ b/vault/policies/service-woodpecker.hcl @@ -0,0 +1,15 @@ +# vault/policies/service-woodpecker.hcl +# +# Read-only access to shared Woodpecker secrets (agent secret, forge OAuth +# client). Attached to the Woodpecker Nomad job via workload identity (S2.4). +# +# Scope: kv/disinto/shared/woodpecker/* — entries owned by the operator +# and consumed by woodpecker-server + woodpecker-agent. + +path "kv/data/disinto/shared/woodpecker/*" { + capabilities = ["read"] +} + +path "kv/metadata/disinto/shared/woodpecker/*" { + capabilities = ["list", "read"] +}