fix: [nomad-step-2] S2-fix — 4 bugs block Step 2 verification: kv/ mount missing, VAULT_ADDR, --sops required, template fallback (#912)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline was successful
ci/woodpecker/pr/ci Pipeline failed
ci/woodpecker/pr/nomad-validate Pipeline was successful
ci/woodpecker/pr/secret-scan Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline was successful

This commit is contained in:
dev-qwen2 2026-04-16 20:19:03 +00:00
parent 3e29a9a61d
commit 9c6c7147e6
5 changed files with 195 additions and 33 deletions

View file

@ -670,6 +670,7 @@ _disinto_init_nomad() {
local import_env="${4:-}" import_sops="${5:-}" age_key="${6:-}"
local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh"
local deploy_sh="${FACTORY_ROOT}/lib/init/nomad/deploy.sh"
local vault_engines_sh="${FACTORY_ROOT}/lib/init/nomad/vault-engines.sh"
local vault_policies_sh="${FACTORY_ROOT}/tools/vault-apply-policies.sh"
local vault_auth_sh="${FACTORY_ROOT}/lib/init/nomad/vault-nomad-auth.sh"
local vault_import_sh="${FACTORY_ROOT}/tools/vault-import.sh"
@ -690,15 +691,20 @@ _disinto_init_nomad() {
# --empty combined with --with or any --import-* flag, so reaching
# this branch with those set is a bug in the caller.
#
# On the default (non-empty) path, vault-apply-policies.sh and
# vault-nomad-auth.sh are invoked unconditionally — they are idempotent
# and cheap to re-run, and subsequent --with deployments depend on
# them. vault-import.sh is invoked only when an --import-* flag is set.
# On the default (non-empty) path, vault-engines.sh, vault-apply-policies.sh
# and vault-nomad-auth.sh are invoked unconditionally — they are idempotent
# and cheap to re-run, and subsequent --with deployments depend on them.
# vault-engines.sh enables the kv/ secret engine required by all policies
# and roles. vault-import.sh is invoked only when an --import-* flag is set.
local import_any=false
if [ -n "$import_env" ] || [ -n "$import_sops" ]; then
import_any=true
fi
if [ "$empty" != "true" ]; then
if [ ! -x "$vault_engines_sh" ]; then
echo "Error: ${vault_engines_sh} not found or not executable" >&2
exit 1
fi
if [ ! -x "$vault_policies_sh" ]; then
echo "Error: ${vault_policies_sh} not found or not executable" >&2
exit 1
@ -737,10 +743,14 @@ _disinto_init_nomad() {
exit 0
fi
# Vault policies + auth are invoked on every nomad real-run path
# regardless of --import-* flags (they're idempotent; S2.1 + S2.3).
# Vault engines + policies + auth are invoked on every nomad real-run path
# regardless of --import-* flags. Vault engines (kv/ mount) is S2.1,
# policies are S2.1, auth is S2.3 — all idempotent and cheap to re-run.
# Mirror that ordering in the dry-run plan so the operator sees the
# full sequence Step 2 will execute.
echo "── Vault engines dry-run ──────────────────────────────"
echo "[engines] [dry-run] ${vault_engines_sh}"
echo ""
echo "── Vault policies dry-run ─────────────────────────────"
echo "[policies] [dry-run] ${vault_policies_sh} --dry-run"
echo ""
@ -814,6 +824,20 @@ _disinto_init_nomad() {
exit 0
fi
# Enable Vault secret engines (S2.1) — kv/ mount required by all policies.
echo ""
echo "── Enabling Vault secret engines ──────────────────────"
local -a engines_cmd=("$vault_engines_sh")
if [ "$(id -u)" -eq 0 ]; then
"${engines_cmd[@]}" || exit $?
else
if ! command -v sudo >/dev/null 2>&1; then
echo "Error: vault-engines.sh must run as root and sudo is not installed" >&2
exit 1
fi
sudo -n -- "${engines_cmd[@]}" || exit $?
fi
# Apply Vault policies (S2.1) — idempotent, safe to re-run.
echo ""
echo "── Applying Vault policies ────────────────────────────"

115
lib/init/nomad/vault-engines.sh Executable file
View file

@ -0,0 +1,115 @@
#!/usr/bin/env bash
# =============================================================================
# lib/init/nomad/vault-engines.sh — Enable required Vault secret engines
#
# Part of the Nomad+Vault migration (S2.1). Enables the KV v2 secret engine
# at the `kv/` path, which is required by all policies in vault/policies/*.hcl,
# all roles in vault/roles.yaml, and by vault-import.sh and forgejo.hcl
# template stanzas that read from kv/disinto/* paths.
#
# Idempotency contract:
#   - If kv/ is already mounted with type=kv, version=2, log "already enabled"
#     and exit 0 without changing anything (the only Vault calls made are the
#     token probe and `vault secrets list`).
#   - If the kv/ path is occupied by a different engine type or KV version,
#     log an error and exit 1.
#   - A second run on a fully-configured box changes nothing.
#
# Preconditions:
#   - Vault is unsealed and reachable with a valid token.
#   - Must run AFTER vault-init.sh (unseal complete) but BEFORE
#     vault-apply-policies.sh (policies reference kv/* paths).
#
# Environment:
#   VAULT_ADDR   — default http://127.0.0.1:8200.
#   VAULT_TOKEN  — env, else read from /etc/vault.d/root.token if that file
#                  exists.
#
# Usage:
#   sudo lib/init/nomad/vault-engines.sh [--dry-run]
#
# Exit codes:
#   0  success (kv enabled, or already so); also --help and --dry-run
#   1  precondition / API failure, or unknown flag
# =============================================================================
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

# shellcheck source=../../hvault.sh
source "${REPO_ROOT}/lib/hvault.sh"

log() { printf '[vault-engines] %s\n' "$*"; }
die() { printf '[vault-engines] ERROR: %s\n' "$*" >&2; exit 1; }

# Single source of truth for the enable command, so the dry-run message and
# the live invocation cannot drift apart.
readonly -a KV_ENABLE_CMD=(vault secrets enable -path=kv -version=2 kv)

# Print the help text. The heredoc delimiter is deliberately unquoted so that
# $(basename "$0") expands to the script name.
usage() {
  cat <<EOF
Usage: sudo $(basename "$0") [--dry-run]

Enables the KV v2 secret engine at path=kv/. Required by all Vault policies,
roles, and Nomad job templates that reference kv/disinto/* paths.

  --dry-run   Print the enable command without making changes.
EOF
}

# ── Flag parsing ─────────────────────────────────────────────────────────────
dry_run=false
case "${1:-}" in
  -h|--help)
    usage
    exit 0
    ;;
  --dry-run)
    dry_run=true
    ;;
  '')
    ;;
  *)
    die "unknown flag: $1"
    ;;
esac

# ── Vault environment ────────────────────────────────────────────────────────
# Resolved before BOTH the dry-run probe and the live run, so that a dry-run
# started without VAULT_ADDR/VAULT_TOKEN in the environment still queries the
# local Vault instead of failing the probe and reporting a misleading plan.
VAULT_ADDR="${VAULT_ADDR:-http://127.0.0.1:8200}"
export VAULT_ADDR

if [ -z "${VAULT_TOKEN:-}" ] && [ -f /etc/vault.d/root.token ]; then
  VAULT_TOKEN="$(cat /etc/vault.d/root.token)"
  export VAULT_TOKEN
fi

# ── Dry-run: report the plan and exit ────────────────────────────────────────
if [ "$dry_run" = true ]; then
  # Best-effort: any probe failure (Vault unreachable, bad token, missing
  # binary) falls through to the "would run" branch so a dry-run never aborts.
  if vault secrets list -format=json | jq -e '."kv/"' >/dev/null 2>&1; then
    log "[dry-run] kv-v2 at kv/ already enabled"
  else
    log "[dry-run] would run: ${KV_ENABLE_CMD[*]}"
  fi
  exit 0
fi

# ── Preconditions ────────────────────────────────────────────────────────────
# `vault` is checked alongside curl and jq because the mount listing and the
# enable step below both go through the vault CLI.
for bin in curl jq vault; do
  command -v "$bin" >/dev/null 2>&1 \
    || die "required binary not found: ${bin}"
done

# Check Vault connectivity and token validity before touching mounts.
hvault_token_lookup >/dev/null \
  || die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN"

# ── Check if kv/ is already enabled ──────────────────────────────────────────
log "── Checking if kv-v2 is already enabled ──"
# Explicit die: without it, set -e would exit with vault's own status code and
# no message, contradicting the documented exit codes.
secrets_list="$(vault secrets list -format=json)" \
  || die "failed to list Vault secret engines"

if printf '%s' "$secrets_list" | jq -e '."kv/"' >/dev/null 2>&1; then
  # Something is mounted at kv/ — accept it only if it is KV version 2.
  kv_type="$(printf '%s' "$secrets_list" | jq -r '."kv/".type // ""')"
  kv_version="$(printf '%s' "$secrets_list" | jq -r '."kv/".options.version // "unknown"')"
  if [ "$kv_type" = "kv" ] && [ "$kv_version" = "2" ]; then
    log "kv-v2 at kv/ already enabled (type=${kv_type}, version=${kv_version})"
    exit 0
  fi
  die "kv/ exists but is not kv-v2 (type=${kv_type}, version=${kv_version}) — manual intervention required"
fi

# ── Enable kv-v2 ─────────────────────────────────────────────────────────────
log "── Enabling kv-v2 at path=kv ──"
"${KV_ENABLE_CMD[@]}" \
  || die "failed to enable kv-v2 secret engine"
log "kv-v2 enabled at kv/"
log "── done ──"

View file

@ -158,6 +158,7 @@ job "forgejo" {
destination = "secrets/forgejo.env"
env = true
change_mode = "restart"
error_on_missing_key = false
data = <<EOT
{{- with secret "kv/data/disinto/shared/forgejo" -}}
FORGEJO__security__SECRET_KEY={{ .Data.data.secret_key }}

View file

@ -82,6 +82,16 @@ if [ "${#POLICY_FILES[@]}" -eq 0 ]; then
die "no *.hcl files in ${POLICIES_DIR}"
fi
# Default VAULT_ADDR if not set (bug 2 of the four listed in #912: VAULT_ADDR).
VAULT_ADDR="${VAULT_ADDR:-http://127.0.0.1:8200}"
export VAULT_ADDR
# Resolve VAULT_TOKEN from /etc/vault.d/root.token if not set (bug 2 of #912).
if [ -z "${VAULT_TOKEN:-}" ] && [ -f /etc/vault.d/root.token ]; then
VAULT_TOKEN="$(cat /etc/vault.d/root.token)"
export VAULT_TOKEN
fi
# ── Dry-run: print plan + exit (no Vault calls) ──────────────────────────────
if [ "$dry_run" = true ]; then
log "dry-run — ${#POLICY_FILES[@]} policy file(s) in ${POLICIES_DIR}"
@ -94,9 +104,6 @@ if [ "$dry_run" = true ]; then
fi
# ── Live run: Vault connectivity check ───────────────────────────────────────
[ -n "${VAULT_ADDR:-}" ] \
|| die "VAULT_ADDR is not set — export VAULT_ADDR=http://127.0.0.1:8200"
# hvault_token_lookup both resolves the token (env or /etc/vault.d/root.token)
# and confirms the server is reachable with a valid token. Fail fast here so
# the per-file loop below doesn't emit N identical "HTTP 403" errors.

View file

@ -236,14 +236,14 @@ vault-import.sh — Import .env and sops-decrypted secrets into Vault KV
Usage:
vault-import.sh \
--env /path/to/.env \
--sops /path/to/.env.vault.enc \
--age-key /path/to/age/keys.txt \
[--sops /path/to/.env.vault.enc] \
[--age-key /path/to/age/keys.txt] \
[--dry-run]
Options:
--env Path to .env file (required)
--sops Path to sops-encrypted .env.vault.enc file (required)
--age-key Path to age keys file (required)
--sops Path to sops-encrypted .env.vault.enc file (optional)
--age-key Path to age keys file (required if --sops is provided)
--dry-run Print import plan without writing to Vault (optional)
--help Show this help message
@ -256,11 +256,12 @@ Mapping:
- WOODPECKER_* → kv/disinto/shared/woodpecker/<lowercase_key>
- FORWARD_AUTH_SECRET, CHAT_OAUTH_* → kv/disinto/shared/chat/<lowercase_key>
From sops-decrypted .env.vault.enc:
From sops-decrypted .env.vault.enc (if --sops provided):
- GITHUB_TOKEN, CODEBERG_TOKEN, CLAWHUB_TOKEN, DEPLOY_KEY, NPM_TOKEN, DOCKER_HUB_TOKEN
→ kv/disinto/runner/<NAME>/value
Examples:
vault-import.sh --env .env # Import .env only (no sops)
vault-import.sh --env .env --sops .env.vault.enc --age-key age-keys.txt
vault-import.sh --env .env --sops .env.vault.enc --age-key age-keys.txt --dry-run
EOF
@ -276,26 +277,28 @@ EOF
if [ -z "$env_file" ]; then
_die "Missing required argument: --env"
fi
if [ -z "$sops_file" ]; then
_die "Missing required argument: --sops"
fi
if [ -z "$age_key_file" ]; then
_die "Missing required argument: --age-key"
# --sops and --age-key are optional:
# - If --sops is provided, --age-key is required
# - If --sops is not provided, --age-key is not needed
if [ -n "$sops_file" ] && [ -z "$age_key_file" ]; then
_die "Missing required argument: --age-key (required when --sops is provided)"
fi
# Validate files exist
if [ ! -f "$env_file" ]; then
_die "Environment file not found: $env_file"
fi
if [ ! -f "$sops_file" ]; then
if [ -n "$sops_file" ] && [ ! -f "$sops_file" ]; then
_die "Sops file not found: $sops_file"
fi
if [ ! -f "$age_key_file" ]; then
if [ -n "$age_key_file" ] && [ ! -f "$age_key_file" ]; then
_die "Age key file not found: $age_key_file"
fi
# Security check: age key permissions
# Security check: age key permissions (only if age key is provided)
if [ -n "$age_key_file" ]; then
_validate_age_key_perms "$age_key_file"
fi
# Security check: VAULT_ADDR must be localhost
_check_vault_addr
@ -307,12 +310,16 @@ EOF
_log "Loading environment from: $env_file"
_load_env_file "$env_file"
# Decrypt sops file
# Decrypt sops file if --sops was provided
local sops_env=""
if [ -n "$sops_file" ]; then
_log "Decrypting sops file: $sops_file"
local sops_env
sops_env="$(_decrypt_sops "$sops_file" "$age_key_file")"
# shellcheck disable=SC2086
eval "$sops_env"
else
_log "No --sops provided — skipping sops decryption"
fi
# Collect all import operations
declare -a operations=()
@ -397,8 +404,12 @@ EOF
if $dry_run; then
_log "=== DRY-RUN: Import plan ==="
_log "Environment file: $env_file"
if [ -n "$sops_file" ]; then
_log "Sops file: $sops_file"
_log "Age key: $age_key_file"
else
_log "Sops file: (not provided)"
fi
_log ""
_log "Planned operations:"
for op in "${operations[@]}"; do
@ -413,8 +424,12 @@ EOF
_log "=== Starting Vault import ==="
_log "Environment file: $env_file"
if [ -n "$sops_file" ]; then
_log "Sops file: $sops_file"
_log "Age key: $age_key_file"
else
_log "Sops file: (not provided — skipping sops-based imports)"
fi
_log ""
local created=0