fix: [nomad-step-2] S2.5 — bin/disinto init --import-env / --import-sops / --age-key wire-up (#883)

Wire the Step-2 building blocks (import, auth, policies) into
`disinto init --backend=nomad` so a single command on a fresh LXC
provisions cluster + policies + auth + imports secrets + deploys
services.

Adds three flags to `disinto init --backend=nomad`:
  --import-env PATH   plaintext .env from old stack
  --import-sops PATH  sops-encrypted .env.vault.enc (requires --age-key)
  --age-key PATH      age keyfile to decrypt --import-sops

Flow: cluster-up.sh → vault-apply-policies.sh → vault-nomad-auth.sh →
(optional) vault-import.sh → deploy.sh. Policies + auth run on every
nomad real-run path (idempotent); import runs only when --import-* is
set; all layers safe to re-run.

Flag validation:
  --import-sops without --age-key → error
  --age-key without --import-sops → error
  --import-env alone (no sops)    → OK
  --backend=docker + any --import-* → error

Dry-run prints a five-section plan (cluster-up + policies + auth +
import + deploy) with every argv that would be executed; touches
nothing, logs no secret values.

Dry-run output prints one line per --import-* flag that is actually
set — not in an if/elif chain — so all three paths appear when all
three flags are passed. Prior attempts regressed this invariant.

Tests:
  tests/disinto-init-nomad.bats +10 cases covering flag validation,
  dry-run plan shape (each flag prints its own path), policies+auth
  always-on (without --import-*), and --flag=value form.

Docs: docs/nomad-migration.md new file — cutover-day runbook with
invocation shape, flag summary, idempotency contract, dry-run, and
secret-hygiene notes.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude 2026-04-16 19:04:04 +00:00
parent 520f8f1be8
commit aa3782748d
3 changed files with 360 additions and 3 deletions

View file

@ -89,6 +89,9 @@ Init options:
--yes Skip confirmation prompts
--rotate-tokens Force regeneration of all bot tokens/passwords (idempotent by default)
--dry-run Print every intended action without executing
--import-env <path> (nomad) Path to .env file for import into Vault KV (S2.5)
--import-sops <path> (nomad) Path to sops-encrypted .env.vault.enc for import (S2.5)
--age-key <path> (nomad) Path to age keyfile (required with --import-sops) (S2.5)
Hire an agent options:
--formula <path> Path to role formula TOML (default: formulas/<role>.toml)
@ -664,8 +667,12 @@ prompt_admin_password() {
# `sudo disinto init ...` directly.
_disinto_init_nomad() {
local dry_run="${1:-false}" empty="${2:-false}" with_services="${3:-}"
local import_env="${4:-}" import_sops="${5:-}" age_key="${6:-}"
local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh"
local deploy_sh="${FACTORY_ROOT}/lib/init/nomad/deploy.sh"
local vault_policies_sh="${FACTORY_ROOT}/tools/vault-apply-policies.sh"
local vault_auth_sh="${FACTORY_ROOT}/lib/init/nomad/vault-nomad-auth.sh"
local vault_import_sh="${FACTORY_ROOT}/tools/vault-import.sh"
if [ ! -x "$cluster_up" ]; then
echo "Error: ${cluster_up} not found or not executable" >&2
@ -677,6 +684,27 @@ _disinto_init_nomad() {
exit 1
fi
# Step 2/3/4 scripts must exist as soon as any --import-* flag is set,
# since we unconditionally invoke policies+auth and optionally import.
local import_any=false
if [ -n "$import_env" ] || [ -n "$import_sops" ]; then
import_any=true
fi
if [ "$import_any" = true ]; then
if [ ! -x "$vault_policies_sh" ]; then
echo "Error: ${vault_policies_sh} not found or not executable" >&2
exit 1
fi
if [ ! -x "$vault_auth_sh" ]; then
echo "Error: ${vault_auth_sh} not found or not executable" >&2
exit 1
fi
if [ ! -x "$vault_import_sh" ]; then
echo "Error: ${vault_import_sh} not found or not executable" >&2
exit 1
fi
fi
# --empty and default both invoke cluster-up today. Log the requested
# mode so the dispatch is visible in factory bootstrap logs — Step 1
# will branch on $empty to gate the job-deployment path.
@ -686,7 +714,7 @@ _disinto_init_nomad() {
echo "nomad backend: default (cluster-up; jobs deferred to Step 1)"
fi
# Dry-run: print cluster-up plan + deploy.sh plan
# Dry-run: print cluster-up plan + policies/auth/import plan + deploy.sh plan
if [ "$dry_run" = "true" ]; then
echo ""
echo "── Cluster-up dry-run ─────────────────────────────────"
@ -694,6 +722,38 @@ _disinto_init_nomad() {
"${cmd[@]}" || true
echo ""
# Vault policies + auth are invoked on every nomad real-run path
# regardless of --import-* flags (they're idempotent; S2.1 + S2.3).
# Mirror that ordering in the dry-run plan so the operator sees the
# full sequence Step 2 will execute.
echo "── Vault policies dry-run ─────────────────────────────"
echo "[policies] [dry-run] ${vault_policies_sh} --dry-run"
echo ""
echo "── Vault auth dry-run ─────────────────────────────────"
echo "[auth] [dry-run] ${vault_auth_sh}"
echo ""
# Import plan: one line per --import-* flag that is actually set.
# Printing independently (not in an if/elif chain) means that all
# three flags appearing together each echo their own path — the
# regression that bit prior implementations of this issue (#883).
if [ "$import_any" = true ]; then
echo "── Vault import dry-run ───────────────────────────────"
[ -n "$import_env" ] && echo "[import] --import-env env file: ${import_env}"
[ -n "$import_sops" ] && echo "[import] --import-sops sops file: ${import_sops}"
[ -n "$age_key" ] && echo "[import] --age-key age key: ${age_key}"
local -a import_dry_cmd=("$vault_import_sh")
[ -n "$import_env" ] && import_dry_cmd+=("--env" "$import_env")
[ -n "$import_sops" ] && import_dry_cmd+=("--sops" "$import_sops")
[ -n "$age_key" ] && import_dry_cmd+=("--age-key" "$age_key")
import_dry_cmd+=("--dry-run")
echo "[import] [dry-run] ${import_dry_cmd[*]}"
echo ""
else
echo "[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services"
echo ""
fi
if [ -n "$with_services" ]; then
echo "── Deploy services dry-run ────────────────────────────"
echo "[deploy] services to deploy: ${with_services}"
@ -721,7 +781,7 @@ _disinto_init_nomad() {
exit 0
fi
# Real run: cluster-up + deploy services
# Real run: cluster-up + policies + auth + (optional) import + deploy
local -a cluster_cmd=("$cluster_up")
if [ "$(id -u)" -eq 0 ]; then
"${cluster_cmd[@]}" || exit $?
@ -733,6 +793,56 @@ _disinto_init_nomad() {
sudo -n -- "${cluster_cmd[@]}" || exit $?
fi
# Apply Vault policies (S2.1) — idempotent, safe to re-run.
echo ""
echo "── Applying Vault policies ────────────────────────────"
local -a policies_cmd=("$vault_policies_sh")
if [ "$(id -u)" -eq 0 ]; then
"${policies_cmd[@]}" || exit $?
else
if ! command -v sudo >/dev/null 2>&1; then
echo "Error: vault-apply-policies.sh must run as root and sudo is not installed" >&2
exit 1
fi
sudo -n -- "${policies_cmd[@]}" || exit $?
fi
# Configure Vault JWT auth + Nomad workload identity (S2.3) — idempotent.
echo ""
echo "── Configuring Vault JWT auth ─────────────────────────"
local -a auth_cmd=("$vault_auth_sh")
if [ "$(id -u)" -eq 0 ]; then
"${auth_cmd[@]}" || exit $?
else
if ! command -v sudo >/dev/null 2>&1; then
echo "Error: vault-nomad-auth.sh must run as root and sudo is not installed" >&2
exit 1
fi
sudo -n -- "${auth_cmd[@]}" || exit $?
fi
# Import secrets if any --import-* flag is set (S2.2).
if [ "$import_any" = true ]; then
echo ""
echo "── Importing secrets into Vault ───────────────────────"
local -a import_cmd=("$vault_import_sh")
[ -n "$import_env" ] && import_cmd+=("--env" "$import_env")
[ -n "$import_sops" ] && import_cmd+=("--sops" "$import_sops")
[ -n "$age_key" ] && import_cmd+=("--age-key" "$age_key")
if [ "$(id -u)" -eq 0 ]; then
"${import_cmd[@]}" || exit $?
else
if ! command -v sudo >/dev/null 2>&1; then
echo "Error: vault-import.sh must run as root and sudo is not installed" >&2
exit 1
fi
sudo -n -- "${import_cmd[@]}" || exit $?
fi
else
echo ""
echo "[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services"
fi
# Deploy services if requested
if [ -n "$with_services" ]; then
echo ""
@ -777,6 +887,16 @@ _disinto_init_nomad() {
echo ""
echo "── Summary ────────────────────────────────────────────"
echo "Cluster: Nomad+Vault cluster is up"
echo "Policies: applied (Vault ACL)"
echo "Auth: Vault JWT auth + Nomad workload identity configured"
if [ "$import_any" = true ]; then
local import_desc=""
[ -n "$import_env" ] && import_desc+="${import_env} "
[ -n "$import_sops" ] && import_desc+="${import_sops} "
echo "Imported: ${import_desc% }"
else
echo "Imported: (none — seed kv/disinto/* manually before deploying secret-dependent services)"
fi
echo "Deployed: ${with_services}"
if echo "$with_services" | grep -q "forgejo"; then
echo "Ports: forgejo: 3000"
@ -803,6 +923,7 @@ disinto_init() {
# Parse flags
local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false with_services=""
local import_env="" import_sops="" age_key=""
while [ $# -gt 0 ]; do
case "$1" in
--branch) branch="$2"; shift 2 ;;
@ -819,6 +940,12 @@ disinto_init() {
--yes) auto_yes=true; shift ;;
--rotate-tokens) rotate_tokens=true; shift ;;
--dry-run) dry_run=true; shift ;;
--import-env) import_env="$2"; shift 2 ;;
--import-env=*) import_env="${1#--import-env=}"; shift ;;
--import-sops) import_sops="$2"; shift 2 ;;
--import-sops=*) import_sops="${1#--import-sops=}"; shift ;;
--age-key) age_key="$2"; shift 2 ;;
--age-key=*) age_key="${1#--age-key=}"; shift ;;
*) echo "Unknown option: $1" >&2; exit 1 ;;
esac
done
@ -859,11 +986,31 @@ disinto_init() {
exit 1
fi
# --import-* flag validation (S2.5). These three flags form an import
# triple and must be consistent before dispatch: sops encryption is
# useless without the age key to decrypt it, so either both --import-sops
# and --age-key are present or neither is. --import-env alone is fine
# (it just imports the plaintext dotenv). All three flags are nomad-only.
if [ -n "$import_sops" ] && [ -z "$age_key" ]; then
echo "Error: --import-sops requires --age-key" >&2
exit 1
fi
if [ -n "$age_key" ] && [ -z "$import_sops" ]; then
echo "Error: --age-key requires --import-sops" >&2
exit 1
fi
if { [ -n "$import_env" ] || [ -n "$import_sops" ] || [ -n "$age_key" ]; } \
&& [ "$backend" != "nomad" ]; then
echo "Error: --import-env, --import-sops, and --age-key require --backend=nomad" >&2
exit 1
fi
# Dispatch on backend — the nomad path runs lib/init/nomad/cluster-up.sh
# (S0.4). The default and --empty variants are identical today; Step 1
# will branch on $empty to add job deployment to the default path.
if [ "$backend" = "nomad" ]; then
_disinto_init_nomad "$dry_run" "$empty" "$with_services"
_disinto_init_nomad "$dry_run" "$empty" "$with_services" \
"$import_env" "$import_sops" "$age_key"
# shellcheck disable=SC2317 # _disinto_init_nomad always exits today;
# `return` is defensive against future refactors.
return

121
docs/nomad-migration.md Normal file
View file

@ -0,0 +1,121 @@
<!-- last-reviewed: (new file, S2.5 #883) -->
# Nomad+Vault migration — cutover-day runbook
`disinto init --backend=nomad` is the single entry-point that turns a fresh
LXC (with the disinto repo cloned) into a running Nomad+Vault cluster with
policies applied, JWT workload-identity auth configured, secrets imported
from the old docker stack, and services deployed.
## Cutover-day invocation
On the new LXC, as root (or an operator with NOPASSWD sudo):
```bash
# Copy the plaintext .env + sops-encrypted .env.vault.enc + age keyfile
# from the old box first (out of band — SSH, USB, whatever your ops
# procedure allows). Then:
sudo ./bin/disinto init \
--backend=nomad \
--import-env /tmp/.env \
--import-sops /tmp/.env.vault.enc \
--age-key /tmp/keys.txt \
--with forgejo
```
This runs, in order:
1. **`lib/init/nomad/cluster-up.sh`** (S0) — installs Nomad + Vault
binaries, writes `/etc/nomad.d/*`, initializes Vault, starts both
services, waits for the Nomad node to become ready.
2. **`tools/vault-apply-policies.sh`** (S2.1) — syncs every
`vault/policies/*.hcl` into Vault as an ACL policy. Idempotent.
3. **`lib/init/nomad/vault-nomad-auth.sh`** (S2.3) — enables Vault's
JWT auth method at `jwt-nomad`, points it at Nomad's JWKS, writes
one role per policy, reloads Nomad so jobs can exchange
workload-identity tokens for Vault tokens. Idempotent.
4. **`tools/vault-import.sh`** (S2.2) — reads `/tmp/.env` and the
sops-decrypted `/tmp/.env.vault.enc`, writes them to the KV paths
matching the S2.1 policy layout (`kv/disinto/bots/*`, `kv/disinto/shared/*`,
`kv/disinto/runner/*`). Idempotent (overwrites KV v2 data in place).
5. **`lib/init/nomad/deploy.sh forgejo`** (S1) — validates + runs the
`nomad/jobs/forgejo.hcl` jobspec. Forgejo reads its admin creds from
Vault via the `template` stanza (S2.4).
## Flag summary
| Flag | Meaning |
|---|---|
| `--backend=nomad` | Switch the init dispatcher to the Nomad+Vault path (instead of docker compose). |
| `--empty` | Bring the cluster up, skip policies/auth/import/deploy. Escape hatch for debugging. |
| `--with forgejo[,…]` | Deploy these services after the cluster is up. |
| `--import-env PATH` | Plaintext `.env` from the old stack. Optional. |
| `--import-sops PATH` | Sops-encrypted `.env.vault.enc` from the old stack. Requires `--age-key`. |
| `--age-key PATH` | Age keyfile used to decrypt `--import-sops`. Requires `--import-sops`. |
| `--dry-run` | Print the full plan (cluster-up + policies + auth + import + deploy) and exit. Touches nothing. |
### Flag validation
- `--import-sops` without `--age-key` → error.
- `--age-key` without `--import-sops` → error.
- `--import-env` alone (no sops) → OK (imports just the plaintext `.env`).
- `--backend=docker` with any `--import-*` flag → error.
## Idempotency
Every layer is idempotent by design. Re-running the same command on an
already-provisioned box is a no-op at every step:
- **Cluster-up:** second run detects running `nomad`/`vault` systemd
units and state files, skips re-init.
- **Policies:** byte-for-byte compare against on-server policy text;
"unchanged" for every untouched file.
- **Auth:** skips auth-method create if `jwt-nomad/` already enabled,
skips config write if the JWKS + algs match, skips server.hcl write if
the file on disk is identical to the repo copy.
- **Import:** KV v2 writes overwrite in place (same path, same keys,
same values → no new version).
- **Deploy:** `nomad job run` is declarative; same jobspec → no new
allocation.
## Dry-run
```bash
./bin/disinto init --backend=nomad \
--import-env /tmp/.env \
--import-sops /tmp/.env.vault.enc \
--age-key /tmp/keys.txt \
--with forgejo \
--dry-run
```
Prints the five-section plan — cluster-up, policies, auth, import,
deploy — with every path and every argv that would be executed. No
network, no sudo, no state mutation. See
`tests/disinto-init-nomad.bats` for the exact output shape.
## No-import path
If you already have `kv/disinto/*` seeded by other means (manual
`vault kv put`, a replica, etc.), omit all three `--import-*` flags.
`disinto init --backend=nomad --with forgejo` still applies policies,
configures auth, and deploys — but skips the import step with:
```
[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services
```
Forgejo's template stanza will fail to render (and thus the allocation
will stall) until those KV paths exist — so either import them or seed
them first.
## Secret hygiene
- Never log a secret value. The CLI only prints paths (`--import-env`,
`--age-key`) and KV *paths* (`kv/disinto/bots/review/token`), never
the values themselves. `tools/vault-import.sh` is the only thing that
reads the values, and it pipes them directly into Vault's HTTP API.
- The age keyfile must be mode 0400 — `vault-import.sh` refuses to
source a keyfile with looser permissions.
- `VAULT_ADDR` must be localhost during import — the import tool
refuses to run against a remote Vault, preventing accidental exposure.

View file

@ -191,3 +191,92 @@ setup_file() {
[ "$status" -ne 0 ]
[[ "$output" == *"--empty and --with are mutually exclusive"* ]]
}
# ── --import-env / --import-sops / --age-key (S2.5, #883) ────────────────────
#
# Step 2.5 wires Vault policies + JWT auth + optional KV import into
# `disinto init --backend=nomad`. The tests below exercise the flag
# grammar (who-requires-whom + who-requires-backend=nomad) and the
# dry-run plan shape (each --import-* flag prints its own path line,
# independently). A prior attempt at this issue regressed the "print
# every set flag" invariant by using if/elif — covered by the
# "--import-env --import-sops --age-key" case.
@test "disinto init --backend=nomad --import-env only is accepted" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-env /tmp/.env --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"--import-env"* ]]
[[ "$output" == *"env file: /tmp/.env"* ]]
}
@test "disinto init --backend=nomad --import-sops without --age-key errors" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-sops /tmp/.env.vault.enc --dry-run
[ "$status" -ne 0 ]
[[ "$output" == *"--import-sops requires --age-key"* ]]
}
@test "disinto init --backend=nomad --age-key without --import-sops errors" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --age-key /tmp/keys.txt --dry-run
[ "$status" -ne 0 ]
[[ "$output" == *"--age-key requires --import-sops"* ]]
}
@test "disinto init --backend=docker --import-env errors with backend requirement" {
run "$DISINTO_BIN" init placeholder/repo --backend=docker --import-env /tmp/.env
[ "$status" -ne 0 ]
[[ "$output" == *"--import-env, --import-sops, and --age-key require --backend=nomad"* ]]
}
@test "disinto init --backend=nomad --import-sops --age-key --dry-run shows import plan" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-sops /tmp/.env.vault.enc --age-key /tmp/keys.txt --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"Vault import dry-run"* ]]
[[ "$output" == *"--import-sops"* ]]
[[ "$output" == *"--age-key"* ]]
[[ "$output" == *"sops file: /tmp/.env.vault.enc"* ]]
[[ "$output" == *"age key: /tmp/keys.txt"* ]]
}
# When all three flags are set, each one must print its own path line —
# if/elif regressed this to "only one printed" in a prior attempt (#883).
@test "disinto init --backend=nomad --import-env --import-sops --age-key --dry-run shows full import plan" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-env /tmp/.env --import-sops /tmp/.env.vault.enc --age-key /tmp/keys.txt --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"Vault import dry-run"* ]]
[[ "$output" == *"env file: /tmp/.env"* ]]
[[ "$output" == *"sops file: /tmp/.env.vault.enc"* ]]
[[ "$output" == *"age key: /tmp/keys.txt"* ]]
}
@test "disinto init --backend=nomad without import flags shows skip message" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"no --import-env/--import-sops"* ]]
[[ "$output" == *"skipping"* ]]
}
@test "disinto init --backend=nomad --import-env --import-sops --age-key --with forgejo --dry-run shows all plans" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-env /tmp/.env --import-sops /tmp/.env.vault.enc --age-key /tmp/keys.txt --with forgejo --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"Vault import dry-run"* ]]
[[ "$output" == *"Vault policies dry-run"* ]]
[[ "$output" == *"Vault auth dry-run"* ]]
[[ "$output" == *"Deploy services dry-run"* ]]
}
@test "disinto init --backend=nomad --dry-run prints policies + auth plan even without --import-*" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --dry-run
[ "$status" -eq 0 ]
# Policies + auth run on every nomad path (idempotent), so the dry-run
# plan always lists them — regardless of whether --import-* is set.
[[ "$output" == *"Vault policies dry-run"* ]]
[[ "$output" == *"Vault auth dry-run"* ]]
[[ "$output" != *"Vault import dry-run"* ]]
}
# --import-env=PATH (=-form) must work alongside --import-env PATH.
@test "disinto init --backend=nomad --import-env=PATH (equals form) works" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-env=/tmp/.env --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"env file: /tmp/.env"* ]]
}