diff --git a/bin/disinto b/bin/disinto index 6128b7c..a752bac 100755 --- a/bin/disinto +++ b/bin/disinto @@ -89,6 +89,9 @@ Init options: --yes Skip confirmation prompts --rotate-tokens Force regeneration of all bot tokens/passwords (idempotent by default) --dry-run Print every intended action without executing + --import-env (nomad) Path to .env file for import into Vault KV (S2.5) + --import-sops (nomad) Path to sops-encrypted .env.vault.enc for import (S2.5) + --age-key (nomad) Path to age keyfile (required with --import-sops) (S2.5) Hire an agent options: --formula Path to role formula TOML (default: formulas/.toml) @@ -664,8 +667,12 @@ prompt_admin_password() { # `sudo disinto init ...` directly. _disinto_init_nomad() { local dry_run="${1:-false}" empty="${2:-false}" with_services="${3:-}" + local import_env="${4:-}" import_sops="${5:-}" age_key="${6:-}" local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh" local deploy_sh="${FACTORY_ROOT}/lib/init/nomad/deploy.sh" + local vault_policies_sh="${FACTORY_ROOT}/tools/vault-apply-policies.sh" + local vault_auth_sh="${FACTORY_ROOT}/lib/init/nomad/vault-nomad-auth.sh" + local vault_import_sh="${FACTORY_ROOT}/tools/vault-import.sh" if [ ! -x "$cluster_up" ]; then echo "Error: ${cluster_up} not found or not executable" >&2 @@ -677,6 +684,27 @@ _disinto_init_nomad() { exit 1 fi + # Step 2/3/4 scripts must exist as soon as any --import-* flag is set, + # since we unconditionally invoke policies+auth and optionally import. + local import_any=false + if [ -n "$import_env" ] || [ -n "$import_sops" ]; then + import_any=true + fi + if [ "$import_any" = true ]; then + if [ ! -x "$vault_policies_sh" ]; then + echo "Error: ${vault_policies_sh} not found or not executable" >&2 + exit 1 + fi + if [ ! -x "$vault_auth_sh" ]; then + echo "Error: ${vault_auth_sh} not found or not executable" >&2 + exit 1 + fi + if [ ! -x "$vault_import_sh" ]; then + echo "Error: ${vault_import_sh} not found or not executable" >&2 + exit 1 + fi + fi + # --empty and default both invoke cluster-up today. Log the requested # mode so the dispatch is visible in factory bootstrap logs — Step 1 # will branch on $empty to gate the job-deployment path. @@ -686,7 +714,7 @@ _disinto_init_nomad() { echo "nomad backend: default (cluster-up; jobs deferred to Step 1)" fi - # Dry-run: print cluster-up plan + deploy.sh plan + # Dry-run: print cluster-up plan + policies/auth/import plan + deploy.sh plan if [ "$dry_run" = "true" ]; then echo "" echo "── Cluster-up dry-run ─────────────────────────────────" @@ -694,6 +722,38 @@ _disinto_init_nomad() { "${cmd[@]}" || true echo "" + # Vault policies + auth are invoked on every nomad real-run path + # regardless of --import-* flags (they're idempotent; S2.1 + S2.3). + # Mirror that ordering in the dry-run plan so the operator sees the + # full sequence Step 2 will execute. + echo "── Vault policies dry-run ─────────────────────────────" + echo "[policies] [dry-run] ${vault_policies_sh} --dry-run" + echo "" + echo "── Vault auth dry-run ─────────────────────────────────" + echo "[auth] [dry-run] ${vault_auth_sh}" + echo "" + + # Import plan: one line per --import-* flag that is actually set. + # Printing independently (not in an if/elif chain) means that all + # three flags appearing together each echo their own path — the + # regression that bit prior implementations of this issue (#883). + if [ "$import_any" = true ]; then + echo "── Vault import dry-run ───────────────────────────────" + [ -n "$import_env" ] && echo "[import] --import-env env file: ${import_env}" + [ -n "$import_sops" ] && echo "[import] --import-sops sops file: ${import_sops}" + [ -n "$age_key" ] && echo "[import] --age-key age key: ${age_key}" + local -a import_dry_cmd=("$vault_import_sh") + [ -n "$import_env" ] && import_dry_cmd+=("--env" "$import_env") + [ -n "$import_sops" ] && import_dry_cmd+=("--sops" "$import_sops") + [ -n "$age_key" ] && import_dry_cmd+=("--age-key" "$age_key") + import_dry_cmd+=("--dry-run") + echo "[import] [dry-run] ${import_dry_cmd[*]}" + echo "" + else + echo "[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services" + echo "" + fi + if [ -n "$with_services" ]; then echo "── Deploy services dry-run ────────────────────────────" echo "[deploy] services to deploy: ${with_services}" @@ -721,7 +781,7 @@ _disinto_init_nomad() { exit 0 fi - # Real run: cluster-up + deploy services + # Real run: cluster-up + policies + auth + (optional) import + deploy local -a cluster_cmd=("$cluster_up") if [ "$(id -u)" -eq 0 ]; then "${cluster_cmd[@]}" || exit $? @@ -733,6 +793,56 @@ _disinto_init_nomad() { sudo -n -- "${cluster_cmd[@]}" || exit $? fi + # Apply Vault policies (S2.1) — idempotent, safe to re-run. + echo "" + echo "── Applying Vault policies ────────────────────────────" + local -a policies_cmd=("$vault_policies_sh") + if [ "$(id -u)" -eq 0 ]; then + "${policies_cmd[@]}" || exit $? + else + if ! command -v sudo >/dev/null 2>&1; then + echo "Error: vault-apply-policies.sh must run as root and sudo is not installed" >&2 + exit 1 + fi + sudo -n -- "${policies_cmd[@]}" || exit $? + fi + + # Configure Vault JWT auth + Nomad workload identity (S2.3) — idempotent. + echo "" + echo "── Configuring Vault JWT auth ─────────────────────────" + local -a auth_cmd=("$vault_auth_sh") + if [ "$(id -u)" -eq 0 ]; then + "${auth_cmd[@]}" || exit $? + else + if ! command -v sudo >/dev/null 2>&1; then + echo "Error: vault-nomad-auth.sh must run as root and sudo is not installed" >&2 + exit 1 + fi + sudo -n -- "${auth_cmd[@]}" || exit $? + fi + + # Import secrets if any --import-* flag is set (S2.2). + if [ "$import_any" = true ]; then + echo "" + echo "── Importing secrets into Vault ───────────────────────" + local -a import_cmd=("$vault_import_sh") + [ -n "$import_env" ] && import_cmd+=("--env" "$import_env") + [ -n "$import_sops" ] && import_cmd+=("--sops" "$import_sops") + [ -n "$age_key" ] && import_cmd+=("--age-key" "$age_key") + if [ "$(id -u)" -eq 0 ]; then + "${import_cmd[@]}" || exit $? + else + if ! command -v sudo >/dev/null 2>&1; then + echo "Error: vault-import.sh must run as root and sudo is not installed" >&2 + exit 1 + fi + sudo -n -- "${import_cmd[@]}" || exit $? + fi + else + echo "" + echo "[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services" + fi + # Deploy services if requested if [ -n "$with_services" ]; then echo "" @@ -777,6 +887,16 @@ _disinto_init_nomad() { echo "" echo "── Summary ────────────────────────────────────────────" echo "Cluster: Nomad+Vault cluster is up" + echo "Policies: applied (Vault ACL)" + echo "Auth: Vault JWT auth + Nomad workload identity configured" + if [ "$import_any" = true ]; then + local import_desc="" + [ -n "$import_env" ] && import_desc+="${import_env} " + [ -n "$import_sops" ] && import_desc+="${import_sops} " + echo "Imported: ${import_desc% }" + else + echo "Imported: (none — seed kv/disinto/* manually before deploying secret-dependent services)" + fi echo "Deployed: ${with_services}" if echo "$with_services" | grep -q "forgejo"; then echo "Ports: forgejo: 3000" @@ -803,6 +923,7 @@ disinto_init() { # Parse flags local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false with_services="" + local import_env="" import_sops="" age_key="" while [ $# -gt 0 ]; do case "$1" in --branch) branch="$2"; shift 2 ;; @@ -819,6 +940,12 @@ disinto_init() { --yes) auto_yes=true; shift ;; --rotate-tokens) rotate_tokens=true; shift ;; --dry-run) dry_run=true; shift ;; + --import-env) import_env="$2"; shift 2 ;; + --import-env=*) import_env="${1#--import-env=}"; shift ;; + --import-sops) import_sops="$2"; shift 2 ;; + --import-sops=*) import_sops="${1#--import-sops=}"; shift ;; + --age-key) age_key="$2"; shift 2 ;; + --age-key=*) age_key="${1#--age-key=}"; shift ;; *) echo "Unknown option: $1" >&2; exit 1 ;; esac done @@ -859,11 +986,31 @@ disinto_init() { exit 1 fi + # --import-* flag validation (S2.5). These three flags form an import + # triple and must be consistent before dispatch: sops encryption is + # useless without the age key to decrypt it, so either both --import-sops + # and --age-key are present or neither is. --import-env alone is fine + # (it just imports the plaintext dotenv). All three flags are nomad-only. + if [ -n "$import_sops" ] && [ -z "$age_key" ]; then + echo "Error: --import-sops requires --age-key" >&2 + exit 1 + fi + if [ -n "$age_key" ] && [ -z "$import_sops" ]; then + echo "Error: --age-key requires --import-sops" >&2 + exit 1 + fi + if { [ -n "$import_env" ] || [ -n "$import_sops" ] || [ -n "$age_key" ]; } \ + && [ "$backend" != "nomad" ]; then + echo "Error: --import-env, --import-sops, and --age-key require --backend=nomad" >&2 + exit 1 + fi + # Dispatch on backend — the nomad path runs lib/init/nomad/cluster-up.sh # (S0.4). The default and --empty variants are identical today; Step 1 # will branch on $empty to add job deployment to the default path. if [ "$backend" = "nomad" ]; then - _disinto_init_nomad "$dry_run" "$empty" "$with_services" + _disinto_init_nomad "$dry_run" "$empty" "$with_services" \ + "$import_env" "$import_sops" "$age_key" # shellcheck disable=SC2317 # _disinto_init_nomad always exits today; # `return` is defensive against future refactors. return diff --git a/docs/nomad-migration.md b/docs/nomad-migration.md new file mode 100644 index 0000000..8984b10 --- /dev/null +++ b/docs/nomad-migration.md @@ -0,0 +1,121 @@ + +# Nomad+Vault migration — cutover-day runbook + +`disinto init --backend=nomad` is the single entry-point that turns a fresh +LXC (with the disinto repo cloned) into a running Nomad+Vault cluster with +policies applied, JWT workload-identity auth configured, secrets imported +from the old docker stack, and services deployed. + +## Cutover-day invocation + +On the new LXC, as root (or an operator with NOPASSWD sudo): + +```bash +# Copy the plaintext .env + sops-encrypted .env.vault.enc + age keyfile +# from the old box first (out of band — SSH, USB, whatever your ops +# procedure allows). Then: + +sudo ./bin/disinto init \ + --backend=nomad \ + --import-env /tmp/.env \ + --import-sops /tmp/.env.vault.enc \ + --age-key /tmp/keys.txt \ + --with forgejo +``` + +This runs, in order: + +1. **`lib/init/nomad/cluster-up.sh`** (S0) — installs Nomad + Vault + binaries, writes `/etc/nomad.d/*`, initializes Vault, starts both + services, waits for the Nomad node to become ready. +2. **`tools/vault-apply-policies.sh`** (S2.1) — syncs every + `vault/policies/*.hcl` into Vault as an ACL policy. Idempotent. +3. **`lib/init/nomad/vault-nomad-auth.sh`** (S2.3) — enables Vault's + JWT auth method at `jwt-nomad`, points it at Nomad's JWKS, writes + one role per policy, reloads Nomad so jobs can exchange + workload-identity tokens for Vault tokens. Idempotent. +4. **`tools/vault-import.sh`** (S2.2) — reads `/tmp/.env` and the + sops-decrypted `/tmp/.env.vault.enc`, writes them to the KV paths + matching the S2.1 policy layout (`kv/disinto/bots/*`, `kv/disinto/shared/*`, + `kv/disinto/runner/*`). Idempotent (overwrites KV v2 data in place). +5. **`lib/init/nomad/deploy.sh forgejo`** (S1) — validates + runs the + `nomad/jobs/forgejo.hcl` jobspec. Forgejo reads its admin creds from + Vault via the `template` stanza (S2.4). + +## Flag summary + +| Flag | Meaning | +|---|---| +| `--backend=nomad` | Switch the init dispatcher to the Nomad+Vault path (instead of docker compose). | +| `--empty` | Bring the cluster up, skip policies/auth/import/deploy. Escape hatch for debugging. | +| `--with forgejo[,…]` | Deploy these services after the cluster is up. | +| `--import-env PATH` | Plaintext `.env` from the old stack. Optional. | +| `--import-sops PATH` | Sops-encrypted `.env.vault.enc` from the old stack. Requires `--age-key`. | +| `--age-key PATH` | Age keyfile used to decrypt `--import-sops`. Requires `--import-sops`. | +| `--dry-run` | Print the full plan (cluster-up + policies + auth + import + deploy) and exit. Touches nothing. | + +### Flag validation + +- `--import-sops` without `--age-key` → error. +- `--age-key` without `--import-sops` → error. +- `--import-env` alone (no sops) → OK (imports just the plaintext `.env`). +- `--backend=docker` with any `--import-*` flag → error. + +## Idempotency + +Every layer is idempotent by design. Re-running the same command on an +already-provisioned box is a no-op at every step: + +- **Cluster-up:** second run detects running `nomad`/`vault` systemd + units and state files, skips re-init. +- **Policies:** byte-for-byte compare against on-server policy text; + "unchanged" for every untouched file. +- **Auth:** skips auth-method create if `jwt-nomad/` already enabled, + skips config write if the JWKS + algs match, skips server.hcl write if + the file on disk is identical to the repo copy. +- **Import:** KV v2 writes overwrite in place (same path, same keys, + same values → no new version). +- **Deploy:** `nomad job run` is declarative; same jobspec → no new + allocation. + +## Dry-run + +```bash +./bin/disinto init --backend=nomad \ + --import-env /tmp/.env \ + --import-sops /tmp/.env.vault.enc \ + --age-key /tmp/keys.txt \ + --with forgejo \ + --dry-run +``` + +Prints the five-section plan — cluster-up, policies, auth, import, +deploy — with every path and every argv that would be executed. No +network, no sudo, no state mutation. See +`tests/disinto-init-nomad.bats` for the exact output shape. + +## No-import path + +If you already have `kv/disinto/*` seeded by other means (manual +`vault kv put`, a replica, etc.), omit all three `--import-*` flags. +`disinto init --backend=nomad --with forgejo` still applies policies, +configures auth, and deploys — but skips the import step with: + +``` +[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services +``` + +Forgejo's template stanza will fail to render (and thus the allocation +will stall) until those KV paths exist — so either import them or seed +them first. + +## Secret hygiene + +- Never log a secret value. The CLI only prints paths (`--import-env`, + `--age-key`) and KV *paths* (`kv/disinto/bots/review/token`), never + the values themselves. `tools/vault-import.sh` is the only thing that + reads the values, and it pipes them directly into Vault's HTTP API. +- The age keyfile must be mode 0400 — `vault-import.sh` refuses to + source a keyfile with looser permissions. +- `VAULT_ADDR` must be localhost during import — the import tool + refuses to run against a remote Vault, preventing accidental exposure. diff --git a/tests/disinto-init-nomad.bats b/tests/disinto-init-nomad.bats index 84cfa10..30c7f7c 100644 --- a/tests/disinto-init-nomad.bats +++ b/tests/disinto-init-nomad.bats @@ -191,3 +191,92 @@ setup_file() { [ "$status" -ne 0 ] [[ "$output" == *"--empty and --with are mutually exclusive"* ]] } + +# ── --import-env / --import-sops / --age-key (S2.5, #883) ──────────────────── +# +# Step 2.5 wires Vault policies + JWT auth + optional KV import into +# `disinto init --backend=nomad`. The tests below exercise the flag +# grammar (who-requires-whom + who-requires-backend=nomad) and the +# dry-run plan shape (each --import-* flag prints its own path line, +# independently). A prior attempt at this issue regressed the "print +# every set flag" invariant by using if/elif — covered by the +# "--import-env --import-sops --age-key" case. + +@test "disinto init --backend=nomad --import-env only is accepted" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-env /tmp/.env --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"--import-env"* ]] + [[ "$output" == *"env file: /tmp/.env"* ]] +} + +@test "disinto init --backend=nomad --import-sops without --age-key errors" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-sops /tmp/.env.vault.enc --dry-run + [ "$status" -ne 0 ] + [[ "$output" == *"--import-sops requires --age-key"* ]] +} + +@test "disinto init --backend=nomad --age-key without --import-sops errors" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --age-key /tmp/keys.txt --dry-run + [ "$status" -ne 0 ] + [[ "$output" == *"--age-key requires --import-sops"* ]] +} + +@test "disinto init --backend=docker --import-env errors with backend requirement" { + run "$DISINTO_BIN" init placeholder/repo --backend=docker --import-env /tmp/.env + [ "$status" -ne 0 ] + [[ "$output" == *"--import-env, --import-sops, and --age-key require --backend=nomad"* ]] +} + +@test "disinto init --backend=nomad --import-sops --age-key --dry-run shows import plan" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-sops /tmp/.env.vault.enc --age-key /tmp/keys.txt --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"Vault import dry-run"* ]] + [[ "$output" == *"--import-sops"* ]] + [[ "$output" == *"--age-key"* ]] + [[ "$output" == *"sops file: /tmp/.env.vault.enc"* ]] + [[ "$output" == *"age key: /tmp/keys.txt"* ]] +} + +# When all three flags are set, each one must print its own path line — +# if/elif regressed this to "only one printed" in a prior attempt (#883). +@test "disinto init --backend=nomad --import-env --import-sops --age-key --dry-run shows full import plan" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-env /tmp/.env --import-sops /tmp/.env.vault.enc --age-key /tmp/keys.txt --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"Vault import dry-run"* ]] + [[ "$output" == *"env file: /tmp/.env"* ]] + [[ "$output" == *"sops file: /tmp/.env.vault.enc"* ]] + [[ "$output" == *"age key: /tmp/keys.txt"* ]] +} + +@test "disinto init --backend=nomad without import flags shows skip message" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"no --import-env/--import-sops"* ]] + [[ "$output" == *"skipping"* ]] +} + +@test "disinto init --backend=nomad --import-env --import-sops --age-key --with forgejo --dry-run shows all plans" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-env /tmp/.env --import-sops /tmp/.env.vault.enc --age-key /tmp/keys.txt --with forgejo --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"Vault import dry-run"* ]] + [[ "$output" == *"Vault policies dry-run"* ]] + [[ "$output" == *"Vault auth dry-run"* ]] + [[ "$output" == *"Deploy services dry-run"* ]] +} + +@test "disinto init --backend=nomad --dry-run prints policies + auth plan even without --import-*" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --dry-run + [ "$status" -eq 0 ] + # Policies + auth run on every nomad path (idempotent), so the dry-run + # plan always lists them — regardless of whether --import-* is set. + [[ "$output" == *"Vault policies dry-run"* ]] + [[ "$output" == *"Vault auth dry-run"* ]] + [[ "$output" != *"Vault import dry-run"* ]] +} + +# --import-env=PATH (=-form) must work alongside --import-env PATH. +@test "disinto init --backend=nomad --import-env=PATH (equals form) works" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --import-env=/tmp/.env --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"env file: /tmp/.env"* ]] +}