fix: [nomad-step-0] S0.3 — install vault + systemd auto-unseal + vault-init.sh (dev-persisted seal) (#823)
Adds the Vault half of the factory-dev-box bringup, landed but not started (per the install-but-don't-start pattern used for nomad in #822): - lib/init/nomad/install.sh — now also installs vault from the shared HashiCorp apt repo. VAULT_VERSION pinned (1.18.5). Fast-path skips apt entirely when both binaries are at their pins; partial upgrades only touch the package that drifted. - nomad/vault.hcl — single-node config: file storage backend at /var/lib/vault/data, localhost listener on :8200, ui on, mlock kept on. No TLS / HA / audit yet; those land in later steps. - lib/init/nomad/systemd-vault.sh — writes /etc/systemd/system/vault.service (Type=notify, ExecStartPost auto-unseals from /etc/vault.d/unseal.key, CAP_IPC_LOCK granted for mlock), deploys nomad/vault.hcl to /etc/vault.d/, creates /var/lib/vault/data (0700 root), enables the unit without starting it. Idempotent via content-compare. - lib/init/nomad/vault-init.sh — first-run init: spawns a temporary `vault server` if not already reachable, runs operator-init with key-shares=1/threshold=1, persists unseal.key + root.token (0400 root), unseals once in-process, shuts down the temp server. Re-run detects initialized + unseal.key present → no-op. Initialized but key missing is a hard failure (can't recover). lib/hvault.sh already defaults VAULT_TOKEN to /etc/vault.d/root.token when the env var is absent, so no change needed there. Seal model: the single unseal key lives on disk; seal-key theft equals vault theft. Factory-dev-box-acceptable tradeoff — avoids running a second Vault to auto-unseal the first. Blocks S0.4 (#824). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
75bec43c4a
commit
90f13c0313
4 changed files with 471 additions and 34 deletions
|
|
@ -1,27 +1,30 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# lib/init/nomad/install.sh — Idempotent apt install of HashiCorp Nomad
|
# lib/init/nomad/install.sh — Idempotent apt install of HashiCorp Nomad + Vault
|
||||||
#
|
#
|
||||||
# Part of the Nomad+Vault migration (S0.2, issue #822). Installs the `nomad`
|
# Part of the Nomad+Vault migration. Installs both the `nomad` binary (S0.2,
|
||||||
# binary from the HashiCorp apt repository. Does NOT install Vault — S0.3
|
# issue #822) and the `vault` binary (S0.3, issue #823) from the same
|
||||||
# owns that. Does NOT configure, start, or enable a systemd unit —
|
# HashiCorp apt repository. Does NOT configure, start, or enable any systemd
|
||||||
# lib/init/nomad/systemd-nomad.sh owns that. Does NOT wire this script into
|
# unit — lib/init/nomad/systemd-nomad.sh and lib/init/nomad/systemd-vault.sh
|
||||||
# `disinto init` — S0.4 owns that.
|
# own that. Does NOT wire this script into `disinto init` — S0.4 owns that.
|
||||||
#
|
#
|
||||||
# Idempotency contract:
|
# Idempotency contract:
|
||||||
# - Running twice back-to-back is a no-op once the target version is
|
# - Running twice back-to-back is a no-op once both target versions are
|
||||||
# installed and the apt source is in place.
|
# installed and the apt source is in place.
|
||||||
# - Adds the HashiCorp apt keyring only if it is absent.
|
# - Adds the HashiCorp apt keyring only if it is absent.
|
||||||
# - Adds the HashiCorp apt sources list only if it is absent.
|
# - Adds the HashiCorp apt sources list only if it is absent.
|
||||||
# - Skips `apt-get install` entirely when the installed version already
|
# - Skips `apt-get install` for any package whose installed version already
|
||||||
# matches ${NOMAD_VERSION}.
|
# matches the pin. If both are at pin, exits before touching apt.
|
||||||
#
|
#
|
||||||
# Configuration:
|
# Configuration:
|
||||||
# NOMAD_VERSION — pinned Nomad version (default: see below). The apt
|
# NOMAD_VERSION — pinned Nomad version (default: see below). Apt package
|
||||||
# package name is versioned as "nomad=<version>-1".
|
# name is versioned as "nomad=<version>-1".
|
||||||
|
# VAULT_VERSION — pinned Vault version (default: see below). Apt package
|
||||||
|
# name is versioned as "vault=<version>-1".
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# sudo NOMAD_VERSION=1.9.5 lib/init/nomad/install.sh
|
# sudo lib/init/nomad/install.sh
|
||||||
|
# sudo NOMAD_VERSION=1.9.5 VAULT_VERSION=1.18.5 lib/init/nomad/install.sh
|
||||||
#
|
#
|
||||||
# Exit codes:
|
# Exit codes:
|
||||||
# 0 success (installed or already present)
|
# 0 success (installed or already present)
|
||||||
|
|
@ -29,16 +32,29 @@
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
# Pin to a specific Nomad 1.x release. Bump here, not at call sites.
|
# Pin to specific 1.x releases. Bump here, not at call sites.
|
||||||
NOMAD_VERSION="${NOMAD_VERSION:-1.9.5}"
|
NOMAD_VERSION="${NOMAD_VERSION:-1.9.5}"
|
||||||
|
VAULT_VERSION="${VAULT_VERSION:-1.18.5}"
|
||||||
|
|
||||||
HASHICORP_KEYRING="/usr/share/keyrings/hashicorp-archive-keyring.gpg"
|
HASHICORP_KEYRING="/usr/share/keyrings/hashicorp-archive-keyring.gpg"
|
||||||
HASHICORP_SOURCES="/etc/apt/sources.list.d/hashicorp.list"
|
HASHICORP_SOURCES="/etc/apt/sources.list.d/hashicorp.list"
|
||||||
HASHICORP_GPG_URL="https://apt.releases.hashicorp.com/gpg"
|
HASHICORP_GPG_URL="https://apt.releases.hashicorp.com/gpg"
|
||||||
HASHICORP_REPO_URL="https://apt.releases.hashicorp.com"
|
HASHICORP_REPO_URL="https://apt.releases.hashicorp.com"
|
||||||
|
|
||||||
log() { printf '[install-nomad] %s\n' "$*"; }
|
log() { printf '[install] %s\n' "$*"; }
|
||||||
die() { printf '[install-nomad] ERROR: %s\n' "$*" >&2; exit 1; }
|
die() { printf '[install] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||||
|
|
||||||
|
# _installed_version BINARY
|
||||||
|
# Echoes the installed semver for `nomad` or `vault` (e.g. "1.9.5").
|
||||||
|
# Both tools print their version on the first line of `<bin> version` as
|
||||||
|
# "<Name> v<semver>..." — the shared awk extracts $2 with the leading "v"
|
||||||
|
# stripped. Empty string when the binary is absent or output is unexpected.
|
||||||
|
_installed_version() {
  # Echo the installed semver of BINARY ($1), e.g. "1.9.5"; echo nothing when
  # the binary is absent, fails to run, or prints something unexpected.
  # Always returns 0 so callers running under `set -euo pipefail` can use
  # `v="$(_installed_version nomad)"` without aborting on a broken binary.
  local bin="$1"
  local version_out=""

  command -v "$bin" >/dev/null 2>&1 || return 0

  # Capture the output first instead of piping straight into awk: a failing
  # `<bin> version` (or a SIGPIPE after awk exits early) would otherwise
  # surface as a non-zero pipeline status under pipefail.
  version_out="$("$bin" version 2>/dev/null)" || return 0
  [ -n "$version_out" ] || return 0

  # First line looks like "<Name> v<semver>..." — take field 2, drop the "v".
  awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}' <<<"$version_out"
}
|
||||||
|
|
||||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||||
if [ "$(id -u)" -ne 0 ]; then
|
if [ "$(id -u)" -ne 0 ]; then
|
||||||
|
|
@ -53,16 +69,24 @@ done
|
||||||
CODENAME="$(lsb_release -cs)"
|
CODENAME="$(lsb_release -cs)"
|
||||||
[ -n "$CODENAME" ] || die "lsb_release returned empty codename"
|
[ -n "$CODENAME" ] || die "lsb_release returned empty codename"
|
||||||
|
|
||||||
# ── Fast-path: already at desired version? ───────────────────────────────────
|
# ── Fast-path: are both already at desired versions? ─────────────────────────
|
||||||
installed_version=""
|
nomad_installed="$(_installed_version nomad)"
|
||||||
if command -v nomad >/dev/null 2>&1; then
|
vault_installed="$(_installed_version vault)"
|
||||||
# `nomad version` prints e.g. "Nomad v1.9.5" on the first line.
|
|
||||||
installed_version="$(nomad version 2>/dev/null \
|
need_pkgs=()
|
||||||
| awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}')"
|
if [ "$nomad_installed" = "$NOMAD_VERSION" ]; then
|
||||||
|
log "nomad ${NOMAD_VERSION} already installed"
|
||||||
|
else
|
||||||
|
need_pkgs+=("nomad=${NOMAD_VERSION}-1")
|
||||||
|
fi
|
||||||
|
if [ "$vault_installed" = "$VAULT_VERSION" ]; then
|
||||||
|
log "vault ${VAULT_VERSION} already installed"
|
||||||
|
else
|
||||||
|
need_pkgs+=("vault=${VAULT_VERSION}-1")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$installed_version" = "$NOMAD_VERSION" ]; then
|
if [ "${#need_pkgs[@]}" -eq 0 ]; then
|
||||||
log "nomad ${NOMAD_VERSION} already installed — nothing to do"
|
log "nothing to do"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -94,25 +118,26 @@ else
|
||||||
apt_update_needed=0
|
apt_update_needed=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── Install the pinned version ───────────────────────────────────────────────
|
# ── Install the pinned versions ──────────────────────────────────────────────
|
||||||
if [ "$apt_update_needed" -eq 1 ]; then
|
if [ "$apt_update_needed" -eq 1 ]; then
|
||||||
log "running apt-get update"
|
log "running apt-get update"
|
||||||
DEBIAN_FRONTEND=noninteractive apt-get update -qq \
|
DEBIAN_FRONTEND=noninteractive apt-get update -qq \
|
||||||
|| die "apt-get update failed"
|
|| die "apt-get update failed"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# HashiCorp apt packages use the "<version>-1" package-revision suffix.
|
log "installing ${need_pkgs[*]}"
|
||||||
pkg_spec="nomad=${NOMAD_VERSION}-1"
|
|
||||||
log "installing ${pkg_spec}"
|
|
||||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
"$pkg_spec" \
|
"${need_pkgs[@]}" \
|
||||||
|| die "apt-get install ${pkg_spec} failed"
|
|| die "apt-get install ${need_pkgs[*]} failed"
|
||||||
|
|
||||||
# ── Verify ───────────────────────────────────────────────────────────────────
|
# ── Verify ───────────────────────────────────────────────────────────────────
|
||||||
final_version="$(nomad version 2>/dev/null \
|
final_nomad="$(_installed_version nomad)"
|
||||||
| awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}')"
|
if [ "$final_nomad" != "$NOMAD_VERSION" ]; then
|
||||||
if [ "$final_version" != "$NOMAD_VERSION" ]; then
|
die "post-install check: expected nomad ${NOMAD_VERSION}, got '${final_nomad}'"
|
||||||
die "post-install check: expected ${NOMAD_VERSION}, got '${final_version}'"
|
fi
|
||||||
|
final_vault="$(_installed_version vault)"
|
||||||
|
if [ "$final_vault" != "$VAULT_VERSION" ]; then
|
||||||
|
die "post-install check: expected vault ${VAULT_VERSION}, got '${final_vault}'"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "nomad ${NOMAD_VERSION} installed successfully"
|
log "nomad ${NOMAD_VERSION} + vault ${VAULT_VERSION} installed successfully"
|
||||||
|
|
|
||||||
178
lib/init/nomad/systemd-vault.sh
Executable file
178
lib/init/nomad/systemd-vault.sh
Executable file
|
|
@ -0,0 +1,178 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# lib/init/nomad/systemd-vault.sh — Idempotent systemd unit installer for Vault
|
||||||
|
#
|
||||||
|
# Part of the Nomad+Vault migration (S0.3, issue #823). Lands four things:
|
||||||
|
# 1. /etc/vault.d/ (0755 root:root)
|
||||||
|
# 2. /etc/vault.d/vault.hcl (copy of nomad/vault.hcl, 0644 root:root)
|
||||||
|
# 3. /var/lib/vault/data/ (0700 root:root, Vault file-storage backend)
|
||||||
|
# 4. /etc/systemd/system/vault.service (0644 root:root)
|
||||||
|
#
|
||||||
|
# Then `systemctl enable vault` WITHOUT starting the service. Bootstrap
|
||||||
|
# order is:
|
||||||
|
# lib/init/nomad/install.sh (nomad + vault binaries)
|
||||||
|
# lib/init/nomad/systemd-vault.sh (this script — unit + config + dirs)
|
||||||
|
# lib/init/nomad/vault-init.sh (init + write unseal.key + unseal once)
|
||||||
|
# systemctl start vault (ExecStartPost auto-unseals from file)
|
||||||
|
#
|
||||||
|
# The systemd unit's ExecStartPost reads /etc/vault.d/unseal.key and calls
|
||||||
|
# `vault operator unseal`. That file is written by vault-init.sh on first
|
||||||
|
# run; until it exists, `systemctl start vault` will leave Vault sealed
|
||||||
|
# (ExecStartPost fails, unit goes into failed state — intentional, visible).
|
||||||
|
#
|
||||||
|
# Seal model:
|
||||||
|
# The single unseal key lives at /etc/vault.d/unseal.key (0400 root).
|
||||||
|
# Seal-key theft == vault theft. Dev-box acceptable; see docs/VAULT.md.
|
||||||
|
#
|
||||||
|
# Idempotency contract:
|
||||||
|
# - Unit file NOT rewritten when on-disk content already matches desired.
|
||||||
|
# - vault.hcl NOT rewritten when on-disk content matches the repo copy.
|
||||||
|
# - `systemctl enable` on an already-enabled unit is a no-op.
|
||||||
|
# - Safe to run unconditionally before every factory boot.
|
||||||
|
#
|
||||||
|
# Preconditions:
|
||||||
|
# - vault binary installed (lib/init/nomad/install.sh)
|
||||||
|
# - nomad/vault.hcl present in the repo (relative to this script)
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# sudo lib/init/nomad/systemd-vault.sh
|
||||||
|
#
|
||||||
|
# Exit codes:
|
||||||
|
# 0 success (unit+config installed + enabled, or already so)
|
||||||
|
# 1 precondition failure (not root, no systemctl, no vault binary,
|
||||||
|
# missing source config)
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
UNIT_PATH="/etc/systemd/system/vault.service"
|
||||||
|
VAULT_CONFIG_DIR="/etc/vault.d"
|
||||||
|
VAULT_CONFIG_FILE="${VAULT_CONFIG_DIR}/vault.hcl"
|
||||||
|
VAULT_DATA_DIR="/var/lib/vault/data"
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
|
||||||
|
VAULT_HCL_SRC="${REPO_ROOT}/nomad/vault.hcl"
|
||||||
|
|
||||||
|
log() { printf '[systemd-vault] %s\n' "$*"; }
|
||||||
|
die() { printf '[systemd-vault] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||||
|
|
||||||
|
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||||
|
if [ "$(id -u)" -ne 0 ]; then
|
||||||
|
die "must run as root (needs write access to ${UNIT_PATH})"
|
||||||
|
fi
|
||||||
|
|
||||||
|
command -v systemctl >/dev/null 2>&1 \
|
||||||
|
|| die "systemctl not found (systemd is required)"
|
||||||
|
|
||||||
|
VAULT_BIN="$(command -v vault 2>/dev/null || true)"
|
||||||
|
[ -n "$VAULT_BIN" ] \
|
||||||
|
|| die "vault binary not found — run lib/init/nomad/install.sh first"
|
||||||
|
|
||||||
|
[ -f "$VAULT_HCL_SRC" ] \
|
||||||
|
|| die "source config not found: ${VAULT_HCL_SRC}"
|
||||||
|
|
||||||
|
# ── Desired unit content ─────────────────────────────────────────────────────
|
||||||
|
# Adapted from HashiCorp's recommended vault.service template
|
||||||
|
# (https://developer.hashicorp.com/vault/tutorials/getting-started-deploy/deploy)
|
||||||
|
# for a single-node factory dev box:
|
||||||
|
# - User=root keeps the seal-key read path simple (unseal.key is 0400 root).
|
||||||
|
# - CAP_IPC_LOCK lets mlock() succeed so disable_mlock=false is honoured.
|
||||||
|
# Harmless when running as root; required if this is ever flipped to a
|
||||||
|
# dedicated `vault` user.
|
||||||
|
# - ExecStartPost auto-unseals on every boot using the persisted key.
|
||||||
|
# This is the dev-persisted-seal tradeoff — seal-key theft == vault
|
||||||
|
# theft, but no second Vault to babysit.
|
||||||
|
# - ConditionFileNotEmpty guards against starting without config — makes
|
||||||
|
# a missing vault.hcl visible in systemctl status, not a crash loop.
|
||||||
|
# - Type=notify so systemd waits for Vault's listener-ready notification
|
||||||
|
# before running ExecStartPost (ExecStartPost also has `sleep 2` as a
|
||||||
|
# belt-and-braces guard against Type=notify edge cases).
|
||||||
|
# - \$MAINPID is escaped so bash doesn't expand it inside this heredoc.
|
||||||
|
# - \$(cat ...) is escaped so the subshell runs at unit-execution time
|
||||||
|
# (inside bash -c), not at heredoc-expansion time here.
|
||||||
|
read -r -d '' DESIRED_UNIT <<EOF || true
|
||||||
|
[Unit]
|
||||||
|
Description=HashiCorp Vault
|
||||||
|
Documentation=https://developer.hashicorp.com/vault/docs
|
||||||
|
Requires=network-online.target
|
||||||
|
After=network-online.target
|
||||||
|
ConditionFileNotEmpty=${VAULT_CONFIG_FILE}
|
||||||
|
StartLimitIntervalSec=60
|
||||||
|
StartLimitBurst=3
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=notify
|
||||||
|
User=root
|
||||||
|
Group=root
|
||||||
|
Environment=VAULT_ADDR=http://127.0.0.1:8200
|
||||||
|
SecureBits=keep-caps
|
||||||
|
CapabilityBoundingSet=CAP_IPC_LOCK
|
||||||
|
AmbientCapabilities=CAP_IPC_LOCK
|
||||||
|
ExecStart=${VAULT_BIN} server -config=${VAULT_CONFIG_FILE}
|
||||||
|
ExecStartPost=/bin/bash -c 'sleep 2 && ${VAULT_BIN} operator unseal \$(cat ${VAULT_CONFIG_DIR}/unseal.key)'
|
||||||
|
ExecReload=/bin/kill --signal HUP \$MAINPID
|
||||||
|
KillMode=process
|
||||||
|
KillSignal=SIGINT
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5
|
||||||
|
TimeoutStopSec=30
|
||||||
|
LimitNOFILE=65536
|
||||||
|
LimitMEMLOCK=infinity
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# ── Ensure config + data dirs exist ──────────────────────────────────────────
|
||||||
|
# /etc/vault.d is 0755 — vault.hcl is world-readable (no secrets in it);
|
||||||
|
# the real secrets (unseal.key, root.token) get their own 0400 mode.
|
||||||
|
# /var/lib/vault/data is 0700 — vault's on-disk state (encrypted-at-rest
|
||||||
|
# by Vault itself, but an extra layer of "don't rely on that").
|
||||||
|
if [ ! -d "$VAULT_CONFIG_DIR" ]; then
|
||||||
|
log "creating ${VAULT_CONFIG_DIR}"
|
||||||
|
install -d -m 0755 -o root -g root "$VAULT_CONFIG_DIR"
|
||||||
|
fi
|
||||||
|
if [ ! -d "$VAULT_DATA_DIR" ]; then
|
||||||
|
log "creating ${VAULT_DATA_DIR}"
|
||||||
|
install -d -m 0700 -o root -g root "$VAULT_DATA_DIR"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Install vault.hcl only if content differs ────────────────────────────────
|
||||||
|
if [ ! -f "$VAULT_CONFIG_FILE" ] \
|
||||||
|
|| ! cmp -s "$VAULT_HCL_SRC" "$VAULT_CONFIG_FILE"; then
|
||||||
|
log "writing config → ${VAULT_CONFIG_FILE}"
|
||||||
|
install -m 0644 -o root -g root "$VAULT_HCL_SRC" "$VAULT_CONFIG_FILE"
|
||||||
|
else
|
||||||
|
log "config already up to date"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Install unit file only if content differs ────────────────────────────────
|
||||||
|
needs_reload=0
|
||||||
|
if [ ! -f "$UNIT_PATH" ] \
|
||||||
|
|| ! printf '%s\n' "$DESIRED_UNIT" | cmp -s - "$UNIT_PATH"; then
|
||||||
|
log "writing unit → ${UNIT_PATH}"
|
||||||
|
tmp="$(mktemp)"
|
||||||
|
trap 'rm -f "$tmp"' EXIT
|
||||||
|
printf '%s\n' "$DESIRED_UNIT" > "$tmp"
|
||||||
|
install -m 0644 -o root -g root "$tmp" "$UNIT_PATH"
|
||||||
|
rm -f "$tmp"
|
||||||
|
trap - EXIT
|
||||||
|
needs_reload=1
|
||||||
|
else
|
||||||
|
log "unit file already up to date"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Reload + enable ──────────────────────────────────────────────────────────
|
||||||
|
if [ "$needs_reload" -eq 1 ]; then
|
||||||
|
log "systemctl daemon-reload"
|
||||||
|
systemctl daemon-reload
|
||||||
|
fi
|
||||||
|
|
||||||
|
if systemctl is-enabled --quiet vault.service 2>/dev/null; then
|
||||||
|
log "vault.service already enabled"
|
||||||
|
else
|
||||||
|
log "systemctl enable vault"
|
||||||
|
systemctl enable vault.service >/dev/null
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "done — unit+config installed and enabled (NOT started; vault-init.sh next)"
|
||||||
193
lib/init/nomad/vault-init.sh
Executable file
193
lib/init/nomad/vault-init.sh
Executable file
|
|
@ -0,0 +1,193 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# lib/init/nomad/vault-init.sh — Idempotent Vault first-run initializer
|
||||||
|
#
|
||||||
|
# Part of the Nomad+Vault migration (S0.3, issue #823). Initializes Vault
|
||||||
|
# in dev-persisted-seal mode (single unseal key on disk) and unseals once.
|
||||||
|
# On re-run, becomes a no-op — never re-initializes or rotates the key.
|
||||||
|
#
|
||||||
|
# What it does (first run):
|
||||||
|
# 1. Ensures Vault is reachable at ${VAULT_ADDR} — spawns a temporary
|
||||||
|
# `vault server -config=/etc/vault.d/vault.hcl` if not already up.
|
||||||
|
# 2. Runs `vault operator init -key-shares=1 -key-threshold=1` and
|
||||||
|
# captures the resulting unseal key + root token.
|
||||||
|
# 3. Writes /etc/vault.d/unseal.key (0400 root, no trailing newline).
|
||||||
|
# 4. Writes /etc/vault.d/root.token (0400 root, no trailing newline).
|
||||||
|
# 5. Unseals Vault once in the current process.
|
||||||
|
# 6. Shuts down the temporary server if we started one (so a subsequent
|
||||||
|
# `systemctl start vault` doesn't conflict on port 8200).
|
||||||
|
#
|
||||||
|
# Idempotency contract:
|
||||||
|
# - /etc/vault.d/unseal.key exists AND `vault status` reports
|
||||||
|
# initialized=true → exit 0, no mutation, no re-init.
|
||||||
|
# - Initialized-but-unseal.key-missing is a hard failure (can't recover
|
||||||
|
# the key without the existing storage; user must restore from backup).
|
||||||
|
#
|
||||||
|
# Bootstrap order:
|
||||||
|
# lib/init/nomad/install.sh (installs vault binary)
|
||||||
|
# lib/init/nomad/systemd-vault.sh (lands unit + config + dirs; enables)
|
||||||
|
# lib/init/nomad/vault-init.sh (this script — init + unseal once)
|
||||||
|
# systemctl start vault (ExecStartPost auto-unseals henceforth)
|
||||||
|
#
|
||||||
|
# Seal model:
|
||||||
|
# Single unseal key persisted on disk at /etc/vault.d/unseal.key. Seal-key
|
||||||
|
# theft == vault theft. Factory-dev-box-acceptable; see docs/VAULT.md.
|
||||||
|
#
|
||||||
|
# Environment:
|
||||||
|
# VAULT_ADDR — Vault API address (default: http://127.0.0.1:8200).
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# sudo lib/init/nomad/vault-init.sh
|
||||||
|
#
|
||||||
|
# Exit codes:
|
||||||
|
# 0 success (initialized + unsealed + keys persisted; or already done)
|
||||||
|
# 1 precondition / operational failure
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
VAULT_CONFIG_FILE="/etc/vault.d/vault.hcl"
|
||||||
|
UNSEAL_KEY_FILE="/etc/vault.d/unseal.key"
|
||||||
|
ROOT_TOKEN_FILE="/etc/vault.d/root.token"
|
||||||
|
VAULT_ADDR="${VAULT_ADDR:-http://127.0.0.1:8200}"
|
||||||
|
export VAULT_ADDR
|
||||||
|
|
||||||
|
# Track whether we spawned a temporary vault (for cleanup).
|
||||||
|
spawned_pid=""
|
||||||
|
spawned_log=""
|
||||||
|
|
||||||
|
log() { printf '[vault-init] %s\n' "$*"; }
|
||||||
|
die() { printf '[vault-init] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||||
|
|
||||||
|
# ── Cleanup: stop the temporary server (if we started one) on any exit ───────
|
||||||
|
# EXIT trap fires on success AND failure AND signals — so we never leak a
|
||||||
|
# background vault process holding port 8200 after this script returns.
|
||||||
|
cleanup() {
  # Tear down the temporary vault server, but only if this script spawned
  # one and it is still running. kill/wait errors are ignored: this is a
  # best-effort teardown.
  if [ -n "$spawned_pid" ]; then
    if kill -0 "$spawned_pid" 2>/dev/null; then
      log "stopping temporary vault (pid=${spawned_pid})"
      kill "$spawned_pid" 2>/dev/null || true
      wait "$spawned_pid" 2>/dev/null || true
    fi
  fi

  # Drop the captured server log, if one was created.
  if [ -n "$spawned_log" ]; then
    if [ -f "$spawned_log" ]; then
      rm -f "$spawned_log"
    fi
  fi
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||||
|
if [ "$(id -u)" -ne 0 ]; then
|
||||||
|
die "must run as root (needs to write 0400 files under /etc/vault.d)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
for bin in vault jq; do
|
||||||
|
command -v "$bin" >/dev/null 2>&1 \
|
||||||
|
|| die "required binary not found: ${bin}"
|
||||||
|
done
|
||||||
|
|
||||||
|
[ -f "$VAULT_CONFIG_FILE" ] \
|
||||||
|
|| die "config not found: ${VAULT_CONFIG_FILE} — run systemd-vault.sh first"
|
||||||
|
|
||||||
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# vault_reachable — true iff `vault status` can reach the server.
|
||||||
|
# Exit codes from `vault status`:
|
||||||
|
# 0 = reachable, initialized, unsealed
|
||||||
|
# 2 = reachable, sealed (or uninitialized)
|
||||||
|
# 1 = unreachable / other error
|
||||||
|
# We treat 0 and 2 as "reachable". `|| status=$?` avoids set -e tripping
|
||||||
|
# on the expected sealed-is-also-fine case.
|
||||||
|
vault_reachable() {
  # Succeeds when `vault status` exits 0 or 2, the two codes the script
  # treats as "server answered"; any other exit code counts as unreachable.
  # The `|| rc=$?` capture keeps set -e from firing on the expected exit 2.
  local rc=0
  vault status -format=json >/dev/null 2>&1 || rc=$?
  case "$rc" in
    0|2) return 0 ;;
    *)   return 1 ;;
  esac
}
|
||||||
|
|
||||||
|
# vault_initialized — echoes "true" / "false" / "" (empty on parse failure).
|
||||||
|
vault_initialized() {
  # Echo the `.initialized` field of `vault status -format=json`: "true",
  # "false", or nothing when the status could not be fetched or parsed.
  # Always returns 0.
  #
  # `vault status` exits 2 for a sealed or uninitialized server, which is
  # exactly the state this script runs in on first boot. Piping it straight
  # into jq under `set -o pipefail` would make this function fail, and the
  # caller's `initialized="$(vault_initialized)"` would abort under set -e.
  # So capture the JSON first and deliberately ignore the exit status.
  local status_json=""
  status_json="$(vault status -format=json 2>/dev/null)" || true
  [ -n "$status_json" ] || return 0

  # A jq parse failure yields empty output; the caller rejects any value
  # other than "true"/"false", so swallowing jq's status here is safe.
  printf '%s' "$status_json" | jq -r '.initialized' 2>/dev/null || true
}
|
||||||
|
|
||||||
|
# write_secret_file PATH CONTENT
|
||||||
|
# Write CONTENT to PATH atomically with 0400 root:root and no trailing
|
||||||
|
# newline. mktemp+install keeps perms tight for the whole lifetime of
|
||||||
|
# the file on disk — no 0644-then-chmod window.
|
||||||
|
write_secret_file() {
  # write_secret_file PATH CONTENT
  #   Write CONTENT to PATH with owner root:root, mode 0400 and no trailing
  #   newline. Returns 0 on success, 1 on any failure.
  #
  # The temp file is created next to PATH (mktemp creates it 0600), so the
  # final `mv` is a same-directory rename: readers see either the old file
  # or the complete new one, never a partial write. On any failure the temp
  # file is removed so the secret is not left behind on disk.
  local path="$1" content="$2"
  local tmp

  tmp="$(mktemp "${path}.XXXXXX")" || return 1

  if printf '%s' "$content" > "$tmp" \
    && chown root:root "$tmp" \
    && chmod 0400 "$tmp" \
    && mv -f "$tmp" "$path"; then
    return 0
  fi

  rm -f "$tmp"
  return 1
}
|
||||||
|
|
||||||
|
# ── Ensure vault is reachable ────────────────────────────────────────────────
|
||||||
|
if ! vault_reachable; then
|
||||||
|
log "vault not reachable at ${VAULT_ADDR} — starting temporary server"
|
||||||
|
spawned_log="$(mktemp)"
|
||||||
|
vault server -config="$VAULT_CONFIG_FILE" >"$spawned_log" 2>&1 &
|
||||||
|
spawned_pid=$!
|
||||||
|
|
||||||
|
# Poll for readiness. Vault's API listener comes up before notify-ready
|
||||||
|
# in Type=notify mode, but well inside a few seconds even on cold boots.
|
||||||
|
ready=0
|
||||||
|
for _ in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
|
||||||
|
if vault_reachable; then
|
||||||
|
ready=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
if [ "$ready" -ne 1 ]; then
|
||||||
|
log "vault did not become reachable within 15s — server log follows:"
|
||||||
|
if [ -f "$spawned_log" ]; then
|
||||||
|
sed 's/^/[vault-server] /' "$spawned_log" >&2 || true
|
||||||
|
fi
|
||||||
|
die "failed to start temporary vault server"
|
||||||
|
fi
|
||||||
|
log "temporary vault ready (pid=${spawned_pid})"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Idempotency gate ─────────────────────────────────────────────────────────
|
||||||
|
initialized="$(vault_initialized)"
|
||||||
|
|
||||||
|
if [ "$initialized" = "true" ] && [ -f "$UNSEAL_KEY_FILE" ]; then
|
||||||
|
log "vault already initialized and unseal.key present — no-op"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Initialized storage without the persisted unseal key cannot be recovered
# by this script — fail hard and tell the operator what to do.
if [ "$initialized" = "true" ] && [ ! -f "$UNSEAL_KEY_FILE" ]; then
  # The wipe hint must name Vault's storage directory (storage "file" path
  # in vault.hcl), not a directory under /etc/vault.d.
  die "vault is initialized but ${UNSEAL_KEY_FILE} is missing — cannot recover the unseal key; restore from backup or wipe /var/lib/vault/data and re-run"
fi
|
||||||
|
|
||||||
|
if [ "$initialized" != "false" ]; then
|
||||||
|
die "unexpected initialized state: '${initialized}' (expected 'true' or 'false')"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Initialize ───────────────────────────────────────────────────────────────
|
||||||
|
log "initializing vault (key-shares=1, key-threshold=1)"
|
||||||
|
init_json="$(vault operator init \
|
||||||
|
-key-shares=1 \
|
||||||
|
-key-threshold=1 \
|
||||||
|
-format=json)" \
|
||||||
|
|| die "vault operator init failed"
|
||||||
|
|
||||||
|
unseal_key="$(printf '%s' "$init_json" | jq -er '.unseal_keys_b64[0]')" \
|
||||||
|
|| die "failed to extract unseal key from init response"
|
||||||
|
root_token="$(printf '%s' "$init_json" | jq -er '.root_token')" \
|
||||||
|
|| die "failed to extract root token from init response"
|
||||||
|
|
||||||
|
# Best-effort scrub of init_json from the env (the captured key+token still
|
||||||
|
# sit in the local vars above — there's no clean way to wipe bash memory).
|
||||||
|
unset init_json
|
||||||
|
|
||||||
|
# ── Persist keys ─────────────────────────────────────────────────────────────
|
||||||
|
log "writing ${UNSEAL_KEY_FILE} (0400 root)"
|
||||||
|
write_secret_file "$UNSEAL_KEY_FILE" "$unseal_key"
|
||||||
|
log "writing ${ROOT_TOKEN_FILE} (0400 root)"
|
||||||
|
write_secret_file "$ROOT_TOKEN_FILE" "$root_token"
|
||||||
|
|
||||||
|
# ── Unseal in the current process ────────────────────────────────────────────
|
||||||
|
log "unsealing vault"
|
||||||
|
vault operator unseal "$unseal_key" >/dev/null \
|
||||||
|
|| die "vault operator unseal failed"
|
||||||
|
|
||||||
|
log "done — vault initialized + unsealed + keys persisted"
|
||||||
41
nomad/vault.hcl
Normal file
41
nomad/vault.hcl
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
# =============================================================================
|
||||||
|
# nomad/vault.hcl — Single-node Vault configuration (dev-persisted seal)
|
||||||
|
#
|
||||||
|
# Part of the Nomad+Vault migration (S0.3, issue #823). Deployed to
|
||||||
|
# /etc/vault.d/vault.hcl on the factory dev box.
|
||||||
|
#
|
||||||
|
# Seal model: the single unseal key lives on disk at /etc/vault.d/unseal.key
|
||||||
|
# (0400 root) and is read by systemd ExecStartPost on every boot. This is
|
||||||
|
# the factory-dev-box-acceptable tradeoff — seal-key theft equals vault
|
||||||
|
# theft, but we avoid running a second Vault to auto-unseal the first.
|
||||||
|
#
|
||||||
|
# This is a factory dev-box baseline — TLS, HA, Raft storage, and audit
|
||||||
|
# devices are deliberately absent. Storage is the `file` backend (single
|
||||||
|
# node only). Listener is localhost-only, so no external TLS is needed.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# File storage backend — single-node only, no HA, no raft. State lives in
|
||||||
|
# /var/lib/vault/data which is created (root:root 0700) by
|
||||||
|
# lib/init/nomad/systemd-vault.sh before the unit starts.
|
||||||
|
storage "file" {
|
||||||
|
path = "/var/lib/vault/data"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Localhost-only listener. TLS is disabled because all callers are on the
|
||||||
|
# same box — flipping this to tls_disable=false is an audit-worthy change
|
||||||
|
# paired with cert provisioning.
|
||||||
|
listener "tcp" {
|
||||||
|
address = "127.0.0.1:8200"
|
||||||
|
tls_disable = true
|
||||||
|
}
|
||||||
|
|
||||||
|
# mlock prevents Vault's in-memory secrets from being swapped to disk. We
|
||||||
|
# keep it enabled; the systemd unit grants CAP_IPC_LOCK so mlock() succeeds.
|
||||||
|
disable_mlock = false
|
||||||
|
|
||||||
|
# Advertised API address — used by Vault clients on this host. Matches
|
||||||
|
# the listener above.
|
||||||
|
api_addr = "http://127.0.0.1:8200"
|
||||||
|
|
||||||
|
# UI on by default — same bind as listener, no TLS (localhost only).
|
||||||
|
ui = true
|
||||||
Loading…
Add table
Add a link
Reference in a new issue