fix: [nomad-step-0] S0.3 — install vault + systemd auto-unseal + vault-init.sh (dev-persisted seal) (#823)
Adds the Vault half of the factory-dev-box bringup, landed but not started (per the install-but-don't-start pattern used for nomad in #822): - lib/init/nomad/install.sh — now also installs vault from the shared HashiCorp apt repo. VAULT_VERSION pinned (1.18.5). Fast-path skips apt entirely when both binaries are at their pins; partial upgrades only touch the package that drifted. - nomad/vault.hcl — single-node config: file storage backend at /var/lib/vault/data, localhost listener on :8200, ui on, mlock kept on. No TLS / HA / audit yet; those land in later steps. - lib/init/nomad/systemd-vault.sh — writes /etc/systemd/system/vault.service (Type=notify, ExecStartPost auto-unseals from /etc/vault.d/unseal.key, CAP_IPC_LOCK granted for mlock), deploys nomad/vault.hcl to /etc/vault.d/, creates /var/lib/vault/data (0700 root), enables the unit without starting it. Idempotent via content-compare. - lib/init/nomad/vault-init.sh — first-run init: spawns a temporary `vault server` if not already reachable, runs operator-init with key-shares=1/threshold=1, persists unseal.key + root.token (0400 root), unseals once in-process, shuts down the temp server. Re-run detects initialized + unseal.key present → no-op. Initialized but key missing is a hard failure (can't recover). lib/hvault.sh already defaults VAULT_TOKEN to /etc/vault.d/root.token when the env var is absent, so no change needed there. Seal model: the single unseal key lives on disk; seal-key theft equals vault theft. Factory-dev-box-acceptable tradeoff — avoids running a second Vault to auto-unseal the first. Blocks S0.4 (#824). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
75bec43c4a
commit
57bc88b9a7
6 changed files with 519 additions and 68 deletions
|
|
@ -1,27 +1,30 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# lib/init/nomad/install.sh — Idempotent apt install of HashiCorp Nomad
|
||||
# lib/init/nomad/install.sh — Idempotent apt install of HashiCorp Nomad + Vault
|
||||
#
|
||||
# Part of the Nomad+Vault migration (S0.2, issue #822). Installs the `nomad`
|
||||
# binary from the HashiCorp apt repository. Does NOT install Vault — S0.3
|
||||
# owns that. Does NOT configure, start, or enable a systemd unit —
|
||||
# lib/init/nomad/systemd-nomad.sh owns that. Does NOT wire this script into
|
||||
# `disinto init` — S0.4 owns that.
|
||||
# Part of the Nomad+Vault migration. Installs both the `nomad` binary (S0.2,
|
||||
# issue #822) and the `vault` binary (S0.3, issue #823) from the same
|
||||
# HashiCorp apt repository. Does NOT configure, start, or enable any systemd
|
||||
# unit — lib/init/nomad/systemd-nomad.sh and lib/init/nomad/systemd-vault.sh
|
||||
# own that. Does NOT wire this script into `disinto init` — S0.4 owns that.
|
||||
#
|
||||
# Idempotency contract:
|
||||
# - Running twice back-to-back is a no-op once the target version is
|
||||
# - Running twice back-to-back is a no-op once both target versions are
|
||||
# installed and the apt source is in place.
|
||||
# - Adds the HashiCorp apt keyring only if it is absent.
|
||||
# - Adds the HashiCorp apt sources list only if it is absent.
|
||||
# - Skips `apt-get install` entirely when the installed version already
|
||||
# matches ${NOMAD_VERSION}.
|
||||
# - Skips `apt-get install` for any package whose installed version already
|
||||
# matches the pin. If both are at pin, exits before touching apt.
|
||||
#
|
||||
# Configuration:
|
||||
# NOMAD_VERSION — pinned Nomad version (default: see below). The apt
|
||||
# package name is versioned as "nomad=<version>-1".
|
||||
# NOMAD_VERSION — pinned Nomad version (default: see below). Apt package
|
||||
# name is versioned as "nomad=<version>-1".
|
||||
# VAULT_VERSION — pinned Vault version (default: see below). Apt package
|
||||
# name is versioned as "vault=<version>-1".
|
||||
#
|
||||
# Usage:
|
||||
# sudo NOMAD_VERSION=1.9.5 lib/init/nomad/install.sh
|
||||
# sudo lib/init/nomad/install.sh
|
||||
# sudo NOMAD_VERSION=1.9.5 VAULT_VERSION=1.18.5 lib/init/nomad/install.sh
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 success (installed or already present)
|
||||
|
|
@ -29,16 +32,29 @@
|
|||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
# Pin to a specific Nomad 1.x release. Bump here, not at call sites.
|
||||
# Pin to specific 1.x releases. Bump here, not at call sites.
|
||||
NOMAD_VERSION="${NOMAD_VERSION:-1.9.5}"
|
||||
VAULT_VERSION="${VAULT_VERSION:-1.18.5}"
|
||||
|
||||
HASHICORP_KEYRING="/usr/share/keyrings/hashicorp-archive-keyring.gpg"
|
||||
HASHICORP_SOURCES="/etc/apt/sources.list.d/hashicorp.list"
|
||||
HASHICORP_GPG_URL="https://apt.releases.hashicorp.com/gpg"
|
||||
HASHICORP_REPO_URL="https://apt.releases.hashicorp.com"
|
||||
|
||||
log() { printf '[install-nomad] %s\n' "$*"; }
|
||||
die() { printf '[install-nomad] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||
log() { printf '[install] %s\n' "$*"; }
|
||||
die() { printf '[install] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||
|
||||
# _installed_version BINARY
#   Echoes the installed semver for `nomad` or `vault` (e.g. "1.9.5").
#   Both tools print their version on the first line of `<bin> version` as
#   "<Name> v<semver>..." — awk takes field 2 of that line and strips the
#   leading "v".
#
#   Always returns 0. Echoes nothing when the binary is absent, when
#   `<bin> version` exits non-zero, or when it prints nothing. Callers run
#   under `set -euo pipefail` and capture this via $(...); a non-zero return
#   here would abort the installer instead of letting it (re)install.
_installed_version() {
  local bin="$1"
  local raw

  command -v "$bin" >/dev/null 2>&1 || return 0

  # Capture the output first instead of piping the tool straight into awk:
  # with pipefail, a failing (or SIGPIPE'd) `version` would otherwise become
  # this function's exit status.
  raw="$("$bin" version 2>/dev/null)" || raw=""
  [ -n "$raw" ] || return 0

  printf '%s\n' "$raw" | awk 'NR==1 {sub(/^v/, "", $2); print $2}'
}
|
||||
|
||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||
if [ "$(id -u)" -ne 0 ]; then
|
||||
|
|
@ -53,16 +69,24 @@ done
|
|||
CODENAME="$(lsb_release -cs)"
|
||||
[ -n "$CODENAME" ] || die "lsb_release returned empty codename"
|
||||
|
||||
# ── Fast-path: already at desired version? ───────────────────────────────────
|
||||
installed_version=""
|
||||
if command -v nomad >/dev/null 2>&1; then
|
||||
# `nomad version` prints e.g. "Nomad v1.9.5" on the first line.
|
||||
installed_version="$(nomad version 2>/dev/null \
|
||||
| awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}')"
|
||||
# ── Fast-path: are both already at desired versions? ─────────────────────────
|
||||
nomad_installed="$(_installed_version nomad)"
|
||||
vault_installed="$(_installed_version vault)"
|
||||
|
||||
need_pkgs=()
|
||||
if [ "$nomad_installed" = "$NOMAD_VERSION" ]; then
|
||||
log "nomad ${NOMAD_VERSION} already installed"
|
||||
else
|
||||
need_pkgs+=("nomad=${NOMAD_VERSION}-1")
|
||||
fi
|
||||
if [ "$vault_installed" = "$VAULT_VERSION" ]; then
|
||||
log "vault ${VAULT_VERSION} already installed"
|
||||
else
|
||||
need_pkgs+=("vault=${VAULT_VERSION}-1")
|
||||
fi
|
||||
|
||||
if [ "$installed_version" = "$NOMAD_VERSION" ]; then
|
||||
log "nomad ${NOMAD_VERSION} already installed — nothing to do"
|
||||
if [ "${#need_pkgs[@]}" -eq 0 ]; then
|
||||
log "nothing to do"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
|
@ -94,25 +118,26 @@ else
|
|||
apt_update_needed=0
|
||||
fi
|
||||
|
||||
# ── Install the pinned version ───────────────────────────────────────────────
|
||||
# ── Install the pinned versions ──────────────────────────────────────────────
|
||||
if [ "$apt_update_needed" -eq 1 ]; then
|
||||
log "running apt-get update"
|
||||
DEBIAN_FRONTEND=noninteractive apt-get update -qq \
|
||||
|| die "apt-get update failed"
|
||||
fi
|
||||
|
||||
# HashiCorp apt packages use the "<version>-1" package-revision suffix.
|
||||
pkg_spec="nomad=${NOMAD_VERSION}-1"
|
||||
log "installing ${pkg_spec}"
|
||||
log "installing ${need_pkgs[*]}"
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
"$pkg_spec" \
|
||||
|| die "apt-get install ${pkg_spec} failed"
|
||||
"${need_pkgs[@]}" \
|
||||
|| die "apt-get install ${need_pkgs[*]} failed"
|
||||
|
||||
# ── Verify ───────────────────────────────────────────────────────────────────
|
||||
final_version="$(nomad version 2>/dev/null \
|
||||
| awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}')"
|
||||
if [ "$final_version" != "$NOMAD_VERSION" ]; then
|
||||
die "post-install check: expected ${NOMAD_VERSION}, got '${final_version}'"
|
||||
final_nomad="$(_installed_version nomad)"
|
||||
if [ "$final_nomad" != "$NOMAD_VERSION" ]; then
|
||||
die "post-install check: expected nomad ${NOMAD_VERSION}, got '${final_nomad}'"
|
||||
fi
|
||||
final_vault="$(_installed_version vault)"
|
||||
if [ "$final_vault" != "$VAULT_VERSION" ]; then
|
||||
die "post-install check: expected vault ${VAULT_VERSION}, got '${final_vault}'"
|
||||
fi
|
||||
|
||||
log "nomad ${NOMAD_VERSION} installed successfully"
|
||||
log "nomad ${NOMAD_VERSION} + vault ${VAULT_VERSION} installed successfully"
|
||||
|
|
|
|||
70
lib/init/nomad/lib-systemd.sh
Normal file
70
lib/init/nomad/lib-systemd.sh
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# lib/init/nomad/lib-systemd.sh — Shared idempotent systemd-unit installer
|
||||
#
|
||||
# Sourced by lib/init/nomad/systemd-nomad.sh and lib/init/nomad/systemd-vault.sh
|
||||
# (and any future sibling) to collapse the "write unit if content differs,
|
||||
# daemon-reload, enable (never start)" boilerplate.
|
||||
#
|
||||
# Install-but-don't-start is the invariant this helper enforces — mid-migration
|
||||
# installers land files and enable units; the orchestrator (S0.4) starts them.
|
||||
#
|
||||
# Public API (sourced into caller scope):
|
||||
#
|
||||
# systemd_require_preconditions UNIT_PATH
|
||||
# Asserts the caller is uid 0 and `systemctl` is on $PATH. Calls the
|
||||
# caller's die() with a UNIT_PATH-scoped message on failure.
|
||||
#
|
||||
# systemd_install_unit UNIT_PATH UNIT_NAME UNIT_CONTENT
|
||||
# Writes UNIT_CONTENT to UNIT_PATH (0644 root:root) only if on-disk
|
||||
# content differs. If written, runs `systemctl daemon-reload`. Then
|
||||
# enables UNIT_NAME (no-op if already enabled). Never starts the unit.
|
||||
#
|
||||
# Caller contract:
|
||||
# - Callers MUST define `log()` and `die()` before sourcing this file (we
|
||||
# call log() for status chatter and rely on the caller's error-handling
|
||||
# stance; `set -e` propagates install/cmp/systemctl failures).
|
||||
# =============================================================================
|
||||
|
||||
# systemd_require_preconditions UNIT_PATH
#   Guard for unit installers: calls the caller-supplied die() unless the
#   effective uid is 0 and `systemctl` resolves on $PATH. UNIT_PATH is only
#   used to make the not-root message specific.
systemd_require_preconditions() {
  local target_unit="$1"

  # Root is needed to write the unit file.
  [ "$(id -u)" -ne 0 ] \
    && die "must run as root (needs write access to ${target_unit})"

  # No systemctl means no systemd to install a unit into.
  if ! command -v systemctl >/dev/null 2>&1; then
    die "systemctl not found (systemd is required)"
  fi
}
|
||||
|
||||
# systemd_install_unit UNIT_PATH UNIT_NAME UNIT_CONTENT
#   Installs a systemd unit without starting it.
#
#   UNIT_PATH     destination of the unit file
#   UNIT_NAME     unit name passed to `systemctl` (e.g. "vault.service")
#   UNIT_CONTENT  desired file content; a trailing newline is appended
#
#   Steps:
#     1. Write UNIT_CONTENT to UNIT_PATH (0644 root:root) only when the file
#        is missing or its content differs.
#     2. Run `systemctl daemon-reload` only if step 1 wrote the file.
#     3. `systemctl enable` the unit unless `is-enabled` already succeeds.
#
#   Uses the caller's log() and die(). The staging temp file is removed on
#   both the success and the failure path.
systemd_install_unit() {
  local unit_path="$1"
  local unit_name="$2"
  local unit_content="$3"

  local needs_reload=0
  if [ ! -f "$unit_path" ] \
     || ! printf '%s\n' "$unit_content" | cmp -s - "$unit_path"; then
    log "writing unit → ${unit_path}"
    local tmp
    tmp="$(mktemp)" || die "mktemp failed while staging ${unit_path}"
    # Stage then install; on any failure drop the staged copy before bailing
    # so repeated failed runs don't accumulate temp files.
    if ! { printf '%s\n' "$unit_content" > "$tmp" \
           && install -m 0644 -o root -g root "$tmp" "$unit_path"; }; then
      rm -f "$tmp"
      die "failed to write ${unit_path}"
    fi
    rm -f "$tmp"
    needs_reload=1
  else
    log "unit file already up to date"
  fi

  # systemd only needs to re-read unit files when one actually changed.
  if [ "$needs_reload" -eq 1 ]; then
    log "systemctl daemon-reload"
    systemctl daemon-reload
  fi

  # Enable only; starting the unit is deliberately left to the caller.
  if systemctl is-enabled --quiet "$unit_name" 2>/dev/null; then
    log "${unit_name} already enabled"
  else
    log "systemctl enable ${unit_name}"
    systemctl enable "$unit_name" >/dev/null
  fi
}
|
||||
|
|
@ -33,13 +33,11 @@ NOMAD_DATA_DIR="/var/lib/nomad"
|
|||
log() { printf '[systemd-nomad] %s\n' "$*"; }
|
||||
die() { printf '[systemd-nomad] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||
|
||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||
if [ "$(id -u)" -ne 0 ]; then
|
||||
die "must run as root (needs write access to ${UNIT_PATH})"
|
||||
fi
|
||||
# shellcheck source=lib-systemd.sh
|
||||
. "$(dirname "${BASH_SOURCE[0]}")/lib-systemd.sh"
|
||||
|
||||
command -v systemctl >/dev/null 2>&1 \
|
||||
|| die "systemctl not found (systemd is required)"
|
||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||
systemd_require_preconditions "$UNIT_PATH"
|
||||
|
||||
NOMAD_BIN="$(command -v nomad 2>/dev/null || true)"
|
||||
[ -n "$NOMAD_BIN" ] \
|
||||
|
|
@ -98,33 +96,7 @@ for d in "$NOMAD_CONFIG_DIR" "$NOMAD_DATA_DIR"; do
|
|||
fi
|
||||
done
|
||||
|
||||
# ── Install unit file only if content differs ────────────────────────────────
|
||||
needs_reload=0
|
||||
if [ ! -f "$UNIT_PATH" ] \
|
||||
|| ! printf '%s\n' "$DESIRED_UNIT" | cmp -s - "$UNIT_PATH"; then
|
||||
log "writing unit → ${UNIT_PATH}"
|
||||
tmp="$(mktemp)"
|
||||
trap 'rm -f "$tmp"' EXIT
|
||||
printf '%s\n' "$DESIRED_UNIT" > "$tmp"
|
||||
install -m 0644 -o root -g root "$tmp" "$UNIT_PATH"
|
||||
rm -f "$tmp"
|
||||
trap - EXIT
|
||||
needs_reload=1
|
||||
else
|
||||
log "unit file already up to date"
|
||||
fi
|
||||
|
||||
# ── Reload + enable ──────────────────────────────────────────────────────────
|
||||
if [ "$needs_reload" -eq 1 ]; then
|
||||
log "systemctl daemon-reload"
|
||||
systemctl daemon-reload
|
||||
fi
|
||||
|
||||
if systemctl is-enabled --quiet nomad.service 2>/dev/null; then
|
||||
log "nomad.service already enabled"
|
||||
else
|
||||
log "systemctl enable nomad"
|
||||
systemctl enable nomad.service >/dev/null
|
||||
fi
|
||||
# ── Install + reload + enable (shared with systemd-vault.sh via lib-systemd) ─
|
||||
systemd_install_unit "$UNIT_PATH" "nomad.service" "$DESIRED_UNIT"
|
||||
|
||||
log "done — unit installed and enabled (NOT started; S0.4 brings the cluster up)"
|
||||
|
|
|
|||
150
lib/init/nomad/systemd-vault.sh
Executable file
150
lib/init/nomad/systemd-vault.sh
Executable file
|
|
@ -0,0 +1,150 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# lib/init/nomad/systemd-vault.sh — Idempotent systemd unit installer for Vault
|
||||
#
|
||||
# Part of the Nomad+Vault migration (S0.3, issue #823). Lands four things:
|
||||
# 1. /etc/vault.d/ (0755 root:root)
|
||||
# 2. /etc/vault.d/vault.hcl (copy of nomad/vault.hcl, 0644 root:root)
|
||||
# 3. /var/lib/vault/data/ (0700 root:root, Vault file-storage backend)
|
||||
# 4. /etc/systemd/system/vault.service (0644 root:root)
|
||||
#
|
||||
# Then `systemctl enable vault` WITHOUT starting the service. Bootstrap
|
||||
# order is:
|
||||
# lib/init/nomad/install.sh (nomad + vault binaries)
|
||||
# lib/init/nomad/systemd-vault.sh (this script — unit + config + dirs)
|
||||
# lib/init/nomad/vault-init.sh (init + write unseal.key + unseal once)
|
||||
# systemctl start vault (ExecStartPost auto-unseals from file)
|
||||
#
|
||||
# The systemd unit's ExecStartPost reads /etc/vault.d/unseal.key and calls
|
||||
# `vault operator unseal`. That file is written by vault-init.sh on first
|
||||
# run; until it exists, `systemctl start vault` will leave Vault sealed
|
||||
# (ExecStartPost fails, unit goes into failed state — intentional, visible).
|
||||
#
|
||||
# Seal model:
|
||||
# The single unseal key lives at /etc/vault.d/unseal.key (0400 root).
|
||||
# Seal-key theft == vault theft. Dev-box acceptable; see docs/VAULT.md.
|
||||
#
|
||||
# Idempotency contract:
|
||||
# - Unit file NOT rewritten when on-disk content already matches desired.
|
||||
# - vault.hcl NOT rewritten when on-disk content matches the repo copy.
|
||||
# - `systemctl enable` on an already-enabled unit is a no-op.
|
||||
# - Safe to run unconditionally before every factory boot.
|
||||
#
|
||||
# Preconditions:
|
||||
# - vault binary installed (lib/init/nomad/install.sh)
|
||||
# - nomad/vault.hcl present in the repo (relative to this script)
|
||||
#
|
||||
# Usage:
|
||||
# sudo lib/init/nomad/systemd-vault.sh
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 success (unit+config installed + enabled, or already so)
|
||||
# 1 precondition failure (not root, no systemctl, no vault binary,
|
||||
# missing source config)
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
UNIT_PATH="/etc/systemd/system/vault.service"
|
||||
VAULT_CONFIG_DIR="/etc/vault.d"
|
||||
VAULT_CONFIG_FILE="${VAULT_CONFIG_DIR}/vault.hcl"
|
||||
VAULT_DATA_DIR="/var/lib/vault/data"
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
|
||||
VAULT_HCL_SRC="${REPO_ROOT}/nomad/vault.hcl"
|
||||
|
||||
log() { printf '[systemd-vault] %s\n' "$*"; }
|
||||
die() { printf '[systemd-vault] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||
|
||||
# shellcheck source=lib-systemd.sh
|
||||
. "${SCRIPT_DIR}/lib-systemd.sh"
|
||||
|
||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||
systemd_require_preconditions "$UNIT_PATH"
|
||||
|
||||
VAULT_BIN="$(command -v vault 2>/dev/null || true)"
|
||||
[ -n "$VAULT_BIN" ] \
|
||||
|| die "vault binary not found — run lib/init/nomad/install.sh first"
|
||||
|
||||
[ -f "$VAULT_HCL_SRC" ] \
|
||||
|| die "source config not found: ${VAULT_HCL_SRC}"
|
||||
|
||||
# ── Desired unit content ─────────────────────────────────────────────────────
|
||||
# Adapted from HashiCorp's recommended vault.service template
|
||||
# (https://developer.hashicorp.com/vault/tutorials/getting-started-deploy/deploy)
|
||||
# for a single-node factory dev box:
|
||||
# - User=root keeps the seal-key read path simple (unseal.key is 0400 root).
|
||||
# - CAP_IPC_LOCK lets mlock() succeed so disable_mlock=false is honoured.
|
||||
# Harmless when running as root; required if this is ever flipped to a
|
||||
# dedicated `vault` user.
|
||||
# - ExecStartPost auto-unseals on every boot using the persisted key.
|
||||
# This is the dev-persisted-seal tradeoff — seal-key theft == vault
|
||||
# theft, but no second Vault to babysit.
|
||||
# - ConditionFileNotEmpty guards against starting without config — makes
|
||||
# a missing vault.hcl visible in systemctl status, not a crash loop.
|
||||
# - Type=notify so systemd waits for Vault's listener-ready notification
|
||||
# before running ExecStartPost (ExecStartPost also has `sleep 2` as a
|
||||
# belt-and-braces guard against Type=notify edge cases).
|
||||
# - \$MAINPID is escaped so bash doesn't expand it inside this heredoc.
|
||||
# - \$(cat ...) is escaped so the subshell runs at unit-execution time
|
||||
# (inside bash -c), not at heredoc-expansion time here.
|
||||
read -r -d '' DESIRED_UNIT <<EOF || true
|
||||
[Unit]
|
||||
Description=HashiCorp Vault
|
||||
Documentation=https://developer.hashicorp.com/vault/docs
|
||||
Requires=network-online.target
|
||||
After=network-online.target
|
||||
ConditionFileNotEmpty=${VAULT_CONFIG_FILE}
|
||||
StartLimitIntervalSec=60
|
||||
StartLimitBurst=3
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
User=root
|
||||
Group=root
|
||||
Environment=VAULT_ADDR=http://127.0.0.1:8200
|
||||
SecureBits=keep-caps
|
||||
CapabilityBoundingSet=CAP_IPC_LOCK
|
||||
AmbientCapabilities=CAP_IPC_LOCK
|
||||
ExecStart=${VAULT_BIN} server -config=${VAULT_CONFIG_FILE}
|
||||
ExecStartPost=/bin/bash -c 'sleep 2 && ${VAULT_BIN} operator unseal \$(cat ${VAULT_CONFIG_DIR}/unseal.key)'
|
||||
ExecReload=/bin/kill --signal HUP \$MAINPID
|
||||
KillMode=process
|
||||
KillSignal=SIGINT
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
TimeoutStopSec=30
|
||||
LimitNOFILE=65536
|
||||
LimitMEMLOCK=infinity
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
# ── Ensure config + data dirs exist ──────────────────────────────────────────
|
||||
# /etc/vault.d is 0755 — vault.hcl is world-readable (no secrets in it);
|
||||
# the real secrets (unseal.key, root.token) get their own 0400 mode.
|
||||
# /var/lib/vault/data is 0700 — vault's on-disk state (encrypted-at-rest
|
||||
# by Vault itself, but an extra layer of "don't rely on that").
|
||||
if [ ! -d "$VAULT_CONFIG_DIR" ]; then
|
||||
log "creating ${VAULT_CONFIG_DIR}"
|
||||
install -d -m 0755 -o root -g root "$VAULT_CONFIG_DIR"
|
||||
fi
|
||||
if [ ! -d "$VAULT_DATA_DIR" ]; then
|
||||
log "creating ${VAULT_DATA_DIR}"
|
||||
install -d -m 0700 -o root -g root "$VAULT_DATA_DIR"
|
||||
fi
|
||||
|
||||
# ── Install vault.hcl only if content differs ────────────────────────────────
|
||||
if [ ! -f "$VAULT_CONFIG_FILE" ] \
|
||||
|| ! cmp -s "$VAULT_HCL_SRC" "$VAULT_CONFIG_FILE"; then
|
||||
log "writing config → ${VAULT_CONFIG_FILE}"
|
||||
install -m 0644 -o root -g root "$VAULT_HCL_SRC" "$VAULT_CONFIG_FILE"
|
||||
else
|
||||
log "config already up to date"
|
||||
fi
|
||||
|
||||
# ── Install + reload + enable (shared with systemd-nomad.sh via lib-systemd) ─
|
||||
systemd_install_unit "$UNIT_PATH" "vault.service" "$DESIRED_UNIT"
|
||||
|
||||
log "done — unit+config installed and enabled (NOT started; vault-init.sh next)"
|
||||
193
lib/init/nomad/vault-init.sh
Executable file
193
lib/init/nomad/vault-init.sh
Executable file
|
|
@ -0,0 +1,193 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# lib/init/nomad/vault-init.sh — Idempotent Vault first-run initializer
|
||||
#
|
||||
# Part of the Nomad+Vault migration (S0.3, issue #823). Initializes Vault
|
||||
# in dev-persisted-seal mode (single unseal key on disk) and unseals once.
|
||||
# On re-run, becomes a no-op — never re-initializes or rotates the key.
|
||||
#
|
||||
# What it does (first run):
|
||||
# 1. Ensures Vault is reachable at ${VAULT_ADDR} — spawns a temporary
|
||||
# `vault server -config=/etc/vault.d/vault.hcl` if not already up.
|
||||
# 2. Runs `vault operator init -key-shares=1 -key-threshold=1` and
|
||||
# captures the resulting unseal key + root token.
|
||||
# 3. Writes /etc/vault.d/unseal.key (0400 root, no trailing newline).
|
||||
# 4. Writes /etc/vault.d/root.token (0400 root, no trailing newline).
|
||||
# 5. Unseals Vault once in the current process.
|
||||
# 6. Shuts down the temporary server if we started one (so a subsequent
|
||||
# `systemctl start vault` doesn't conflict on port 8200).
|
||||
#
|
||||
# Idempotency contract:
|
||||
# - /etc/vault.d/unseal.key exists AND `vault status` reports
|
||||
# initialized=true → exit 0, no mutation, no re-init.
|
||||
# - Initialized-but-unseal.key-missing is a hard failure (can't recover
|
||||
# the key without the existing storage; user must restore from backup).
|
||||
#
|
||||
# Bootstrap order:
|
||||
# lib/init/nomad/install.sh (installs vault binary)
|
||||
# lib/init/nomad/systemd-vault.sh (lands unit + config + dirs; enables)
|
||||
# lib/init/nomad/vault-init.sh (this script — init + unseal once)
|
||||
# systemctl start vault (ExecStartPost auto-unseals henceforth)
|
||||
#
|
||||
# Seal model:
|
||||
# Single unseal key persisted on disk at /etc/vault.d/unseal.key. Seal-key
|
||||
# theft == vault theft. Factory-dev-box-acceptable; see docs/VAULT.md.
|
||||
#
|
||||
# Environment:
|
||||
# VAULT_ADDR — Vault API address (default: http://127.0.0.1:8200).
|
||||
#
|
||||
# Usage:
|
||||
# sudo lib/init/nomad/vault-init.sh
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 success (initialized + unsealed + keys persisted; or already done)
|
||||
# 1 precondition / operational failure
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
VAULT_CONFIG_FILE="/etc/vault.d/vault.hcl"
|
||||
UNSEAL_KEY_FILE="/etc/vault.d/unseal.key"
|
||||
ROOT_TOKEN_FILE="/etc/vault.d/root.token"
|
||||
VAULT_ADDR="${VAULT_ADDR:-http://127.0.0.1:8200}"
|
||||
export VAULT_ADDR
|
||||
|
||||
# Track whether we spawned a temporary vault (for cleanup).
|
||||
spawned_pid=""
|
||||
spawned_log=""
|
||||
|
||||
log() { printf '[vault-init] %s\n' "$*"; }
|
||||
die() { printf '[vault-init] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||
|
||||
# ── Cleanup: stop the temporary server (if we started one) on any exit ───────
# Registered as the EXIT trap, so it runs on every exit path of this script
# and a background vault we spawned is not left holding the listener port.
# Reads the globals spawned_pid / spawned_log; both are empty when no
# temporary server was started, in which case this does nothing.
cleanup() {
  # Only signal a server we launched ourselves, and only if it is still alive.
  if [ -n "$spawned_pid" ]; then
    if kill -0 "$spawned_pid" 2>/dev/null; then
      log "stopping temporary vault (pid=${spawned_pid})"
      kill "$spawned_pid" 2>/dev/null || true
      # Reap it so the process is gone before the script returns.
      wait "$spawned_pid" 2>/dev/null || true
    fi
  fi

  # Drop the captured server output, if any.
  [ -z "$spawned_log" ] || [ ! -f "$spawned_log" ] || rm -f "$spawned_log"
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||
if [ "$(id -u)" -ne 0 ]; then
|
||||
die "must run as root (needs to write 0400 files under /etc/vault.d)"
|
||||
fi
|
||||
|
||||
for bin in vault jq; do
|
||||
command -v "$bin" >/dev/null 2>&1 \
|
||||
|| die "required binary not found: ${bin}"
|
||||
done
|
||||
|
||||
[ -f "$VAULT_CONFIG_FILE" ] \
|
||||
|| die "config not found: ${VAULT_CONFIG_FILE} — run systemd-vault.sh first"
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# vault_reachable — succeeds iff `vault status` could talk to the server.
#   `vault status` exit codes, as relied on here:
#     0 = reachable, initialized, unsealed
#     2 = reachable, sealed (or uninitialized)
#     1 = unreachable / other error
#   Both 0 and 2 count as "reachable". The status is captured with
#   `|| rc=$?` so the expected sealed case cannot trip `set -e`.
vault_reachable() {
  local rc=0
  vault status -format=json >/dev/null 2>&1 || rc=$?
  case "$rc" in
    0|2) return 0 ;;
    *)   return 1 ;;
  esac
}
|
||||
|
||||
# vault_initialized — echoes "true" / "false" / "" (empty when `vault status`
# printed nothing or jq could not parse it). Always returns 0.
#
# `vault status` exits 2 while Vault is sealed or uninitialized, which is the
# normal state on first run. Piping it straight into jq under `pipefail`
# would surface that 2 as this function's status and abort a `set -e` caller
# doing `x="$(vault_initialized)"`, so the JSON is captured first and the
# exit status is deliberately ignored.
vault_initialized() {
  local status_json
  status_json="$(vault status -format=json 2>/dev/null)" || true
  [ -n "$status_json" ] || return 0
  printf '%s' "$status_json" | jq -r '.initialized' 2>/dev/null || true
}
|
||||
|
||||
# write_secret_file PATH CONTENT
#   Write CONTENT to PATH atomically with 0400 root:root and no trailing
#   newline. Returns 0 on success, 1 on any failure.
#
#   The secret is staged in a mktemp file created next to PATH (same
#   directory, so the final `mv` is a rename on one filesystem), tightened
#   to 0400 root:root, and only then moved into place. Readers therefore see
#   either the old file or the complete new one. If any step fails, the
#   staged copy is removed so no stray file holding the secret is left behind.
write_secret_file() {
  local path="$1" content="$2"
  local tmp

  # Stage beside the target rather than in the shared temp directory.
  tmp="$(mktemp "${path}.XXXXXX")" || return 1

  if printf '%s' "$content" > "$tmp" \
     && chmod 0400 "$tmp" \
     && chown root:root "$tmp" \
     && mv -f "$tmp" "$path"; then
    return 0
  fi

  rm -f "$tmp"
  return 1
}
|
||||
|
||||
# ── Ensure vault is reachable ────────────────────────────────────────────────
# If nothing answers at VAULT_ADDR, launch `vault server` in the background
# with the installed config and record its pid/log in spawned_pid/spawned_log
# so the EXIT-trap cleanup can stop it and remove the log.
if ! vault_reachable; then
  log "vault not reachable at ${VAULT_ADDR} — starting temporary server"
  spawned_log="$(mktemp)"
  vault server -config="$VAULT_CONFIG_FILE" >"$spawned_log" 2>&1 &
  spawned_pid=$!

  # Poll for readiness: up to 15 attempts, one second apart. Stop polling as
  # soon as the background server has exited (bad config, port in use, ...)
  # instead of sleeping out the remaining attempts against a dead process.
  ready=0
  server_exited=0
  for _ in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
    if vault_reachable; then
      ready=1
      break
    fi
    if ! kill -0 "$spawned_pid" 2>/dev/null; then
      server_exited=1
      break
    fi
    sleep 1
  done

  if [ "$ready" -ne 1 ]; then
    if [ "$server_exited" -eq 1 ]; then
      log "temporary vault server exited before becoming reachable — server log follows:"
    else
      log "vault did not become reachable within 15s — server log follows:"
    fi
    # Replay the captured server output, prefixed, to help diagnose the failure.
    if [ -f "$spawned_log" ]; then
      sed 's/^/[vault-server] /' "$spawned_log" >&2 || true
    fi
    die "failed to start temporary vault server"
  fi
  log "temporary vault ready (pid=${spawned_pid})"
fi
|
||||
|
||||
# ── Idempotency gate ─────────────────────────────────────────────────────────
# Decide between no-op, hard failure, and first-run init from the reported
# `initialized` flag plus the presence of the persisted unseal key.
#
# `vault status` exits non-zero while Vault is sealed/uninitialized, so a
# non-zero status from the helper is tolerated here; the captured text (not
# the exit code) drives the decision, and anything other than "true"/"false"
# is rejected explicitly below rather than aborting silently under `set -e`.
initialized="$(vault_initialized)" || true

case "$initialized" in
  true)
    if [ -f "$UNSEAL_KEY_FILE" ]; then
      log "vault already initialized and unseal.key present — no-op"
      exit 0
    fi
    # The storage backend lives under /var/lib/vault/data, not next to the
    # config in /etc/vault.d — name the real directory in the recovery hint.
    die "vault is initialized but ${UNSEAL_KEY_FILE} is missing — cannot recover the unseal key; restore from backup or wipe /var/lib/vault/data and re-run"
    ;;
  false)
    # Fresh Vault: fall through to initialization.
    ;;
  *)
    die "unexpected initialized state: '${initialized}' (expected 'true' or 'false')"
    ;;
esac
|
||||
|
||||
# ── Initialize ───────────────────────────────────────────────────────────────
|
||||
log "initializing vault (key-shares=1, key-threshold=1)"
|
||||
init_json="$(vault operator init \
|
||||
-key-shares=1 \
|
||||
-key-threshold=1 \
|
||||
-format=json)" \
|
||||
|| die "vault operator init failed"
|
||||
|
||||
unseal_key="$(printf '%s' "$init_json" | jq -er '.unseal_keys_b64[0]')" \
|
||||
|| die "failed to extract unseal key from init response"
|
||||
root_token="$(printf '%s' "$init_json" | jq -er '.root_token')" \
|
||||
|| die "failed to extract root token from init response"
|
||||
|
||||
# Best-effort scrub of init_json from the env (the captured key+token still
|
||||
# sit in the local vars above — there's no clean way to wipe bash memory).
|
||||
unset init_json
|
||||
|
||||
# ── Persist keys ─────────────────────────────────────────────────────────────
|
||||
log "writing ${UNSEAL_KEY_FILE} (0400 root)"
|
||||
write_secret_file "$UNSEAL_KEY_FILE" "$unseal_key"
|
||||
log "writing ${ROOT_TOKEN_FILE} (0400 root)"
|
||||
write_secret_file "$ROOT_TOKEN_FILE" "$root_token"
|
||||
|
||||
# ── Unseal in the current process ────────────────────────────────────────────
|
||||
log "unsealing vault"
|
||||
vault operator unseal "$unseal_key" >/dev/null \
|
||||
|| die "vault operator unseal failed"
|
||||
|
||||
log "done — vault initialized + unsealed + keys persisted"
|
||||
Loading…
Add table
Add a link
Reference in a new issue