fix: [nomad-step-0] S0.3 — install vault + systemd auto-unseal + vault-init.sh (dev-persisted seal) (#823) #828
6 changed files with 540 additions and 68 deletions
|
|
@ -1,27 +1,30 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# lib/init/nomad/install.sh — Idempotent apt install of HashiCorp Nomad
|
# lib/init/nomad/install.sh — Idempotent apt install of HashiCorp Nomad + Vault
|
||||||
#
|
#
|
||||||
# Part of the Nomad+Vault migration (S0.2, issue #822). Installs the `nomad`
|
# Part of the Nomad+Vault migration. Installs both the `nomad` binary (S0.2,
|
||||||
# binary from the HashiCorp apt repository. Does NOT install Vault — S0.3
|
# issue #822) and the `vault` binary (S0.3, issue #823) from the same
|
||||||
# owns that. Does NOT configure, start, or enable a systemd unit —
|
# HashiCorp apt repository. Does NOT configure, start, or enable any systemd
|
||||||
# lib/init/nomad/systemd-nomad.sh owns that. Does NOT wire this script into
|
# unit — lib/init/nomad/systemd-nomad.sh and lib/init/nomad/systemd-vault.sh
|
||||||
# `disinto init` — S0.4 owns that.
|
# own that. Does NOT wire this script into `disinto init` — S0.4 owns that.
|
||||||
#
|
#
|
||||||
# Idempotency contract:
|
# Idempotency contract:
|
||||||
# - Running twice back-to-back is a no-op once the target version is
|
# - Running twice back-to-back is a no-op once both target versions are
|
||||||
# installed and the apt source is in place.
|
# installed and the apt source is in place.
|
||||||
# - Adds the HashiCorp apt keyring only if it is absent.
|
# - Adds the HashiCorp apt keyring only if it is absent.
|
||||||
# - Adds the HashiCorp apt sources list only if it is absent.
|
# - Adds the HashiCorp apt sources list only if it is absent.
|
||||||
# - Skips `apt-get install` entirely when the installed version already
|
# - Skips `apt-get install` for any package whose installed version already
|
||||||
# matches ${NOMAD_VERSION}.
|
# matches the pin. If both are at pin, exits before touching apt.
|
||||||
#
|
#
|
||||||
# Configuration:
|
# Configuration:
|
||||||
# NOMAD_VERSION — pinned Nomad version (default: see below). The apt
|
# NOMAD_VERSION — pinned Nomad version (default: see below). Apt package
|
||||||
# package name is versioned as "nomad=<version>-1".
|
# name is versioned as "nomad=<version>-1".
|
||||||
|
# VAULT_VERSION — pinned Vault version (default: see below). Apt package
|
||||||
|
# name is versioned as "vault=<version>-1".
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# sudo NOMAD_VERSION=1.9.5 lib/init/nomad/install.sh
|
# sudo lib/init/nomad/install.sh
|
||||||
|
# sudo NOMAD_VERSION=1.9.5 VAULT_VERSION=1.18.5 lib/init/nomad/install.sh
|
||||||
#
|
#
|
||||||
# Exit codes:
|
# Exit codes:
|
||||||
# 0 success (installed or already present)
|
# 0 success (installed or already present)
|
||||||
|
|
@ -29,16 +32,29 @@
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
# Pin to a specific Nomad 1.x release. Bump here, not at call sites.
|
# Pin to specific 1.x releases. Bump here, not at call sites.
|
||||||
NOMAD_VERSION="${NOMAD_VERSION:-1.9.5}"
|
NOMAD_VERSION="${NOMAD_VERSION:-1.9.5}"
|
||||||
|
VAULT_VERSION="${VAULT_VERSION:-1.18.5}"
|
||||||
|
|
||||||
HASHICORP_KEYRING="/usr/share/keyrings/hashicorp-archive-keyring.gpg"
|
HASHICORP_KEYRING="/usr/share/keyrings/hashicorp-archive-keyring.gpg"
|
||||||
HASHICORP_SOURCES="/etc/apt/sources.list.d/hashicorp.list"
|
HASHICORP_SOURCES="/etc/apt/sources.list.d/hashicorp.list"
|
||||||
HASHICORP_GPG_URL="https://apt.releases.hashicorp.com/gpg"
|
HASHICORP_GPG_URL="https://apt.releases.hashicorp.com/gpg"
|
||||||
HASHICORP_REPO_URL="https://apt.releases.hashicorp.com"
|
HASHICORP_REPO_URL="https://apt.releases.hashicorp.com"
|
||||||
|
|
||||||
log() { printf '[install-nomad] %s\n' "$*"; }
|
log() { printf '[install] %s\n' "$*"; }
|
||||||
die() { printf '[install-nomad] ERROR: %s\n' "$*" >&2; exit 1; }
|
die() { printf '[install] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||||
|
|
||||||
|
# _installed_version BINARY
#   Echoes the installed semver for `nomad` or `vault` (e.g. "1.9.5").
#   Both tools print their version on the first line of `<bin> version` as
#   "<Name> v<semver>..." — the shared awk extracts $2 with the leading "v"
#   stripped.
#
#   Arguments: $1 - binary name to probe (looked up with `command -v`)
#   Outputs:   the parsed version on stdout; nothing when the binary is
#              absent, when `<bin> version` exits non-zero, or when the
#              first line has no second field
#   Returns:   always 0, so `x="$(_installed_version foo)"` is safe under
#              `set -euo pipefail` — a broken binary must lead to a
#              reinstall attempt, not to a silent abort of the script.
_installed_version() {
  local bin="$1"
  local version_output=""

  command -v "$bin" >/dev/null 2>&1 || return 0

  # Capture first instead of piping straight into awk: with pipefail a
  # failing `<bin> version` would otherwise become this function's status.
  version_output="$("$bin" version 2>/dev/null)" || version_output=""
  [ -n "$version_output" ] || return 0

  # Here-string (no pipeline) so awk's early `exit` cannot SIGPIPE a writer.
  awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}' <<<"$version_output"
}
|
||||||
|
|
||||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||||
if [ "$(id -u)" -ne 0 ]; then
|
if [ "$(id -u)" -ne 0 ]; then
|
||||||
|
|
@ -53,16 +69,24 @@ done
|
||||||
CODENAME="$(lsb_release -cs)"
|
CODENAME="$(lsb_release -cs)"
|
||||||
[ -n "$CODENAME" ] || die "lsb_release returned empty codename"
|
[ -n "$CODENAME" ] || die "lsb_release returned empty codename"
|
||||||
|
|
||||||
# ── Fast-path: already at desired version? ───────────────────────────────────
|
# ── Fast-path: are both already at desired versions? ─────────────────────────
|
||||||
installed_version=""
|
nomad_installed="$(_installed_version nomad)"
|
||||||
if command -v nomad >/dev/null 2>&1; then
|
vault_installed="$(_installed_version vault)"
|
||||||
# `nomad version` prints e.g. "Nomad v1.9.5" on the first line.
|
|
||||||
installed_version="$(nomad version 2>/dev/null \
|
need_pkgs=()
|
||||||
| awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}')"
|
if [ "$nomad_installed" = "$NOMAD_VERSION" ]; then
|
||||||
|
log "nomad ${NOMAD_VERSION} already installed"
|
||||||
|
else
|
||||||
|
need_pkgs+=("nomad=${NOMAD_VERSION}-1")
|
||||||
|
fi
|
||||||
|
if [ "$vault_installed" = "$VAULT_VERSION" ]; then
|
||||||
|
log "vault ${VAULT_VERSION} already installed"
|
||||||
|
else
|
||||||
|
need_pkgs+=("vault=${VAULT_VERSION}-1")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$installed_version" = "$NOMAD_VERSION" ]; then
|
if [ "${#need_pkgs[@]}" -eq 0 ]; then
|
||||||
log "nomad ${NOMAD_VERSION} already installed — nothing to do"
|
log "nothing to do"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -94,25 +118,26 @@ else
|
||||||
apt_update_needed=0
|
apt_update_needed=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── Install the pinned version ───────────────────────────────────────────────
|
# ── Install the pinned versions ──────────────────────────────────────────────
|
||||||
if [ "$apt_update_needed" -eq 1 ]; then
|
if [ "$apt_update_needed" -eq 1 ]; then
|
||||||
log "running apt-get update"
|
log "running apt-get update"
|
||||||
DEBIAN_FRONTEND=noninteractive apt-get update -qq \
|
DEBIAN_FRONTEND=noninteractive apt-get update -qq \
|
||||||
|| die "apt-get update failed"
|
|| die "apt-get update failed"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# HashiCorp apt packages use the "<version>-1" package-revision suffix.
|
log "installing ${need_pkgs[*]}"
|
||||||
pkg_spec="nomad=${NOMAD_VERSION}-1"
|
|
||||||
log "installing ${pkg_spec}"
|
|
||||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
"$pkg_spec" \
|
"${need_pkgs[@]}" \
|
||||||
|| die "apt-get install ${pkg_spec} failed"
|
|| die "apt-get install ${need_pkgs[*]} failed"
|
||||||
|
|
||||||
# ── Verify ───────────────────────────────────────────────────────────────────
|
# ── Verify ───────────────────────────────────────────────────────────────────
|
||||||
final_version="$(nomad version 2>/dev/null \
|
final_nomad="$(_installed_version nomad)"
|
||||||
| awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}')"
|
if [ "$final_nomad" != "$NOMAD_VERSION" ]; then
|
||||||
if [ "$final_version" != "$NOMAD_VERSION" ]; then
|
die "post-install check: expected nomad ${NOMAD_VERSION}, got '${final_nomad}'"
|
||||||
die "post-install check: expected ${NOMAD_VERSION}, got '${final_version}'"
|
fi
|
||||||
|
final_vault="$(_installed_version vault)"
|
||||||
|
if [ "$final_vault" != "$VAULT_VERSION" ]; then
|
||||||
|
die "post-install check: expected vault ${VAULT_VERSION}, got '${final_vault}'"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "nomad ${NOMAD_VERSION} installed successfully"
|
log "nomad ${NOMAD_VERSION} + vault ${VAULT_VERSION} installed successfully"
|
||||||
|
|
|
||||||
77
lib/init/nomad/lib-systemd.sh
Normal file
77
lib/init/nomad/lib-systemd.sh
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# lib/init/nomad/lib-systemd.sh — Shared idempotent systemd-unit installer
|
||||||
|
#
|
||||||
|
# Sourced by lib/init/nomad/systemd-nomad.sh and lib/init/nomad/systemd-vault.sh
|
||||||
|
# (and any future sibling) to collapse the "write unit if content differs,
|
||||||
|
# daemon-reload, enable (never start)" boilerplate.
|
||||||
|
#
|
||||||
|
# Install-but-don't-start is the invariant this helper enforces — mid-migration
|
||||||
|
# installers land files and enable units; the orchestrator (S0.4) starts them.
|
||||||
|
#
|
||||||
|
# Public API (sourced into caller scope):
|
||||||
|
#
|
||||||
|
# systemd_require_preconditions UNIT_PATH
|
||||||
|
# Asserts the caller is uid 0 and `systemctl` is on $PATH. Calls the
|
||||||
|
# caller's die() with a UNIT_PATH-scoped message on failure.
|
||||||
|
#
|
||||||
|
# systemd_install_unit UNIT_PATH UNIT_NAME UNIT_CONTENT
|
||||||
|
# Writes UNIT_CONTENT to UNIT_PATH (0644 root:root) only if on-disk
|
||||||
|
# content differs. If written, runs `systemctl daemon-reload`. Then
|
||||||
|
# enables UNIT_NAME (no-op if already enabled). Never starts the unit.
|
||||||
|
#
|
||||||
|
# Caller contract:
|
||||||
|
# - Callers MUST define `log()` and `die()` before sourcing this file (we
|
||||||
|
# call log() for status chatter and rely on the caller's error-handling
|
||||||
|
# stance; `set -e` propagates install/cmp/systemctl failures).
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# systemd_require_preconditions UNIT_PATH
#   Guard used by the unit installers before they touch the system.
#   Arguments: $1 - unit path, used only to scope the error message
#   Behaviour: calls the caller-provided die() when the effective uid is
#              not 0, and again when `systemctl` cannot be resolved.
systemd_require_preconditions() {
  local target_unit="$1"

  [ "$(id -u)" -eq 0 ] \
    || die "must run as root (needs write access to ${target_unit})"

  if ! command -v systemctl >/dev/null 2>&1; then
    die "systemctl not found (systemd is required)"
  fi
}
|
||||||
|
|
||||||
|
# systemd_install_unit UNIT_PATH UNIT_NAME UNIT_CONTENT
#   Arguments: $1 - destination path of the unit file
#              $2 - unit name handed to `systemctl is-enabled` / `enable`
#              $3 - desired unit text (a trailing newline is appended)
#   Behaviour: rewrites the file (0644 root:root) only when it is missing
#              or differs from the desired text, reloads systemd after a
#              rewrite, then enables the unit if it is not enabled yet.
#              The unit is never started here.
systemd_install_unit() {
  local unit_path="$1"
  local unit_name="$2"
  local unit_content="$3"

  if [ -f "$unit_path" ] \
    && printf '%s\n' "$unit_content" | cmp -s - "$unit_path"; then
    log "unit file already up to date"
  else
    log "writing unit → ${unit_path}"
    # The staging file is owned by a subshell so that its EXIT trap removes
    # the file whether `install` succeeds or errexit aborts the subshell,
    # and so that the trap cannot replace an EXIT trap set by the caller.
    (
      local staged
      staged="$(mktemp)"
      trap 'rm -f "$staged"' EXIT
      printf '%s\n' "$unit_content" > "$staged"
      install -m 0644 -o root -g root "$staged" "$unit_path"
    )
    # systemd only needs to re-read unit files when one was just written.
    log "systemctl daemon-reload"
    systemctl daemon-reload
  fi

  if ! systemctl is-enabled --quiet "$unit_name" 2>/dev/null; then
    log "systemctl enable ${unit_name}"
    systemctl enable "$unit_name" >/dev/null
  else
    log "${unit_name} already enabled"
  fi
}
|
||||||
|
|
@ -33,13 +33,11 @@ NOMAD_DATA_DIR="/var/lib/nomad"
|
||||||
log() { printf '[systemd-nomad] %s\n' "$*"; }
|
log() { printf '[systemd-nomad] %s\n' "$*"; }
|
||||||
die() { printf '[systemd-nomad] ERROR: %s\n' "$*" >&2; exit 1; }
|
die() { printf '[systemd-nomad] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||||
|
|
||||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
# shellcheck source=lib-systemd.sh
|
||||||
if [ "$(id -u)" -ne 0 ]; then
|
. "$(dirname "${BASH_SOURCE[0]}")/lib-systemd.sh"
|
||||||
die "must run as root (needs write access to ${UNIT_PATH})"
|
|
||||||
fi
|
|
||||||
|
|
||||||
command -v systemctl >/dev/null 2>&1 \
|
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||||
|| die "systemctl not found (systemd is required)"
|
systemd_require_preconditions "$UNIT_PATH"
|
||||||
|
|
||||||
NOMAD_BIN="$(command -v nomad 2>/dev/null || true)"
|
NOMAD_BIN="$(command -v nomad 2>/dev/null || true)"
|
||||||
[ -n "$NOMAD_BIN" ] \
|
[ -n "$NOMAD_BIN" ] \
|
||||||
|
|
@ -98,33 +96,7 @@ for d in "$NOMAD_CONFIG_DIR" "$NOMAD_DATA_DIR"; do
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# ── Install unit file only if content differs ────────────────────────────────
|
# ── Install + reload + enable (shared with systemd-vault.sh via lib-systemd) ─
|
||||||
needs_reload=0
|
systemd_install_unit "$UNIT_PATH" "nomad.service" "$DESIRED_UNIT"
|
||||||
if [ ! -f "$UNIT_PATH" ] \
|
|
||||||
|| ! printf '%s\n' "$DESIRED_UNIT" | cmp -s - "$UNIT_PATH"; then
|
|
||||||
log "writing unit → ${UNIT_PATH}"
|
|
||||||
tmp="$(mktemp)"
|
|
||||||
trap 'rm -f "$tmp"' EXIT
|
|
||||||
printf '%s\n' "$DESIRED_UNIT" > "$tmp"
|
|
||||||
install -m 0644 -o root -g root "$tmp" "$UNIT_PATH"
|
|
||||||
rm -f "$tmp"
|
|
||||||
trap - EXIT
|
|
||||||
needs_reload=1
|
|
||||||
else
|
|
||||||
log "unit file already up to date"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ── Reload + enable ──────────────────────────────────────────────────────────
|
|
||||||
if [ "$needs_reload" -eq 1 ]; then
|
|
||||||
log "systemctl daemon-reload"
|
|
||||||
systemctl daemon-reload
|
|
||||||
fi
|
|
||||||
|
|
||||||
if systemctl is-enabled --quiet nomad.service 2>/dev/null; then
|
|
||||||
log "nomad.service already enabled"
|
|
||||||
else
|
|
||||||
log "systemctl enable nomad"
|
|
||||||
systemctl enable nomad.service >/dev/null
|
|
||||||
fi
|
|
||||||
|
|
||||||
log "done — unit installed and enabled (NOT started; S0.4 brings the cluster up)"
|
log "done — unit installed and enabled (NOT started; S0.4 brings the cluster up)"
|
||||||
|
|
|
||||||
151
lib/init/nomad/systemd-vault.sh
Executable file
151
lib/init/nomad/systemd-vault.sh
Executable file
|
|
@ -0,0 +1,151 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# lib/init/nomad/systemd-vault.sh — Idempotent systemd unit installer for Vault
|
||||||
|
#
|
||||||
|
# Part of the Nomad+Vault migration (S0.3, issue #823). Lands four things:
|
||||||
|
# 1. /etc/vault.d/ (0755 root:root)
|
||||||
|
# 2. /etc/vault.d/vault.hcl (copy of nomad/vault.hcl, 0644 root:root)
|
||||||
|
# 3. /var/lib/vault/data/ (0700 root:root, Vault file-storage backend)
|
||||||
|
# 4. /etc/systemd/system/vault.service (0644 root:root)
|
||||||
|
#
|
||||||
|
# Then `systemctl enable vault` WITHOUT starting the service. Bootstrap
|
||||||
|
# order is:
|
||||||
|
# lib/init/nomad/install.sh (nomad + vault binaries)
|
||||||
|
# lib/init/nomad/systemd-vault.sh (this script — unit + config + dirs)
|
||||||
|
# lib/init/nomad/vault-init.sh (init + write unseal.key + unseal once)
|
||||||
|
# systemctl start vault (ExecStartPost auto-unseals from file)
|
||||||
|
#
|
||||||
|
# The systemd unit's ExecStartPost reads /etc/vault.d/unseal.key and calls
|
||||||
|
# `vault operator unseal`. That file is written by vault-init.sh on first
|
||||||
|
# run; until it exists, `systemctl start vault` will leave Vault sealed
|
||||||
|
# (ExecStartPost fails, unit goes into failed state — intentional, visible).
|
||||||
|
#
|
||||||
|
# Seal model:
|
||||||
|
# The single unseal key lives at /etc/vault.d/unseal.key (0400 root).
|
||||||
|
# Seal-key theft == vault theft. Factory-dev-box-acceptable tradeoff —
|
||||||
|
# we avoid running a second Vault to auto-unseal the first.
|
||||||
|
#
|
||||||
|
# Idempotency contract:
|
||||||
|
# - Unit file NOT rewritten when on-disk content already matches desired.
|
||||||
|
# - vault.hcl NOT rewritten when on-disk content matches the repo copy.
|
||||||
|
# - `systemctl enable` on an already-enabled unit is a no-op.
|
||||||
|
# - Safe to run unconditionally before every factory boot.
|
||||||
|
#
|
||||||
|
# Preconditions:
|
||||||
|
# - vault binary installed (lib/init/nomad/install.sh)
|
||||||
|
# - nomad/vault.hcl present in the repo (relative to this script)
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# sudo lib/init/nomad/systemd-vault.sh
|
||||||
|
#
|
||||||
|
# Exit codes:
|
||||||
|
# 0 success (unit+config installed + enabled, or already so)
|
||||||
|
# 1 precondition failure (not root, no systemctl, no vault binary,
|
||||||
|
# missing source config)
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
UNIT_PATH="/etc/systemd/system/vault.service"
|
||||||
|
VAULT_CONFIG_DIR="/etc/vault.d"
|
||||||
|
VAULT_CONFIG_FILE="${VAULT_CONFIG_DIR}/vault.hcl"
|
||||||
|
VAULT_DATA_DIR="/var/lib/vault/data"
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
|
||||||
|
VAULT_HCL_SRC="${REPO_ROOT}/nomad/vault.hcl"
|
||||||
|
|
||||||
|
log() { printf '[systemd-vault] %s\n' "$*"; }
|
||||||
|
die() { printf '[systemd-vault] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||||
|
|
||||||
|
# shellcheck source=lib-systemd.sh
|
||||||
|
. "${SCRIPT_DIR}/lib-systemd.sh"
|
||||||
|
|
||||||
|
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||||
|
systemd_require_preconditions "$UNIT_PATH"
|
||||||
|
|
||||||
|
VAULT_BIN="$(command -v vault 2>/dev/null || true)"
|
||||||
|
[ -n "$VAULT_BIN" ] \
|
||||||
|
|| die "vault binary not found — run lib/init/nomad/install.sh first"
|
||||||
|
|
||||||
|
[ -f "$VAULT_HCL_SRC" ] \
|
||||||
|
|| die "source config not found: ${VAULT_HCL_SRC}"
|
||||||
|
|
||||||
|
# ── Desired unit content ─────────────────────────────────────────────────────
|
||||||
|
# Adapted from HashiCorp's recommended vault.service template
|
||||||
|
# (https://developer.hashicorp.com/vault/tutorials/getting-started-deploy/deploy)
|
||||||
|
# for a single-node factory dev box:
|
||||||
|
# - User=root keeps the seal-key read path simple (unseal.key is 0400 root).
|
||||||
|
# - CAP_IPC_LOCK lets mlock() succeed so disable_mlock=false is honoured.
|
||||||
|
# Harmless when running as root; required if this is ever flipped to a
|
||||||
|
# dedicated `vault` user.
|
||||||
|
# - ExecStartPost auto-unseals on every boot using the persisted key.
|
||||||
|
# This is the dev-persisted-seal tradeoff — seal-key theft == vault
|
||||||
|
# theft, but no second Vault to babysit.
|
||||||
|
# - ConditionFileNotEmpty guards against starting without config — makes
|
||||||
|
# a missing vault.hcl visible in systemctl status, not a crash loop.
|
||||||
|
# - Type=notify so systemd waits for Vault's listener-ready notification
|
||||||
|
# before running ExecStartPost (ExecStartPost also has `sleep 2` as a
|
||||||
|
# belt-and-braces guard against Type=notify edge cases).
|
||||||
|
# - \$MAINPID is escaped so bash doesn't expand it inside this heredoc.
|
||||||
|
# - \$(cat ...) is escaped so the subshell runs at unit-execution time
|
||||||
|
# (inside bash -c), not at heredoc-expansion time here.
|
||||||
|
read -r -d '' DESIRED_UNIT <<EOF || true
|
||||||
|
[Unit]
|
||||||
|
Description=HashiCorp Vault
|
||||||
|
Documentation=https://developer.hashicorp.com/vault/docs
|
||||||
|
Requires=network-online.target
|
||||||
|
After=network-online.target
|
||||||
|
ConditionFileNotEmpty=${VAULT_CONFIG_FILE}
|
||||||
|
StartLimitIntervalSec=60
|
||||||
|
StartLimitBurst=3
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=notify
|
||||||
|
User=root
|
||||||
|
Group=root
|
||||||
|
Environment=VAULT_ADDR=http://127.0.0.1:8200
|
||||||
|
SecureBits=keep-caps
|
||||||
|
CapabilityBoundingSet=CAP_IPC_LOCK
|
||||||
|
AmbientCapabilities=CAP_IPC_LOCK
|
||||||
|
ExecStart=${VAULT_BIN} server -config=${VAULT_CONFIG_FILE}
|
||||||
|
ExecStartPost=/bin/bash -c 'sleep 2 && ${VAULT_BIN} operator unseal \$(cat ${VAULT_CONFIG_DIR}/unseal.key)'
|
||||||
|
ExecReload=/bin/kill --signal HUP \$MAINPID
|
||||||
|
KillMode=process
|
||||||
|
KillSignal=SIGINT
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5
|
||||||
|
TimeoutStopSec=30
|
||||||
|
LimitNOFILE=65536
|
||||||
|
LimitMEMLOCK=infinity
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# ── Ensure config + data dirs exist ──────────────────────────────────────────
|
||||||
|
# /etc/vault.d is 0755 — vault.hcl is world-readable (no secrets in it);
|
||||||
|
# the real secrets (unseal.key, root.token) get their own 0400 mode.
|
||||||
|
# /var/lib/vault/data is 0700 — vault's on-disk state (encrypted-at-rest
|
||||||
|
# by Vault itself, but an extra layer of "don't rely on that").
|
||||||
|
if [ ! -d "$VAULT_CONFIG_DIR" ]; then
|
||||||
|
log "creating ${VAULT_CONFIG_DIR}"
|
||||||
|
install -d -m 0755 -o root -g root "$VAULT_CONFIG_DIR"
|
||||||
|
fi
|
||||||
|
if [ ! -d "$VAULT_DATA_DIR" ]; then
|
||||||
|
log "creating ${VAULT_DATA_DIR}"
|
||||||
|
install -d -m 0700 -o root -g root "$VAULT_DATA_DIR"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Install vault.hcl only if content differs ────────────────────────────────
|
||||||
|
if [ ! -f "$VAULT_CONFIG_FILE" ] \
|
||||||
|
|| ! cmp -s "$VAULT_HCL_SRC" "$VAULT_CONFIG_FILE"; then
|
||||||
|
log "writing config → ${VAULT_CONFIG_FILE}"
|
||||||
|
install -m 0644 -o root -g root "$VAULT_HCL_SRC" "$VAULT_CONFIG_FILE"
|
||||||
|
else
|
||||||
|
log "config already up to date"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Install + reload + enable (shared with systemd-nomad.sh via lib-systemd) ─
|
||||||
|
systemd_install_unit "$UNIT_PATH" "vault.service" "$DESIRED_UNIT"
|
||||||
|
|
||||||
|
log "done — unit+config installed and enabled (NOT started; vault-init.sh next)"
|
||||||
206
lib/init/nomad/vault-init.sh
Executable file
206
lib/init/nomad/vault-init.sh
Executable file
|
|
@ -0,0 +1,206 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# lib/init/nomad/vault-init.sh — Idempotent Vault first-run initializer
|
||||||
|
#
|
||||||
|
# Part of the Nomad+Vault migration (S0.3, issue #823). Initializes Vault
|
||||||
|
# in dev-persisted-seal mode (single unseal key on disk) and unseals once.
|
||||||
|
# On re-run, becomes a no-op — never re-initializes or rotates the key.
|
||||||
|
#
|
||||||
|
# What it does (first run):
|
||||||
|
# 1. Ensures Vault is reachable at ${VAULT_ADDR} — spawns a temporary
|
||||||
|
# `vault server -config=/etc/vault.d/vault.hcl` if not already up.
|
||||||
|
# 2. Runs `vault operator init -key-shares=1 -key-threshold=1` and
|
||||||
|
# captures the resulting unseal key + root token.
|
||||||
|
# 3. Writes /etc/vault.d/unseal.key (0400 root, no trailing newline).
|
||||||
|
# 4. Writes /etc/vault.d/root.token (0400 root, no trailing newline).
|
||||||
|
# 5. Unseals Vault once in the current process.
|
||||||
|
# 6. Shuts down the temporary server if we started one (so a subsequent
|
||||||
|
# `systemctl start vault` doesn't conflict on port 8200).
|
||||||
|
#
|
||||||
|
# Idempotency contract:
|
||||||
|
# - /etc/vault.d/unseal.key exists AND `vault status` reports
|
||||||
|
# initialized=true → exit 0, no mutation, no re-init.
|
||||||
|
# - Initialized-but-unseal.key-missing is a hard failure (can't recover
|
||||||
|
# the key without the existing storage; user must restore from backup).
|
||||||
|
#
|
||||||
|
# Bootstrap order:
|
||||||
|
# lib/init/nomad/install.sh (installs vault binary)
|
||||||
|
# lib/init/nomad/systemd-vault.sh (lands unit + config + dirs; enables)
|
||||||
|
# lib/init/nomad/vault-init.sh (this script — init + unseal once)
|
||||||
|
# systemctl start vault (ExecStartPost auto-unseals henceforth)
|
||||||
|
#
|
||||||
|
# Seal model:
|
||||||
|
# Single unseal key persisted on disk at /etc/vault.d/unseal.key. Seal-key
|
||||||
|
# theft == vault theft. Factory-dev-box-acceptable tradeoff — we avoid
|
||||||
|
# running a second Vault to auto-unseal the first.
|
||||||
|
#
|
||||||
|
# Environment:
|
||||||
|
# VAULT_ADDR — Vault API address (default: http://127.0.0.1:8200).
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# sudo lib/init/nomad/vault-init.sh
|
||||||
|
#
|
||||||
|
# Exit codes:
|
||||||
|
# 0 success (initialized + unsealed + keys persisted; or already done)
|
||||||
|
# 1 precondition / operational failure
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
VAULT_CONFIG_FILE="/etc/vault.d/vault.hcl"
|
||||||
|
UNSEAL_KEY_FILE="/etc/vault.d/unseal.key"
|
||||||
|
ROOT_TOKEN_FILE="/etc/vault.d/root.token"
|
||||||
|
VAULT_ADDR="${VAULT_ADDR:-http://127.0.0.1:8200}"
|
||||||
|
export VAULT_ADDR
|
||||||
|
|
||||||
|
# Track whether we spawned a temporary vault (for cleanup).
|
||||||
|
spawned_pid=""
|
||||||
|
spawned_log=""
|
||||||
|
|
||||||
|
log() { printf '[vault-init] %s\n' "$*"; }
|
||||||
|
die() { printf '[vault-init] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||||
|
|
||||||
|
# ── Cleanup: stop the temporary server (if we started one) on any exit ───────
# Installed as the script's EXIT handler so a background vault that this
# script spawned does not keep holding port 8200 once the script is gone.
# Reads the globals spawned_pid / spawned_log; both are empty when no
# temporary server was started, in which case this does nothing.
cleanup() {
  if [ -n "$spawned_pid" ]; then
    # Only signal a process that is still alive.
    if kill -0 "$spawned_pid" 2>/dev/null; then
      log "stopping temporary vault (pid=${spawned_pid})"
      kill "$spawned_pid" 2>/dev/null || true
      # Reap it so the port is released before we return.
      wait "$spawned_pid" 2>/dev/null || true
    fi
  fi

  # Drop the captured server log, if one was created and still exists.
  [ -z "$spawned_log" ] || [ ! -f "$spawned_log" ] || rm -f "$spawned_log"
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||||
|
if [ "$(id -u)" -ne 0 ]; then
|
||||||
|
die "must run as root (needs to write 0400 files under /etc/vault.d)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
for bin in vault jq; do
|
||||||
|
command -v "$bin" >/dev/null 2>&1 \
|
||||||
|
|| die "required binary not found: ${bin}"
|
||||||
|
done
|
||||||
|
|
||||||
|
[ -f "$VAULT_CONFIG_FILE" ] \
|
||||||
|
|| die "config not found: ${VAULT_CONFIG_FILE} — run systemd-vault.sh first"
|
||||||
|
|
||||||
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# vault_reachable — succeeds iff `vault status` got an answer from the server.
#   `vault status` exit codes:
#     0 = reachable, initialized, unsealed
#     2 = reachable, sealed (or uninitialized)
#     1 = unreachable / other error
#   Both 0 and 2 count as reachable. The status is captured with
#   `|| rc=$?` so the expected exit 2 never trips `set -e`.
#   Returns: 0 when reachable, 1 otherwise.
vault_reachable() {
  local rc=0
  vault status -format=json >/dev/null 2>&1 || rc=$?
  case "$rc" in
    0 | 2) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||||
|
|
||||||
|
# vault_initialized — prints the `.initialized` field of `vault status`.
#   Outputs: whatever jq renders for `.initialized` (expected "true" or
#            "false"); nothing when `vault status` produced no output or
#            jq could not parse it.
#   Returns: always 0, so `x="$(vault_initialized)"` is safe under
#            `set -euo pipefail`.
#
#   `vault status` exits 2 when Vault is sealed OR uninitialized, which is
#   exactly the state a first run has to observe. Its output is therefore
#   captured with `|| true` before jq sees it; piping it directly would let
#   pipefail + set -e turn that exit 2 into a script abort ahead of
#   `vault operator init`.
vault_initialized() {
  local status_json
  status_json="$(vault status -format=json 2>/dev/null || true)"

  if [ -z "$status_json" ]; then
    printf ''
    return 0
  fi

  jq -r '.initialized' 2>/dev/null <<<"$status_json" || printf ''
}
|
||||||
|
|
||||||
|
# write_secret_file PATH CONTENT
#   Write CONTENT to PATH with mode 0400 root:root and no trailing newline.
#   The content is staged in a mktemp file and copied into place with
#   `install`, so PATH gets its final mode and owner from `install` rather
#   than from a later chmod/chown.
#
#   Arguments: $1 - destination path
#              $2 - secret content, written verbatim
#   Returns:   0 on success; non-zero when staging or `install` fails.
#
#   The staging file holds the secret in the temp directory, so it must not
#   outlive this call. A subshell-scoped EXIT trap removes it on success AND
#   on failure (including a set-e abort of `install`), and leaves the
#   script-level EXIT trap untouched.
write_secret_file() {
  local path="$1" content="$2"
  (
    local tmp
    tmp="$(mktemp)" || exit 1
    trap 'rm -f "$tmp"' EXIT
    printf '%s' "$content" > "$tmp" || exit 1
    install -m 0400 -o root -g root "$tmp" "$path"
  )
}
|
||||||
|
|
||||||
|
# ── Ensure vault is reachable ────────────────────────────────────────────────
|
||||||
|
if ! vault_reachable; then
|
||||||
|
log "vault not reachable at ${VAULT_ADDR} — starting temporary server"
|
||||||
|
spawned_log="$(mktemp)"
|
||||||
|
vault server -config="$VAULT_CONFIG_FILE" >"$spawned_log" 2>&1 &
|
||||||
|
spawned_pid=$!
|
||||||
|
|
||||||
|
# Poll for readiness. Vault's API listener comes up before notify-ready
|
||||||
|
# in Type=notify mode, but well inside a few seconds even on cold boots.
|
||||||
|
ready=0
|
||||||
|
for _ in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
|
||||||
|
if vault_reachable; then
|
||||||
|
ready=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
if [ "$ready" -ne 1 ]; then
|
||||||
|
log "vault did not become reachable within 15s — server log follows:"
|
||||||
|
if [ -f "$spawned_log" ]; then
|
||||||
|
sed 's/^/[vault-server] /' "$spawned_log" >&2 || true
|
||||||
|
fi
|
||||||
|
die "failed to start temporary vault server"
|
||||||
|
fi
|
||||||
|
log "temporary vault ready (pid=${spawned_pid})"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Idempotency gate ─────────────────────────────────────────────────────────
|
||||||
|
initialized="$(vault_initialized)"
|
||||||
|
|
||||||
|
if [ "$initialized" = "true" ] && [ -f "$UNSEAL_KEY_FILE" ]; then
|
||||||
|
log "vault already initialized and unseal.key present — no-op"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Initialized storage without the key file cannot be unsealed from here.
# The recovery hint names /var/lib/vault/data, the file-storage path set in
# nomad/vault.hcl. Deriving it from the config file's directory would point
# at /etc/vault.d/data, which is not where Vault keeps its state.
if [ "$initialized" = "true" ] && [ ! -f "$UNSEAL_KEY_FILE" ]; then
  die "vault is initialized but ${UNSEAL_KEY_FILE} is missing — cannot recover the unseal key; restore from backup or wipe /var/lib/vault/data and re-run"
fi
|
||||||
|
|
||||||
|
if [ "$initialized" != "false" ]; then
|
||||||
|
die "unexpected initialized state: '${initialized}' (expected 'true' or 'false')"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Initialize ───────────────────────────────────────────────────────────────
|
||||||
|
log "initializing vault (key-shares=1, key-threshold=1)"
|
||||||
|
init_json="$(vault operator init \
|
||||||
|
-key-shares=1 \
|
||||||
|
-key-threshold=1 \
|
||||||
|
-format=json)" \
|
||||||
|
|| die "vault operator init failed"
|
||||||
|
|
||||||
|
unseal_key="$(printf '%s' "$init_json" | jq -er '.unseal_keys_b64[0]')" \
|
||||||
|
|| die "failed to extract unseal key from init response"
|
||||||
|
root_token="$(printf '%s' "$init_json" | jq -er '.root_token')" \
|
||||||
|
|| die "failed to extract root token from init response"
|
||||||
|
|
||||||
|
# Best-effort scrub of init_json from the env (the captured key+token still
|
||||||
|
# sit in the local vars above — there's no clean way to wipe bash memory).
|
||||||
|
unset init_json
|
||||||
|
|
||||||
|
# ── Persist keys ─────────────────────────────────────────────────────────────
|
||||||
|
log "writing ${UNSEAL_KEY_FILE} (0400 root)"
|
||||||
|
write_secret_file "$UNSEAL_KEY_FILE" "$unseal_key"
|
||||||
|
log "writing ${ROOT_TOKEN_FILE} (0400 root)"
|
||||||
|
write_secret_file "$ROOT_TOKEN_FILE" "$root_token"
|
||||||
|
|
||||||
|
# ── Unseal in the current process ────────────────────────────────────────────
|
||||||
|
log "unsealing vault"
|
||||||
|
vault operator unseal "$unseal_key" >/dev/null \
|
||||||
|
|| die "vault operator unseal failed"
|
||||||
|
|
||||||
|
log "done — vault initialized + unsealed + keys persisted"
|
||||||
41
nomad/vault.hcl
Normal file
41
nomad/vault.hcl
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
# =============================================================================
|
||||||
|
# nomad/vault.hcl — Single-node Vault configuration (dev-persisted seal)
|
||||||
|
#
|
||||||
|
# Part of the Nomad+Vault migration (S0.3, issue #823). Deployed to
|
||||||
|
# /etc/vault.d/vault.hcl on the factory dev box.
|
||||||
|
#
|
||||||
|
# Seal model: the single unseal key lives on disk at /etc/vault.d/unseal.key
|
||||||
|
# (0400 root) and is read by systemd ExecStartPost on every boot. This is
|
||||||
|
# the factory-dev-box-acceptable tradeoff — seal-key theft equals vault
|
||||||
|
# theft, but we avoid running a second Vault to auto-unseal the first.
|
||||||
|
#
|
||||||
|
# This is a factory dev-box baseline — TLS, HA, Raft storage, and audit
|
||||||
|
# devices are deliberately absent. Storage is the `file` backend (single
|
||||||
|
# node only). Listener is localhost-only, so no external TLS is needed.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# File storage backend — single-node only, no HA, no raft. State lives in
|
||||||
|
# /var/lib/vault/data which is created (root:root 0700) by
|
||||||
|
# lib/init/nomad/systemd-vault.sh before the unit starts.
|
||||||
|
storage "file" {
|
||||||
|
path = "/var/lib/vault/data"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Localhost-only listener. TLS is disabled because all callers are on the
|
||||||
|
# same box — flipping this to tls_disable=false is an audit-worthy change
|
||||||
|
# paired with cert provisioning.
|
||||||
|
listener "tcp" {
|
||||||
|
address = "127.0.0.1:8200"
|
||||||
|
tls_disable = true
|
||||||
|
}
|
||||||
|
|
||||||
|
# mlock prevents Vault's in-memory secrets from being swapped to disk. We
|
||||||
|
# keep it enabled; the systemd unit grants CAP_IPC_LOCK so mlock() succeeds.
|
||||||
|
disable_mlock = false
|
||||||
|
|
||||||
|
# Advertised API address — used by Vault clients on this host. Matches
|
||||||
|
# the listener above.
|
||||||
|
api_addr = "http://127.0.0.1:8200"
|
||||||
|
|
||||||
|
# UI on by default — same bind as listener, no TLS (localhost only).
|
||||||
|
ui = true
|
||||||
Loading…
Add table
Add a link
Reference in a new issue