fix: [nomad-step-0] S0.2 — install nomad + systemd unit + nomad/server.hcl/client.hcl (#822)
Lands the Nomad install + baseline HCL config for the single-node factory dev box. Nothing is wired into `disinto init` yet — S0.4 does that. - lib/init/nomad/install.sh: idempotent apt install pinned to NOMAD_VERSION (default 1.9.5). Adds HashiCorp apt keyring and sources list only if absent; fast-paths when the pinned version is already installed. - lib/init/nomad/systemd-nomad.sh: writes /etc/systemd/system/nomad.service (rewrites only when content differs), creates /etc/nomad.d and /var/lib/nomad, runs `systemctl enable nomad` WITHOUT starting. - nomad/server.hcl: single-node combined server+client role. bootstrap_expect=1, localhost bind, default ports pinned explicitly, UI enabled. No TLS/ACL — factory dev box baseline. - nomad/client.hcl: Docker task driver (allow_privileged=false, volumes enabled) and host_volume pre-wiring for forgejo-data, woodpecker-data, agent-data, project-repos, caddy-data, chat-history, ops-repo under /srv/disinto/*. Verified: `nomad config validate nomad/*.hcl` reports "Configuration is valid!" (with expected TLS/bootstrap warnings for a dev box). Shellcheck clean across the repo. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
74f49e1c2f
commit
06ead3a19d
4 changed files with 389 additions and 0 deletions
118
lib/init/nomad/install.sh
Executable file
118
lib/init/nomad/install.sh
Executable file
|
|
@ -0,0 +1,118 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# lib/init/nomad/install.sh — Idempotent apt install of HashiCorp Nomad
|
||||
#
|
||||
# Part of the Nomad+Vault migration (S0.2, issue #822). Installs the `nomad`
|
||||
# binary from the HashiCorp apt repository. Does NOT install Vault — S0.3
|
||||
# owns that. Does NOT configure, start, or enable a systemd unit —
|
||||
# lib/init/nomad/systemd-nomad.sh owns that. Does NOT wire this script into
|
||||
# `disinto init` — S0.4 owns that.
|
||||
#
|
||||
# Idempotency contract:
|
||||
# - Running twice back-to-back is a no-op once the target version is
|
||||
# installed and the apt source is in place.
|
||||
# - Adds the HashiCorp apt keyring only if it is absent.
|
||||
# - Adds the HashiCorp apt sources list only if it is absent.
|
||||
# - Skips `apt-get install` entirely when the installed version already
|
||||
# matches ${NOMAD_VERSION}.
|
||||
#
|
||||
# Configuration:
|
||||
# NOMAD_VERSION — pinned Nomad version (default: see below). The apt
|
||||
# package name is versioned as "nomad=<version>-1".
|
||||
#
|
||||
# Usage:
|
||||
# sudo NOMAD_VERSION=1.9.5 lib/init/nomad/install.sh
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 success (installed or already present)
|
||||
# 1 precondition failure (not Debian/Ubuntu, missing tools, not root)
|
||||
# =============================================================================
|
||||
set -o errexit -o nounset -o pipefail

# Target Nomad release. A NOMAD_VERSION already present in the environment
# wins; otherwise the pin below applies. Bump the pin here, not at call sites.
: "${NOMAD_VERSION:=1.9.5}"

# HashiCorp apt repository: remote endpoints first, then the local files
# this script manages.
HASHICORP_REPO_URL='https://apt.releases.hashicorp.com'
HASHICORP_GPG_URL="${HASHICORP_REPO_URL}/gpg"
HASHICORP_KEYRING='/usr/share/keyrings/hashicorp-archive-keyring.gpg'
HASHICORP_SOURCES='/etc/apt/sources.list.d/hashicorp.list'
|
||||
|
||||
# Print an informational message to stdout, tagged with this script's name.
# All arguments are joined into a single message.
log() {
  local message="$*"
  printf '%s %s\n' '[install-nomad]' "$message"
}
|
||||
# Print a tagged error message to stderr and terminate the script with
# exit status 1. All arguments are joined into a single message.
die() {
  local reason="$*"
  printf '%s %s\n' '[install-nomad] ERROR:' "$reason" >&2
  exit 1
}
|
||||
|
||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||
# Root is required: the script writes under /usr/share/keyrings and
# /etc/apt, and drives apt-get.
if [ "$(id -u)" -ne 0 ]; then
  die "must run as root (needs apt-get + /usr/share/keyrings write access)"
fi

# Every external tool used further down must be on PATH before any state
# is touched.
for required_tool in apt-get gpg curl lsb_release; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    die "required binary not found: ${required_tool}"
  fi
done

# Distribution codename; becomes the suite field of the apt source line.
CODENAME="$(lsb_release -cs)"
if [ -z "$CODENAME" ]; then
  die "lsb_release returned empty codename"
fi
|
||||
|
||||
# ── Fast-path: already at desired version? ───────────────────────────────────
|
||||
# Ask the nomad binary on PATH (if any) for its version. `nomad version`
# prints e.g. "Nomad v1.9.5" on the first line; awk takes the second field
# of that line and strips the leading "v".
installed_version=""
if command -v nomad >/dev/null 2>&1; then
  # This script runs with `set -e` and `pipefail`, so a nomad binary that
  # exits non-zero (corrupt or partially installed) would otherwise abort
  # the script right here without any message. Treat that case as
  # "version unknown" so execution falls through to the (re)install path.
  installed_version="$(nomad version 2>/dev/null \
    | awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}')" \
    || installed_version=""
fi

# Nothing to do when the pinned version is already on the box.
if [ "$installed_version" = "$NOMAD_VERSION" ]; then
  log "nomad ${NOMAD_VERSION} already installed — nothing to do"
  exit 0
fi
|
||||
|
||||
# ── Ensure HashiCorp apt keyring ─────────────────────────────────────────────
|
||||
# Install the HashiCorp archive keyring unless a non-empty one is already
# in place. `-s` (exists AND non-empty) is used instead of `-f` so that an
# empty file left behind by an earlier failed run is repaired rather than
# accepted as "already present".
if [ ! -s "$HASHICORP_KEYRING" ]; then
  log "adding HashiCorp apt keyring → ${HASHICORP_KEYRING}"

  # tmpkey  — downloaded ASCII-armored key.
  # tmpring — dearmored keyring, staged next to the final path so the
  #           closing `mv` is a same-filesystem rename; the final path
  #           only ever holds a complete keyring.
  tmpkey=""
  tmpring=""
  trap 'rm -f "$tmpkey" "$tmpring"' EXIT
  tmpkey="$(mktemp)"
  tmpring="$(mktemp "${HASHICORP_KEYRING}.XXXXXX")"

  curl -fsSL "$HASHICORP_GPG_URL" -o "$tmpkey" \
    || die "failed to fetch HashiCorp GPG key from ${HASHICORP_GPG_URL}"

  # Dearmor via stdin/stdout: the staging file already exists (mktemp
  # created it), and `gpg -o` would ask before overwriting it.
  gpg --dearmor < "$tmpkey" > "$tmpring" \
    || die "failed to dearmor HashiCorp GPG key"
  [ -s "$tmpring" ] \
    || die "failed to dearmor HashiCorp GPG key"

  # mktemp creates the file 0600; apt needs the keyring world-readable.
  chmod 0644 "$tmpring"
  mv -f "$tmpring" "$HASHICORP_KEYRING"

  rm -f "$tmpkey"
  trap - EXIT
else
  log "HashiCorp apt keyring already present"
fi
|
||||
|
||||
# ── Ensure HashiCorp apt sources list ────────────────────────────────────────
|
||||
# ── Ensure HashiCorp apt sources list ────────────────────────────────────────
# The file is rewritten only when it is missing or does not contain exactly
# the desired line.
desired_source="deb [signed-by=${HASHICORP_KEYRING}] ${HASHICORP_REPO_URL} ${CODENAME} main"
if [ ! -f "$HASHICORP_SOURCES" ] \
  || ! grep -qxF "$desired_source" "$HASHICORP_SOURCES"; then
  log "writing HashiCorp apt sources list → ${HASHICORP_SOURCES}"
  printf '%s\n' "$desired_source" > "$HASHICORP_SOURCES"
else
  log "HashiCorp apt sources list already present"
fi

# ── Install the pinned version ───────────────────────────────────────────────
# Always refresh the package index before installing. Refreshing only when
# the sources file was just written breaks two cases:
#   - a previous run wrote the sources list and then failed in
#     `apt-get update`; on retry the list is "already present", the update
#     is skipped, and the index never learns about the HashiCorp repo;
#   - NOMAD_VERSION was bumped to a release newer than the cached index.
# In both, `apt-get install nomad=<version>-1` cannot find the version.
# This point is only reached when an install is actually required (the
# fast path has already exited otherwise), so the back-to-back no-op
# guarantee is unaffected.
log "running apt-get update"
DEBIAN_FRONTEND=noninteractive apt-get update -qq \
  || die "apt-get update failed"

# HashiCorp apt packages use the "<version>-1" package-revision suffix.
pkg_spec="nomad=${NOMAD_VERSION}-1"
log "installing ${pkg_spec}"
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
  "$pkg_spec" \
  || die "apt-get install ${pkg_spec} failed"
|
||||
|
||||
# ── Verify ───────────────────────────────────────────────────────────────────
|
||||
# Re-read the version from the freshly installed binary (same parsing as
# the fast path: second field of the first line, leading "v" stripped).
# This script runs with `set -e` and `pipefail`; without the `||` fallback
# a failing `nomad version` would abort the script silently at the
# assignment and the explicit diagnostic below would never be printed.
final_version="$(nomad version 2>/dev/null \
  | awk 'NR==1 {sub(/^v/, "", $2); print $2; exit}')" \
  || final_version=""

if [ "$final_version" != "$NOMAD_VERSION" ]; then
  die "post-install check: expected ${NOMAD_VERSION}, got '${final_version}'"
fi

log "nomad ${NOMAD_VERSION} installed successfully"
|
||||
130
lib/init/nomad/systemd-nomad.sh
Executable file
130
lib/init/nomad/systemd-nomad.sh
Executable file
|
|
@ -0,0 +1,130 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# lib/init/nomad/systemd-nomad.sh — Idempotent systemd unit installer for Nomad
|
||||
#
|
||||
# Part of the Nomad+Vault migration (S0.2, issue #822). Writes
|
||||
# /etc/systemd/system/nomad.service pointing at /etc/nomad.d/ and runs
|
||||
# `systemctl enable nomad` WITHOUT starting the service — we don't launch
|
||||
# the cluster until S0.4 wires everything together.
|
||||
#
|
||||
# Idempotency contract:
|
||||
# - Existing unit file is NOT rewritten when on-disk content already
|
||||
# matches the desired content (avoids spurious `daemon-reload`).
|
||||
# - `systemctl enable` on an already-enabled unit is a no-op.
|
||||
# - This script is safe to run unconditionally before every factory boot.
|
||||
#
|
||||
# Preconditions:
|
||||
# - nomad binary installed (see lib/init/nomad/install.sh)
|
||||
# - /etc/nomad.d/ will hold server.hcl / client.hcl (placed by S0.4)
|
||||
#
|
||||
# Usage:
|
||||
# sudo lib/init/nomad/systemd-nomad.sh
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 success (unit installed + enabled, or already so)
|
||||
# 1 precondition failure (not root, no systemctl, no nomad binary)
|
||||
# =============================================================================
|
||||
set -o errexit -o nounset -o pipefail

# Directories handed to the nomad agent, and the unit file this script owns.
NOMAD_CONFIG_DIR='/etc/nomad.d'
NOMAD_DATA_DIR='/var/lib/nomad'
UNIT_PATH='/etc/systemd/system/nomad.service'
|
||||
|
||||
# Print an informational message to stdout, tagged with this script's name.
# All arguments are joined into a single message.
log() {
  local message="$*"
  printf '%s %s\n' '[systemd-nomad]' "$message"
}
|
||||
# Print a tagged error message to stderr and terminate the script with
# exit status 1. All arguments are joined into a single message.
die() {
  local reason="$*"
  printf '%s %s\n' '[systemd-nomad] ERROR:' "$reason" >&2
  exit 1
}
|
||||
|
||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||
# Root is required to write the unit file and to drive systemctl.
if [ "$(id -u)" -ne 0 ]; then
  die "must run as root (needs write access to ${UNIT_PATH})"
fi

if ! command -v systemctl >/dev/null 2>&1; then
  die "systemctl not found (systemd is required)"
fi

# Resolve the nomad binary once; its path is embedded in the unit's
# ExecStart. `|| true` keeps a missing binary from tripping `set -e` so the
# explicit error below is what the user sees.
NOMAD_BIN="$(command -v nomad 2>/dev/null || true)"
if [ -z "$NOMAD_BIN" ]; then
  die "nomad binary not found — run lib/init/nomad/install.sh first"
fi
|
||||
|
||||
# ── Desired unit content ─────────────────────────────────────────────────────
|
||||
# Upstream-recommended baseline (https://developer.hashicorp.com/nomad/docs/install/production/deployment-guide)
# trimmed for a single-node combined server+client dev box.
#   - Wants=/After= network-online: nomad must have networking up.
#   - User/Group=root: the Docker driver needs root to talk to dockerd.
#   - LimitNOFILE/LimitNPROC=infinity: avoid Nomad's startup warning.
#   - KillSignal=SIGINT: triggers Nomad's graceful shutdown path.
#   - Restart=on-failure with a bounded burst to avoid crash-loops eating the
#     journal when /etc/nomad.d/ is mis-configured.
#   - StartLimitIntervalSec=/StartLimitBurst= are declared in the [Unit]
#     section: since systemd 230 that is where start rate limiting is
#     configured, and StartLimitIntervalSec= placed under the service
#     section is rejected as an unknown key and ignored.
#
# The heredoc delimiter is unquoted on purpose: ${NOMAD_BIN} and
# ${NOMAD_CONFIG_DIR} are expanded now, while \$MAINPID is escaped so that
# systemd, not this shell, substitutes it. `read -d ''` returns non-zero at
# end of input, hence the `|| true`.
read -r -d '' DESIRED_UNIT <<EOF || true
[Unit]
Description=Nomad
Documentation=https://developer.hashicorp.com/nomad/docs
Wants=network-online.target
After=network-online.target

# When Docker is present, ensure dockerd is up before nomad starts — the
# Docker task driver needs the daemon socket available at startup.
Wants=docker.service
After=docker.service

# Bounded restart burst: give up after 3 starts within 10 seconds.
StartLimitIntervalSec=10
StartLimitBurst=3

[Service]
Type=notify
User=root
Group=root
ExecReload=/bin/kill -HUP \$MAINPID
ExecStart=${NOMAD_BIN} agent -config=${NOMAD_CONFIG_DIR}
KillMode=process
KillSignal=SIGINT
LimitNOFILE=infinity
LimitNPROC=infinity
Restart=on-failure
RestartSec=2
TasksMax=infinity
OOMScoreAdjust=-1000

[Install]
WantedBy=multi-user.target
EOF
|
||||
|
||||
# ── Ensure config + data dirs exist ──────────────────────────────────────────
|
||||
# We do not populate /etc/nomad.d/ here (that's S0.4). We do create the
|
||||
# directory so `nomad agent -config=/etc/nomad.d` doesn't error if the unit
|
||||
# is started before hcl files are dropped in.
|
||||
# Create the config and data directories (mode 0755) when they are missing;
# directories that already exist are left untouched.
for nomad_dir in "$NOMAD_CONFIG_DIR" "$NOMAD_DATA_DIR"; do
  [ -d "$nomad_dir" ] && continue
  log "creating ${nomad_dir}"
  install -d -m 0755 "$nomad_dir"
done
|
||||
|
||||
# ── Install unit file only if content differs ────────────────────────────────
|
||||
# Compare the desired unit text (plus trailing newline) with what is on
# disk. Only a missing or differing file is rewritten, and only a rewrite
# is followed by `systemctl daemon-reload`.
if [ -f "$UNIT_PATH" ] \
  && printf '%s\n' "$DESIRED_UNIT" | cmp -s - "$UNIT_PATH"; then
  log "unit file already up to date"
else
  log "writing unit → ${UNIT_PATH}"

  # Stage the content in a temp file and let `install` place it with the
  # final mode and ownership; the EXIT trap removes the temp file if
  # anything fails in between.
  staged_unit="$(mktemp)"
  trap 'rm -f "$staged_unit"' EXIT
  printf '%s\n' "$DESIRED_UNIT" > "$staged_unit"
  install -m 0644 -o root -g root "$staged_unit" "$UNIT_PATH"
  rm -f "$staged_unit"
  trap - EXIT

  # ── Reload ─────────────────────────────────────────────────────────────────
  log "systemctl daemon-reload"
  systemctl daemon-reload
fi

# ── Enable (never start) ─────────────────────────────────────────────────────
if ! systemctl is-enabled --quiet nomad.service 2>/dev/null; then
  log "systemctl enable nomad"
  systemctl enable nomad.service >/dev/null
else
  log "nomad.service already enabled"
fi

log "done — unit installed and enabled (NOT started; S0.4 brings the cluster up)"
|
||||
Loading…
Add table
Add a link
Reference in a new issue