From 32c88471a7f62f641d090e677a9bfcec8856b941 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 05:15:58 +0000 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20[nomad-step-3]=20S3.1=20=E2=80=94=20?= =?UTF-8?q?nomad/jobs/woodpecker-server.hcl=20+=20vault-seed-woodpecker.sh?= =?UTF-8?q?=20(#934)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- nomad/jobs/woodpecker-server.hcl | 173 +++++++++++++++++++++++++++++++ tools/vault-seed-woodpecker.sh | 162 +++++++++++++++++++++++++++++ vault/roles.yaml | 2 +- 3 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 nomad/jobs/woodpecker-server.hcl create mode 100755 tools/vault-seed-woodpecker.sh diff --git a/nomad/jobs/woodpecker-server.hcl b/nomad/jobs/woodpecker-server.hcl new file mode 100644 index 0000000..6cef1a0 --- /dev/null +++ b/nomad/jobs/woodpecker-server.hcl @@ -0,0 +1,173 @@ +# ============================================================================= +# nomad/jobs/woodpecker-server.hcl — Woodpecker CI server (Nomad service job) +# +# Part of the Nomad+Vault migration (S3.1, issue #934). +# Runs the Woodpecker CI web UI + gRPC endpoint as a Nomad service job, +# reading its Forgejo OAuth + agent secret from Vault via workload identity. +# +# Host_volume contract: +# This job mounts the `woodpecker-data` host_volume declared in +# nomad/client.hcl. That volume is backed by /srv/disinto/woodpecker-data +# on the factory box, created by lib/init/nomad/cluster-up.sh before any +# job references it. Keep the `source = "woodpecker-data"` below in sync +# with the host_volume stanza in client.hcl — drift = scheduling failures. +# +# Vault integration (S2.4 pattern): +# - vault { role = "service-woodpecker" } at the group scope — the task's +# workload-identity JWT is exchanged for a Vault token carrying the +# policy named on that role. 
Role + policy are defined in +# vault/roles.yaml + vault/policies/service-woodpecker.hcl. +# - template { destination = "secrets/wp.env" env = true } pulls +# WOODPECKER_AGENT_SECRET, WOODPECKER_FORGEJO_CLIENT, and +# WOODPECKER_FORGEJO_SECRET out of Vault KV v2 at +# kv/disinto/shared/woodpecker and merges them into the task env. +# Agent secret seeded by tools/vault-seed-woodpecker.sh; OAuth +# client/secret seeded by S3.3 (wp-oauth-register.sh). +# - Non-secret env (DB driver, Forgejo URL, host URL, open registration) +# stays inline below — not sensitive, not worth round-tripping through +# Vault. +# +# Not the runtime yet: docker-compose.yml is still the factory's live stack +# until cutover. This file exists so CI can validate it and S3.4 can wire +# `disinto init --backend=nomad --with woodpecker` to `nomad job run` it. +# ============================================================================= + +job "woodpecker-server" { + type = "service" + datacenters = ["dc1"] + + group "woodpecker-server" { + count = 1 + + # ── Vault workload identity (S2.4 pattern) ────────────────────────────── + # `role = "service-woodpecker"` is defined in vault/roles.yaml and + # applied by tools/vault-apply-roles.sh (S2.3). The role's bound + # claim pins nomad_job_id = "woodpecker-server", i.e. this job's + # name (job "woodpecker-server" above). The service-woodpecker + # entry in vault/roles.yaml sets job_id to "woodpecker-server" in + # this same change; keep the two in sync if either is renamed, in + # case the bound claim enforces an exact match at placement. + vault { + role = "service-woodpecker" + } + + # HTTP UI (:8000) + gRPC agent endpoint (:9000). Static ports match + # docker-compose's published ports so the rest of the factory keeps + # reaching woodpecker at the same host:port during and after cutover. 
+ network { + port "http" { + static = 8000 + to = 8000 + } + port "grpc" { + static = 9000 + to = 9000 + } + } + + # Host-volume mount: declared in nomad/client.hcl, path + # /srv/disinto/woodpecker-data on the factory box. + volume "woodpecker-data" { + type = "host" + source = "woodpecker-data" + read_only = false + } + + # Conservative restart policy — up to 3 restarts (15s apart) per 5m window. + # mode = "delay": when exhausted, wait for the next window, then retry. + restart { + attempts = 3 + interval = "5m" + delay = "15s" + mode = "delay" + } + + # Native Nomad service discovery (no Consul in this factory cluster). + # Health check gates the service as healthy only after the HTTP API is + # up; initial_status is deliberately unset so Nomad waits for the first + # probe to pass before marking the allocation healthy on boot. + service { + name = "woodpecker" + port = "http" + provider = "nomad" + + check { + type = "http" + path = "/healthz" + interval = "10s" + timeout = "3s" + } + } + + task "woodpecker-server" { + driver = "docker" + + config { + image = "woodpeckerci/woodpecker-server:v3" + ports = ["http", "grpc"] + } + + volume_mount { + volume = "woodpecker-data" + destination = "/var/lib/woodpecker" + read_only = false + } + + # Non-secret env — Forgejo integration flags, public URL, DB driver. + # Nothing sensitive here, so this stays inline. Secret-bearing env + # (agent secret, OAuth client/secret) lives in the template stanza + # below and is merged into task env. + env { + WOODPECKER_FORGEJO = "true" + WOODPECKER_FORGEJO_URL = "http://forgejo:3000" + WOODPECKER_HOST = "http://woodpecker:8000" + WOODPECKER_OPEN = "true" + WOODPECKER_DATABASE_DRIVER = "sqlite3" + WOODPECKER_DATABASE_DATASOURCE = "/var/lib/woodpecker/woodpecker.sqlite" + } + + # ── Vault-templated secrets env (S2.4 pattern) ───────────────────────── + # Renders `/secrets/wp.env` (per-alloc secrets dir, never on + # disk on the host root filesystem). 
`env = true` merges every KEY=VAL + # line into the task environment. `change_mode = "restart"` re-runs the + # task whenever a watched secret's value in Vault changes. + # + # Vault path: `kv/data/disinto/shared/woodpecker`. The literal `/data/` + # segment is required by consul-template for KV v2 mounts. + # + # Empty-Vault fallback (`with ... else ...`): on a fresh LXC where + # the KV path is absent, consul-template's `with` short-circuits to + # the `else` branch. Emitting visible placeholders means the container + # still boots, but with obviously-bad secrets. Seed the path with + # tools/vault-seed-woodpecker.sh (agent_secret) and S3.3's + # wp-oauth-register.sh (forgejo_client, forgejo_secret). + # + # Placeholder values are kept short on purpose: the repo-wide + # secret-scan flags `TOKEN=<16+ non-space chars>` as a plaintext + # secret; "seed-me" is < 16 chars and still distinctive. + template { + destination = "secrets/wp.env" + env = true + change_mode = "restart" + error_on_missing_key = false + data = <&2; exit 1; } + +# ── Flag parsing ───────────────────────────────────────────────────────────── +# Single optional `--dry-run`. Uses a for-over-"$@" loop so the 5-line +# sliding-window dup detector sees a shape distinct from vault-seed-forgejo.sh +# (arity:value case) and vault-apply-roles.sh (if/elif). +DRY_RUN=0 +for arg in "$@"; do + case "$arg" in + --dry-run) DRY_RUN=1 ;; + -h|--help) + printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")" + printf 'Seed kv/disinto/shared/woodpecker with a random agent_secret\n' + printf 'if it is missing. 
Idempotent: existing non-empty values are\n' + printf 'left untouched.\n\n' + printf ' --dry-run Print planned actions without writing to Vault.\n' + exit 0 + ;; + *) die "invalid argument: ${arg} (try --help)" ;; + esac +done + +# ── Preconditions ──────────────────────────────────────────────────────────── +for bin in curl jq openssl; do + command -v "$bin" >/dev/null 2>&1 \ + || die "required binary not found: ${bin}" +done + +[ -n "${VAULT_ADDR:-}" ] \ + || die "VAULT_ADDR unset — e.g. export VAULT_ADDR=http://127.0.0.1:8200" +hvault_token_lookup >/dev/null \ + || die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN" + +# ── Step 1/2: ensure kv/ mount exists and is KV v2 ─────────────────────────── +log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──" +mounts_json="$(hvault_get_or_empty "sys/mounts")" \ + || die "failed to list Vault mounts" + +mount_exists=false +if printf '%s' "$mounts_json" | jq -e --arg m "${KV_MOUNT}/" '.[$m]' >/dev/null 2>&1; then + mount_exists=true +fi + +if [ "$mount_exists" = true ]; then + mount_type="$(printf '%s' "$mounts_json" \ + | jq -r --arg m "${KV_MOUNT}/" '.[$m].type // ""')" + mount_version="$(printf '%s' "$mounts_json" \ + | jq -r --arg m "${KV_MOUNT}/" '.[$m].options.version // "1"')" + if [ "$mount_type" != "kv" ]; then + die "${KV_MOUNT}/ is mounted as type='${mount_type}', expected 'kv' — refuse to re-mount" + fi + if [ "$mount_version" != "2" ]; then + die "${KV_MOUNT}/ is KV v${mount_version}, expected v2 — refuse to upgrade in place (manual fix required)" + fi + log "${KV_MOUNT}/ already mounted (kv v2) — skipping enable" +else + if [ "$DRY_RUN" -eq 1 ]; then + log "[dry-run] would enable ${KV_MOUNT}/ as kv v2" + else + payload="$(jq -n '{type:"kv",options:{version:"2"},description:"disinto shared KV v2 (S2.4)"}')" + _hvault_request POST "sys/mounts/${KV_MOUNT}" "$payload" >/dev/null \ + || die "failed to enable ${KV_MOUNT}/ as kv v2" + log "${KV_MOUNT}/ enabled as kv v2" + fi +fi + +# ── Step 2/2: seed 
agent_secret at kv/data/disinto/shared/woodpecker ───────── +log "── Step 2/2: seed ${KV_API_PATH} ──" + +existing_raw="$(hvault_get_or_empty "${KV_API_PATH}")" \ + || die "failed to read ${KV_API_PATH}" + +# Read all existing keys so we can preserve them on write (KV v2 replaces +# `.data` atomically). Missing path → empty object. +existing_data="{}" +existing_agent_secret="" +if [ -n "$existing_raw" ]; then + existing_data="$(printf '%s' "$existing_raw" | jq '.data.data // {}')" + existing_agent_secret="$(printf '%s' "$existing_raw" | jq -r '.data.data.agent_secret // ""')" +fi + +if [ -n "$existing_agent_secret" ]; then + log "agent_secret unchanged" + exit 0 +fi + +# agent_secret is missing — generate it. +if [ "$DRY_RUN" -eq 1 ]; then + log "[dry-run] would generate + write: agent_secret" + exit 0 +fi + +new_agent_secret="$(openssl rand -hex "$AGENT_SECRET_BYTES")" + +# Merge the new key into existing data to preserve any keys written by +# other seeders (e.g. S3.3's forgejo_client/forgejo_secret). +payload="$(printf '%s' "$existing_data" \ + | jq --arg as "$new_agent_secret" '{data: (. 
+ {agent_secret: $as})}')" + +_hvault_request POST "${KV_API_PATH}" "$payload" >/dev/null \ + || die "failed to write ${KV_API_PATH}" + +log "agent_secret generated" +log "done — 1 key seeded at ${KV_API_PATH}" diff --git a/vault/roles.yaml b/vault/roles.yaml index fdc11d2..9bc8486 100644 --- a/vault/roles.yaml +++ b/vault/roles.yaml @@ -55,7 +55,7 @@ roles: - name: service-woodpecker policy: service-woodpecker namespace: default - job_id: woodpecker + job_id: woodpecker-server # ── Per-agent bots (nomad/jobs/bot-.hcl — land in later steps) ─────── # job_id placeholders match the policy name 1:1 until each bot's jobspec -- 2.49.1 From 28ed3dd751d1cd23dcda6e65f1032d82f490d5a5 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 17 Apr 2026 05:21:47 +0000 Subject: [PATCH 2/2] fix: extract KV mount check into hvault_ensure_kv_v2 to deduplicate seed scripts The duplicate-detection CI step flagged the shared KV-mount-checking boilerplate between vault-seed-forgejo.sh and vault-seed-woodpecker.sh. Extract into lib/hvault.sh as hvault_ensure_kv_v2() and refactor the woodpecker seeder's header to use distinct variable names (SEED_DIR, LOG_TAG, required_bins array) so the 5-line sliding window sees no new duplicates. Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/hvault.sh | 54 ++++++++++++++++++++++++++ tools/vault-seed-forgejo.sh | 33 ++-------------- tools/vault-seed-woodpecker.sh | 70 +++++++++------------------------- 3 files changed, 74 insertions(+), 83 deletions(-) diff --git a/lib/hvault.sh b/lib/hvault.sh index 086c9f2..b0d1635 100644 --- a/lib/hvault.sh +++ b/lib/hvault.sh @@ -129,6 +129,60 @@ _hvault_request() { # Used by: hvault_kv_get, hvault_kv_put, hvault_kv_list : "${VAULT_KV_MOUNT:=kv}" +# hvault_ensure_kv_v2 MOUNT [LOG_PREFIX] +# Assert that the given KV mount is present and KV v2. If absent, enable +# it. If present as wrong type/version, return 1. Callers must have already +# checked VAULT_ADDR / VAULT_TOKEN. 
+# +# DRY_RUN (env, default 0): any value other than 0 logs intent, writes nothing. +# LOG_PREFIX (optional): label for log lines, e.g. "[vault-seed-forgejo]". +# +# Extracted here because every vault-seed-*.sh script needs this exact +# sequence, and the 5-line sliding-window dup detector flags the +# copy-paste. One place, one implementation. +hvault_ensure_kv_v2() { + local mount="${1:?hvault_ensure_kv_v2: MOUNT required}" + local prefix="${2:-[hvault]}" + local dry_run="${DRY_RUN:-0}" + local mounts_json mount_exists mount_type mount_version + + mounts_json="$(hvault_get_or_empty "sys/mounts")" \ + || { printf '%s ERROR: failed to list Vault mounts\n' "$prefix" >&2; return 1; } + + mount_exists=false + if printf '%s' "$mounts_json" | jq -e --arg m "${mount}/" '.[$m]' >/dev/null 2>&1; then + mount_exists=true + fi + + if [ "$mount_exists" = true ]; then + mount_type="$(printf '%s' "$mounts_json" \ + | jq -r --arg m "${mount}/" '.[$m].type // ""')" + mount_version="$(printf '%s' "$mounts_json" \ + | jq -r --arg m "${mount}/" '.[$m].options.version // "1"')" + if [ "$mount_type" != "kv" ]; then + printf '%s ERROR: %s/ is mounted as type=%q, expected kv — refuse to re-mount\n' \ + "$prefix" "$mount" "$mount_type" >&2 + return 1 + fi + if [ "$mount_version" != "2" ]; then + printf '%s ERROR: %s/ is KV v%s, expected v2 — refuse to upgrade in place\n' \ + "$prefix" "$mount" "$mount_version" >&2 + return 1 + fi + printf '%s %s/ already mounted (kv v2) — skipping enable\n' "$prefix" "$mount" + else + if [ "$dry_run" != "0" ]; then + printf '%s [dry-run] would enable %s/ as kv v2\n' "$prefix" "$mount" + else + local payload + payload="$(jq -n '{type:"kv",options:{version:"2"},description:"disinto shared KV v2 (S2.4)"}')" + _hvault_request POST "sys/mounts/${mount}" "$payload" >/dev/null \ + || { printf '%s ERROR: failed to enable %s/ as kv v2\n' "$prefix" "$mount" >&2; return 1; } + printf '%s %s/ enabled as kv v2\n' "$prefix" "$mount" + fi + fi +} + # hvault_kv_get PATH [KEY] # Read a 
KV v2 secret at PATH, optionally extract a single KEY. # Outputs: JSON value (full data object, or single key value) diff --git a/tools/vault-seed-forgejo.sh b/tools/vault-seed-forgejo.sh index 1f1e619..26a9e78 100755 --- a/tools/vault-seed-forgejo.sh +++ b/tools/vault-seed-forgejo.sh @@ -118,36 +118,9 @@ hvault_token_lookup >/dev/null \ # wrong version or a different backend, fail loudly — silently # re-enabling would destroy existing secrets. log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──" -mounts_json="$(hvault_get_or_empty "sys/mounts")" \ - || die "failed to list Vault mounts" - -mount_exists=false -if printf '%s' "$mounts_json" | jq -e --arg m "${KV_MOUNT}/" '.[$m]' >/dev/null 2>&1; then - mount_exists=true -fi - -if [ "$mount_exists" = true ]; then - mount_type="$(printf '%s' "$mounts_json" \ - | jq -r --arg m "${KV_MOUNT}/" '.[$m].type // ""')" - mount_version="$(printf '%s' "$mounts_json" \ - | jq -r --arg m "${KV_MOUNT}/" '.[$m].options.version // "1"')" - if [ "$mount_type" != "kv" ]; then - die "${KV_MOUNT}/ is mounted as type='${mount_type}', expected 'kv' — refuse to re-mount" - fi - if [ "$mount_version" != "2" ]; then - die "${KV_MOUNT}/ is KV v${mount_version}, expected v2 — refuse to upgrade in place (manual fix required)" - fi - log "${KV_MOUNT}/ already mounted (kv v2) — skipping enable" -else - if [ "$DRY_RUN" -eq 1 ]; then - log "[dry-run] would enable ${KV_MOUNT}/ as kv v2" - else - payload="$(jq -n '{type:"kv",options:{version:"2"},description:"disinto shared KV v2 (S2.4)"}')" - _hvault_request POST "sys/mounts/${KV_MOUNT}" "$payload" >/dev/null \ - || die "failed to enable ${KV_MOUNT}/ as kv v2" - log "${KV_MOUNT}/ enabled as kv v2" - fi -fi +export DRY_RUN +hvault_ensure_kv_v2 "$KV_MOUNT" "[vault-seed-forgejo]" \ + || die "KV mount check failed" # ── Step 2/2: seed missing keys at kv/data/disinto/shared/forgejo ──────────── log "── Step 2/2: seed ${KV_API_PATH} ──" diff --git a/tools/vault-seed-woodpecker.sh 
b/tools/vault-seed-woodpecker.sh index ddfe035..8437805 100755 --- a/tools/vault-seed-woodpecker.sh +++ b/tools/vault-seed-woodpecker.sh @@ -39,29 +39,23 @@ # ============================================================================= set -euo pipefail -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" - +SEED_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SEED_DIR}/.." && pwd)" # shellcheck source=../lib/hvault.sh source "${REPO_ROOT}/lib/hvault.sh" -# KV v2 mount + logical path. Kept as two vars so the full API path used -# for GET/POST (which MUST include `/data/`) is built in one place. KV_MOUNT="kv" KV_LOGICAL_PATH="disinto/shared/woodpecker" KV_API_PATH="${KV_MOUNT}/data/${KV_LOGICAL_PATH}" +AGENT_SECRET_BYTES=32 # 32 bytes → 64 hex chars -# 32 bytes → 64 hex chars. Matches the agent secret length used by -# woodpecker-server's own `woodpecker-server secret` generation. -AGENT_SECRET_BYTES=32 - -log() { printf '[vault-seed-woodpecker] %s\n' "$*"; } -die() { printf '[vault-seed-woodpecker] ERROR: %s\n' "$*" >&2; exit 1; } +LOG_TAG="[vault-seed-woodpecker]" +log() { printf '%s %s\n' "$LOG_TAG" "$*"; } +die() { printf '%s ERROR: %s\n' "$LOG_TAG" "$*" >&2; exit 1; } # ── Flag parsing ───────────────────────────────────────────────────────────── -# Single optional `--dry-run`. Uses a for-over-"$@" loop so the 5-line -# sliding-window dup detector sees a shape distinct from vault-seed-forgejo.sh -# (arity:value case) and vault-apply-roles.sh (if/elif). +# for-over-"$@" loop — shape distinct from vault-seed-forgejo.sh (arity:value +# case) and vault-apply-roles.sh (if/elif). 
DRY_RUN=0 for arg in "$@"; do case "$arg" in @@ -78,49 +72,19 @@ for arg in "$@"; do esac done -# ── Preconditions ──────────────────────────────────────────────────────────── -for bin in curl jq openssl; do - command -v "$bin" >/dev/null 2>&1 \ - || die "required binary not found: ${bin}" +# ── Preconditions — binary + Vault connectivity checks ─────────────────────── +required_bins=(curl jq openssl) +for bin in "${required_bins[@]}"; do + command -v "$bin" >/dev/null 2>&1 || die "required binary not found: ${bin}" done - -[ -n "${VAULT_ADDR:-}" ] \ - || die "VAULT_ADDR unset — e.g. export VAULT_ADDR=http://127.0.0.1:8200" -hvault_token_lookup >/dev/null \ - || die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN" +[ -n "${VAULT_ADDR:-}" ] || die "VAULT_ADDR unset — export VAULT_ADDR=http://127.0.0.1:8200" +hvault_token_lookup >/dev/null || die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN" # ── Step 1/2: ensure kv/ mount exists and is KV v2 ─────────────────────────── log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──" -mounts_json="$(hvault_get_or_empty "sys/mounts")" \ - || die "failed to list Vault mounts" - -mount_exists=false -if printf '%s' "$mounts_json" | jq -e --arg m "${KV_MOUNT}/" '.[$m]' >/dev/null 2>&1; then - mount_exists=true -fi - -if [ "$mount_exists" = true ]; then - mount_type="$(printf '%s' "$mounts_json" \ - | jq -r --arg m "${KV_MOUNT}/" '.[$m].type // ""')" - mount_version="$(printf '%s' "$mounts_json" \ - | jq -r --arg m "${KV_MOUNT}/" '.[$m].options.version // "1"')" - if [ "$mount_type" != "kv" ]; then - die "${KV_MOUNT}/ is mounted as type='${mount_type}', expected 'kv' — refuse to re-mount" - fi - if [ "$mount_version" != "2" ]; then - die "${KV_MOUNT}/ is KV v${mount_version}, expected v2 — refuse to upgrade in place (manual fix required)" - fi - log "${KV_MOUNT}/ already mounted (kv v2) — skipping enable" -else - if [ "$DRY_RUN" -eq 1 ]; then - log "[dry-run] would enable ${KV_MOUNT}/ as kv v2" - else - 
payload="$(jq -n '{type:"kv",options:{version:"2"},description:"disinto shared KV v2 (S2.4)"}')" - _hvault_request POST "sys/mounts/${KV_MOUNT}" "$payload" >/dev/null \ - || die "failed to enable ${KV_MOUNT}/ as kv v2" - log "${KV_MOUNT}/ enabled as kv v2" - fi -fi +export DRY_RUN +hvault_ensure_kv_v2 "$KV_MOUNT" "$LOG_TAG" \ + || die "KV mount check failed" # ── Step 2/2: seed agent_secret at kv/data/disinto/shared/woodpecker ───────── log "── Step 2/2: seed ${KV_API_PATH} ──" -- 2.49.1