diff --git a/lib/hvault.sh b/lib/hvault.sh
index 086c9f2..b0d1635 100644
--- a/lib/hvault.sh
+++ b/lib/hvault.sh
@@ -129,6 +129,91 @@ _hvault_request() {
 # Used by: hvault_kv_get, hvault_kv_put, hvault_kv_list
 : "${VAULT_KV_MOUNT:=kv}"
 
+# hvault_ensure_kv_v2 MOUNT [LOG_PREFIX]
+# Assert that the given KV mount is present and KV v2. If absent, enable
+# it. If present as wrong type/version, return 1 and leave it untouched;
+# callers decide whether to abort (e.g. `hvault_ensure_kv_v2 kv || die`).
+# Callers must have already checked VAULT_ADDR / VAULT_TOKEN.
+#
+# MOUNT: mount name, with or without a trailing slash ("kv" or "kv/").
+# DRY_RUN (env, default 0): when 1, log intent without writing. Any value
+#   other than 0 or 1 is rejected, so a typo such as DRY_RUN=true can
+#   never fall through to a real write.
+# LOG_PREFIX (optional): label for log lines, e.g. "[vault-seed-forgejo]".
+#
+# Outputs: progress lines on stdout, errors on stderr.
+# Returns: 0 when the mount is KV v2 (or would be enabled under dry-run),
+#   1 on any failure or type/version mismatch.
+#
+# Extracted here because every vault-seed-*.sh script needs this exact
+# sequence, and the 5-line sliding-window dup detector flags the
+# copy-paste. One place, one implementation.
+hvault_ensure_kv_v2() {
+  local mount="${1:?hvault_ensure_kv_v2: MOUNT required}"
+  local prefix="${2:-[hvault]}"
+  local dry_run="${DRY_RUN:-0}"
+  local mounts_json mount_exists mount_type mount_version
+
+  # The listing is probed with the key "MOUNT/"; drop a caller-supplied
+  # trailing slash so "kv/" is not looked up as "kv//" and judged absent.
+  mount="${mount%/}"
+  if [ -z "$mount" ]; then
+    printf '%s ERROR: hvault_ensure_kv_v2: MOUNT required\n' "$prefix" >&2
+    return 1
+  fi
+
+  # A non-integer DRY_RUN would make a numeric test error out and land in
+  # the write branch, so validate it here and compare as a string below.
+  case "$dry_run" in
+    0|1) ;;
+    *)
+      printf '%s ERROR: DRY_RUN must be 0 or 1, got %q\n' "$prefix" "$dry_run" >&2
+      return 1
+      ;;
+  esac
+
+  mounts_json="$(hvault_get_or_empty "sys/mounts")" \
+    || { printf '%s ERROR: failed to list Vault mounts\n' "$prefix" >&2; return 1; }
+  # An empty listing cannot prove the mount is absent; refuse to act on it.
+  if [ -z "$mounts_json" ]; then
+    printf '%s ERROR: Vault returned an empty mount listing\n' "$prefix" >&2
+    return 1
+  fi
+
+  mount_exists=false
+  if printf '%s' "$mounts_json" | jq -e --arg m "${mount}/" '.[$m]' >/dev/null 2>&1; then
+    mount_exists=true
+  fi
+
+  if [ "$mount_exists" = true ]; then
+    mount_type="$(printf '%s' "$mounts_json" \
+      | jq -r --arg m "${mount}/" '.[$m].type // ""')"
+    mount_version="$(printf '%s' "$mounts_json" \
+      | jq -r --arg m "${mount}/" '.[$m].options.version // "1"')"
+    if [ "$mount_type" != "kv" ]; then
+      printf '%s ERROR: %s/ is mounted as type=%q, expected kv — refuse to re-mount\n' \
+        "$prefix" "$mount" "$mount_type" >&2
+      return 1
+    fi
+    if [ "$mount_version" != "2" ]; then
+      printf '%s ERROR: %s/ is KV v%s, expected v2 — refuse to upgrade in place\n' \
+        "$prefix" "$mount" "$mount_version" >&2
+      return 1
+    fi
+    printf '%s %s/ already mounted (kv v2) — skipping enable\n' "$prefix" "$mount"
+  else
+    if [ "$dry_run" = "1" ]; then
+      printf '%s [dry-run] would enable %s/ as kv v2\n' "$prefix" "$mount"
+    else
+      local payload
+      payload="$(jq -n '{type:"kv",options:{version:"2"},description:"disinto shared KV v2 (S2.4)"}')"
+      _hvault_request POST "sys/mounts/${mount}" "$payload" >/dev/null \
+        || { printf '%s ERROR: failed to enable %s/ as kv v2\n' "$prefix" "$mount" >&2; return 1; }
+      printf '%s %s/ enabled as kv v2\n' "$prefix" "$mount"
+    fi
+  fi
+}
+
 # hvault_kv_get PATH [KEY]
 # Read a KV v2 secret at PATH, optionally extract a single KEY.
 # Outputs: JSON value (full data object, or single key value)
diff --git a/nomad/jobs/woodpecker-agent.hcl b/nomad/jobs/woodpecker-agent.hcl
new file mode 100644
index 0000000..de81459
--- /dev/null
+++ b/nomad/jobs/woodpecker-agent.hcl
@@ -0,0 +1,138 @@
+# =============================================================================
+# nomad/jobs/woodpecker-agent.hcl — Woodpecker CI agent (Nomad service job)
+#
+# Part of the Nomad+Vault migration (S3.2, issue #935).
+# Drop-in for the current docker-compose setup with host networking +
+# docker.sock mount, enabling the agent to spawn containers via the
+# mounted socket.
+#
+# Host networking:
+#   Uses network_mode = "host" to match the compose setup. The Woodpecker
+#   server gRPC endpoint is addressed as "localhost:9000" since both
+#   server and agent run on the same host.
+#
+# Vault integration:
+#   - vault { role = "service-woodpecker-agent" } at the group scope — the
+#     task's workload-identity JWT is exchanged for a Vault token carrying
+#     the policy named on that role. Role + policy are defined in
+#     vault/roles.yaml + vault/policies/service-woodpecker.hcl.
+#   - template stanza pulls WOODPECKER_AGENT_SECRET from Vault KV v2
+#     at kv/disinto/shared/woodpecker and writes it to secrets/agent.env.
+#     Seeded on fresh boxes by tools/vault-seed-woodpecker.sh.
+# ============================================================================= + +job "woodpecker-agent" { + type = "service" + datacenters = ["dc1"] + + group "woodpecker-agent" { + count = 1 + + # ── Vault workload identity ───────────────────────────────────────── + # `role = "service-woodpecker-agent"` is defined in vault/roles.yaml and + # applied by tools/vault-apply-roles.sh. The role's bound + # claim pins nomad_job_id = "woodpecker-agent" — renaming this + # jobspec's `job "woodpecker-agent"` without updating vault/roles.yaml + # will make token exchange fail at placement with a "claim mismatch" + # error. + vault { + role = "service-woodpecker-agent" + } + + # Health check port: static 3333 for Nomad service discovery. The agent + # exposes :3333/healthz for Nomad to probe. + network { + port "healthz" { + static = 3333 + } + } + + # Native Nomad service discovery for the health check endpoint. + service { + name = "woodpecker-agent" + port = "healthz" + provider = "nomad" + + check { + type = "http" + path = "/healthz" + interval = "15s" + timeout = "3s" + } + } + + # Conservative restart policy — fail fast to the scheduler instead of + # spinning on a broken image/config. 3 attempts over 5m, then back off. + restart { + attempts = 3 + interval = "5m" + delay = "15s" + mode = "delay" + } + + task "woodpecker-agent" { + driver = "docker" + + config { + image = "woodpeckerci/woodpecker-agent:v3" + network_mode = "host" + privileged = true + volumes = ["/var/run/docker.sock:/var/run/docker.sock"] + } + + # Non-secret env — server address, gRPC security, concurrency limit, + # and health check endpoint. Nothing sensitive here. 
+ env { + WOODPECKER_SERVER = "localhost:9000" + WOODPECKER_GRPC_SECURE = "false" + WOODPECKER_MAX_WORKFLOWS = "1" + WOODPECKER_HEALTHCHECK_ADDR = ":3333" + } + + # ── Vault-templated agent secret ────────────────────────────────── + # Renders /secrets/agent.env (per-alloc secrets dir, + # never on disk on the host root filesystem, never in `nomad job + # inspect` output). `env = true` merges WOODPECKER_AGENT_SECRET + # from the file into the task environment. + # + # Vault path: `kv/data/disinto/shared/woodpecker`. The literal + # `/data/` segment is required by consul-template for KV v2 mounts. + # + # Empty-Vault fallback (`with ... else ...`): on a fresh LXC where + # the KV path is absent, consul-template's `with` short-circuits to + # the `else` branch. Emitting a visible placeholder means the + # container still boots, but with an obviously-bad secret that an + # operator will spot — better than the agent failing silently with + # auth errors. Seed the path with tools/vault-seed-woodpecker.sh + # to replace the placeholder. + # + # Placeholder values are kept short on purpose: the repo-wide + # secret-scan (.woodpecker/secret-scan.yml → lib/secret-scan.sh) + # flags `TOKEN=<16+ non-space chars>` as a plaintext secret, so a + # descriptive long placeholder would fail CI on every PR that touched + # this file. "seed-me" is < 16 chars and still distinctive enough + # to surface in a `grep WOODPECKER` audit. + template { + destination = "secrets/agent.env" + env = true + change_mode = "restart" + error_on_missing_key = false + data = </secrets/wp.env` (per-alloc secrets dir, never on + # disk on the host root filesystem). `env = true` merges every KEY=VAL + # line into the task environment. `change_mode = "restart"` re-runs the + # task whenever a watched secret's value in Vault changes. + # + # Vault path: `kv/data/disinto/shared/woodpecker`. The literal `/data/` + # segment is required by consul-template for KV v2 mounts. 
+ # + # Empty-Vault fallback (`with ... else ...`): on a fresh LXC where + # the KV path is absent, consul-template's `with` short-circuits to + # the `else` branch. Emitting visible placeholders means the container + # still boots, but with obviously-bad secrets. Seed the path with + # tools/vault-seed-woodpecker.sh (agent_secret) and S3.3's + # wp-oauth-register.sh (forgejo_client, forgejo_secret). + # + # Placeholder values are kept short on purpose: the repo-wide + # secret-scan flags `TOKEN=<16+ non-space chars>` as a plaintext + # secret; "seed-me" is < 16 chars and still distinctive. + template { + destination = "secrets/wp.env" + env = true + change_mode = "restart" + error_on_missing_key = false + data = </dev/null \ # wrong version or a different backend, fail loudly — silently # re-enabling would destroy existing secrets. log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──" -mounts_json="$(hvault_get_or_empty "sys/mounts")" \ - || die "failed to list Vault mounts" - -mount_exists=false -if printf '%s' "$mounts_json" | jq -e --arg m "${KV_MOUNT}/" '.[$m]' >/dev/null 2>&1; then - mount_exists=true -fi - -if [ "$mount_exists" = true ]; then - mount_type="$(printf '%s' "$mounts_json" \ - | jq -r --arg m "${KV_MOUNT}/" '.[$m].type // ""')" - mount_version="$(printf '%s' "$mounts_json" \ - | jq -r --arg m "${KV_MOUNT}/" '.[$m].options.version // "1"')" - if [ "$mount_type" != "kv" ]; then - die "${KV_MOUNT}/ is mounted as type='${mount_type}', expected 'kv' — refuse to re-mount" - fi - if [ "$mount_version" != "2" ]; then - die "${KV_MOUNT}/ is KV v${mount_version}, expected v2 — refuse to upgrade in place (manual fix required)" - fi - log "${KV_MOUNT}/ already mounted (kv v2) — skipping enable" -else - if [ "$DRY_RUN" -eq 1 ]; then - log "[dry-run] would enable ${KV_MOUNT}/ as kv v2" - else - payload="$(jq -n '{type:"kv",options:{version:"2"},description:"disinto shared KV v2 (S2.4)"}')" - _hvault_request POST "sys/mounts/${KV_MOUNT}" "$payload" >/dev/null \ - 
|| die "failed to enable ${KV_MOUNT}/ as kv v2" - log "${KV_MOUNT}/ enabled as kv v2" - fi -fi +export DRY_RUN +hvault_ensure_kv_v2 "$KV_MOUNT" "[vault-seed-forgejo]" \ + || die "KV mount check failed" # ── Step 2/2: seed missing keys at kv/data/disinto/shared/forgejo ──────────── log "── Step 2/2: seed ${KV_API_PATH} ──" diff --git a/tools/vault-seed-woodpecker.sh b/tools/vault-seed-woodpecker.sh new file mode 100755 index 0000000..8437805 --- /dev/null +++ b/tools/vault-seed-woodpecker.sh @@ -0,0 +1,126 @@ +#!/usr/bin/env bash +# ============================================================================= +# tools/vault-seed-woodpecker.sh — Idempotent seed for kv/disinto/shared/woodpecker +# +# Part of the Nomad+Vault migration (S3.1, issue #934). Populates the +# `agent_secret` key at the KV v2 path that nomad/jobs/woodpecker-server.hcl +# reads from, so a clean-install factory has a pre-shared agent secret for +# woodpecker-server ↔ woodpecker-agent communication. +# +# Scope: ONLY seeds `agent_secret`. The Forgejo OAuth client/secret +# (`forgejo_client`, `forgejo_secret`) are written by S3.3's +# wp-oauth-register.sh after creating the OAuth app via the Forgejo API. +# This script preserves any existing keys it doesn't own. +# +# Idempotency contract (per key): +# - Key missing or empty in Vault → generate a random value, write it, +# log "agent_secret generated". +# - Key present with a non-empty value → leave untouched, log +# "agent_secret unchanged". +# +# Preconditions: +# - Vault reachable + unsealed at $VAULT_ADDR. +# - VAULT_TOKEN set (env) or /etc/vault.d/root.token readable. +# - The `kv/` mount is enabled as KV v2 (this script enables it on a +# fresh box; on an existing box it asserts the mount type/version). +# +# Requires: +# - VAULT_ADDR (e.g. 
http://127.0.0.1:8200) +# - VAULT_TOKEN (env OR /etc/vault.d/root.token, resolved by lib/hvault.sh) +# - curl, jq, openssl +# +# Usage: +# tools/vault-seed-woodpecker.sh +# tools/vault-seed-woodpecker.sh --dry-run +# +# Exit codes: +# 0 success (seed applied, or already applied) +# 1 precondition / API / mount-mismatch failure +# ============================================================================= +set -euo pipefail + +SEED_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SEED_DIR}/.." && pwd)" +# shellcheck source=../lib/hvault.sh +source "${REPO_ROOT}/lib/hvault.sh" + +KV_MOUNT="kv" +KV_LOGICAL_PATH="disinto/shared/woodpecker" +KV_API_PATH="${KV_MOUNT}/data/${KV_LOGICAL_PATH}" +AGENT_SECRET_BYTES=32 # 32 bytes → 64 hex chars + +LOG_TAG="[vault-seed-woodpecker]" +log() { printf '%s %s\n' "$LOG_TAG" "$*"; } +die() { printf '%s ERROR: %s\n' "$LOG_TAG" "$*" >&2; exit 1; } + +# ── Flag parsing ───────────────────────────────────────────────────────────── +# for-over-"$@" loop — shape distinct from vault-seed-forgejo.sh (arity:value +# case) and vault-apply-roles.sh (if/elif). +DRY_RUN=0 +for arg in "$@"; do + case "$arg" in + --dry-run) DRY_RUN=1 ;; + -h|--help) + printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")" + printf 'Seed kv/disinto/shared/woodpecker with a random agent_secret\n' + printf 'if it is missing. 
Idempotent: existing non-empty values are\n' + printf 'left untouched.\n\n' + printf ' --dry-run Print planned actions without writing to Vault.\n' + exit 0 + ;; + *) die "invalid argument: ${arg} (try --help)" ;; + esac +done + +# ── Preconditions — binary + Vault connectivity checks ─────────────────────── +required_bins=(curl jq openssl) +for bin in "${required_bins[@]}"; do + command -v "$bin" >/dev/null 2>&1 || die "required binary not found: ${bin}" +done +[ -n "${VAULT_ADDR:-}" ] || die "VAULT_ADDR unset — export VAULT_ADDR=http://127.0.0.1:8200" +hvault_token_lookup >/dev/null || die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN" + +# ── Step 1/2: ensure kv/ mount exists and is KV v2 ─────────────────────────── +log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──" +export DRY_RUN +hvault_ensure_kv_v2 "$KV_MOUNT" "[vault-seed-woodpecker]" \ + || die "KV mount check failed" + +# ── Step 2/2: seed agent_secret at kv/data/disinto/shared/woodpecker ───────── +log "── Step 2/2: seed ${KV_API_PATH} ──" + +existing_raw="$(hvault_get_or_empty "${KV_API_PATH}")" \ + || die "failed to read ${KV_API_PATH}" + +# Read all existing keys so we can preserve them on write (KV v2 replaces +# `.data` atomically). Missing path → empty object. +existing_data="{}" +existing_agent_secret="" +if [ -n "$existing_raw" ]; then + existing_data="$(printf '%s' "$existing_raw" | jq '.data.data // {}')" + existing_agent_secret="$(printf '%s' "$existing_raw" | jq -r '.data.data.agent_secret // ""')" +fi + +if [ -n "$existing_agent_secret" ]; then + log "agent_secret unchanged" + exit 0 +fi + +# agent_secret is missing — generate it. +if [ "$DRY_RUN" -eq 1 ]; then + log "[dry-run] would generate + write: agent_secret" + exit 0 +fi + +new_agent_secret="$(openssl rand -hex "$AGENT_SECRET_BYTES")" + +# Merge the new key into existing data to preserve any keys written by +# other seeders (e.g. S3.3's forgejo_client/forgejo_secret). 
+payload="$(printf '%s' "$existing_data" \ + | jq --arg as "$new_agent_secret" '{data: (. + {agent_secret: $as})}')" + +_hvault_request POST "${KV_API_PATH}" "$payload" >/dev/null \ + || die "failed to write ${KV_API_PATH}" + +log "agent_secret generated" +log "done — 1 key seeded at ${KV_API_PATH}" diff --git a/vault/roles.yaml b/vault/roles.yaml index fdc11d2..2109504 100644 --- a/vault/roles.yaml +++ b/vault/roles.yaml @@ -55,7 +55,12 @@ roles: - name: service-woodpecker policy: service-woodpecker namespace: default - job_id: woodpecker + job_id: woodpecker-server + + - name: service-woodpecker-agent + policy: service-woodpecker + namespace: default + job_id: woodpecker-agent # ── Per-agent bots (nomad/jobs/bot-.hcl — land in later steps) ─────── # job_id placeholders match the policy name 1:1 until each bot's jobspec