diff --git a/lib/hvault.sh b/lib/hvault.sh index b0d1635..086c9f2 100644 --- a/lib/hvault.sh +++ b/lib/hvault.sh @@ -129,60 +129,6 @@ _hvault_request() { # Used by: hvault_kv_get, hvault_kv_put, hvault_kv_list : "${VAULT_KV_MOUNT:=kv}" -# hvault_ensure_kv_v2 MOUNT [LOG_PREFIX] -# Assert that the given KV mount is present and KV v2. If absent, enable -# it. If present as wrong type/version, exit 1. Callers must have already -# checked VAULT_ADDR / VAULT_TOKEN. -# -# DRY_RUN (env, default 0): when 1, log intent without writing. -# LOG_PREFIX (optional): label for log lines, e.g. "[vault-seed-forgejo]". -# -# Extracted here because every vault-seed-*.sh script needs this exact -# sequence, and the 5-line sliding-window dup detector flags the -# copy-paste. One place, one implementation. -hvault_ensure_kv_v2() { - local mount="${1:?hvault_ensure_kv_v2: MOUNT required}" - local prefix="${2:-[hvault]}" - local dry_run="${DRY_RUN:-0}" - local mounts_json mount_exists mount_type mount_version - - mounts_json="$(hvault_get_or_empty "sys/mounts")" \ - || { printf '%s ERROR: failed to list Vault mounts\n' "$prefix" >&2; return 1; } - - mount_exists=false - if printf '%s' "$mounts_json" | jq -e --arg m "${mount}/" '.[$m]' >/dev/null 2>&1; then - mount_exists=true - fi - - if [ "$mount_exists" = true ]; then - mount_type="$(printf '%s' "$mounts_json" \ - | jq -r --arg m "${mount}/" '.[$m].type // ""')" - mount_version="$(printf '%s' "$mounts_json" \ - | jq -r --arg m "${mount}/" '.[$m].options.version // "1"')" - if [ "$mount_type" != "kv" ]; then - printf '%s ERROR: %s/ is mounted as type=%q, expected kv — refuse to re-mount\n' \ - "$prefix" "$mount" "$mount_type" >&2 - return 1 - fi - if [ "$mount_version" != "2" ]; then - printf '%s ERROR: %s/ is KV v%s, expected v2 — refuse to upgrade in place\n' \ - "$prefix" "$mount" "$mount_version" >&2 - return 1 - fi - printf '%s %s/ already mounted (kv v2) — skipping enable\n' "$prefix" "$mount" - else - if [ "$dry_run" -eq 1 ]; then - printf '%s [dry-run] would enable %s/ as kv v2\n' "$prefix" "$mount" - else - local payload - payload="$(jq -n '{type:"kv",options:{version:"2"},description:"disinto shared KV v2 (S2.4)"}')" - _hvault_request POST "sys/mounts/${mount}" "$payload" >/dev/null \ - || { printf '%s ERROR: failed to enable %s/ as kv v2\n' "$prefix" "$mount" >&2; return 1; } - printf '%s %s/ enabled as kv v2\n' "$prefix" "$mount" - fi - fi -} - # hvault_kv_get PATH [KEY] # Read a KV v2 secret at PATH, optionally extract a single KEY. # Outputs: JSON value (full data object, or single key value) diff --git a/nomad/jobs/woodpecker-agent.hcl b/nomad/jobs/woodpecker-agent.hcl index de81459..3cd9287 100644 --- a/nomad/jobs/woodpecker-agent.hcl +++ b/nomad/jobs/woodpecker-agent.hcl @@ -12,10 +12,10 @@ # server and agent run on the same host. # # Vault integration: -# - vault { role = "service-woodpecker-agent" } at the group scope — the -# task's workload-identity JWT is exchanged for a Vault token carrying -# the policy named on that role. Role + policy are defined in -# vault/roles.yaml + vault/policies/service-woodpecker.hcl. +# - vault { role = "woodpecker-agent" } at the group scope — the task's +# workload-identity JWT is exchanged for a Vault token carrying the +# policy named on that role. Role + policy are defined in +# vault/roles.yaml + vault/policies/woodpecker-agent.hcl. # - template stanza pulls WOODPECKER_AGENT_SECRET from Vault KV v2 # at kv/disinto/shared/woodpecker and writes it to secrets/agent.env. # Seeded on fresh boxes by tools/vault-seed-woodpecker.sh. @@ -29,25 +29,18 @@ job "woodpecker-agent" { count = 1 # ── Vault workload identity ───────────────────────────────────────── - # `role = "service-woodpecker-agent"` is defined in vault/roles.yaml and + # `role = "woodpecker-agent"` is defined in vault/roles.yaml and # applied by tools/vault-apply-roles.sh. The role's bound # claim pins nomad_job_id = "woodpecker-agent" — renaming this # jobspec's `job "woodpecker-agent"` without updating vault/roles.yaml # will make token exchange fail at placement with a "claim mismatch" # error. vault { - role = "service-woodpecker-agent" - } - - # Health check port: static 3333 for Nomad service discovery. The agent - # exposes :3333/healthz for Nomad to probe. - network { - port "healthz" { - static = 3333 - } + role = "woodpecker-agent" } # Native Nomad service discovery for the health check endpoint. + # The agent exposes :3333/healthz for Nomad to probe. service { name = "woodpecker-agent" port = "healthz" diff --git a/nomad/jobs/woodpecker-server.hcl b/nomad/jobs/woodpecker-server.hcl deleted file mode 100644 index 6cef1a0..0000000 --- a/nomad/jobs/woodpecker-server.hcl +++ /dev/null @@ -1,173 +0,0 @@ -# ============================================================================= -# nomad/jobs/woodpecker-server.hcl — Woodpecker CI server (Nomad service job) -# -# Part of the Nomad+Vault migration (S3.1, issue #934). -# Runs the Woodpecker CI web UI + gRPC endpoint as a Nomad service job, -# reading its Forgejo OAuth + agent secret from Vault via workload identity. -# -# Host_volume contract: -# This job mounts the `woodpecker-data` host_volume declared in -# nomad/client.hcl. That volume is backed by /srv/disinto/woodpecker-data -# on the factory box, created by lib/init/nomad/cluster-up.sh before any -# job references it. Keep the `source = "woodpecker-data"` below in sync -# with the host_volume stanza in client.hcl — drift = scheduling failures. -# -# Vault integration (S2.4 pattern): -# - vault { role = "service-woodpecker" } at the group scope — the task's -# workload-identity JWT is exchanged for a Vault token carrying the -# policy named on that role. Role + policy are defined in -# vault/roles.yaml + vault/policies/service-woodpecker.hcl. -# - template { destination = "secrets/wp.env" env = true } pulls -# WOODPECKER_AGENT_SECRET, WOODPECKER_FORGEJO_CLIENT, and -# WOODPECKER_FORGEJO_SECRET out of Vault KV v2 at -# kv/disinto/shared/woodpecker and merges them into the task env. -# Agent secret seeded by tools/vault-seed-woodpecker.sh; OAuth -# client/secret seeded by S3.3 (wp-oauth-register.sh). -# - Non-secret env (DB driver, Forgejo URL, host URL, open registration) -# stays inline below — not sensitive, not worth round-tripping through -# Vault. -# -# Not the runtime yet: docker-compose.yml is still the factory's live stack -# until cutover. This file exists so CI can validate it and S3.4 can wire -# `disinto init --backend=nomad --with woodpecker` to `nomad job run` it. -# ============================================================================= - -job "woodpecker-server" { - type = "service" - datacenters = ["dc1"] - - group "woodpecker-server" { - count = 1 - - # ── Vault workload identity (S2.4 pattern) ────────────────────────────── - # `role = "service-woodpecker"` is defined in vault/roles.yaml and - # applied by tools/vault-apply-roles.sh (S2.3). The role's bound - # claim pins nomad_job_id = "woodpecker" — note the job_id in - # vault/roles.yaml is "woodpecker" (matching the roles.yaml entry), - # but the actual Nomad job name here is "woodpecker-server". Update - # vault/roles.yaml job_id to "woodpecker-server" if the bound claim - # enforces an exact match at placement. - vault { - role = "service-woodpecker" - } - - # HTTP UI (:8000) + gRPC agent endpoint (:9000). Static ports match - # docker-compose's published ports so the rest of the factory keeps - # reaching woodpecker at the same host:port during and after cutover. - network { - port "http" { - static = 8000 - to = 8000 - } - port "grpc" { - static = 9000 - to = 9000 - } - } - - # Host-volume mount: declared in nomad/client.hcl, path - # /srv/disinto/woodpecker-data on the factory box. - volume "woodpecker-data" { - type = "host" - source = "woodpecker-data" - read_only = false - } - - # Conservative restart policy — fail fast to the scheduler instead of - # spinning on a broken image/config. 3 attempts over 5m, then back off. - restart { - attempts = 3 - interval = "5m" - delay = "15s" - mode = "delay" - } - - # Native Nomad service discovery (no Consul in this factory cluster). - # Health check gates the service as healthy only after the HTTP API is - # up; initial_status is deliberately unset so Nomad waits for the first - # probe to pass before marking the allocation healthy on boot. - service { - name = "woodpecker" - port = "http" - provider = "nomad" - - check { - type = "http" - path = "/healthz" - interval = "10s" - timeout = "3s" - } - } - - task "woodpecker-server" { - driver = "docker" - - config { - image = "woodpeckerci/woodpecker-server:v3" - ports = ["http", "grpc"] - } - - volume_mount { - volume = "woodpecker-data" - destination = "/var/lib/woodpecker" - read_only = false - } - - # Non-secret env — Forgejo integration flags, public URL, DB driver. - # Nothing sensitive here, so this stays inline. Secret-bearing env - # (agent secret, OAuth client/secret) lives in the template stanza - # below and is merged into task env. - env { - WOODPECKER_FORGEJO = "true" - WOODPECKER_FORGEJO_URL = "http://forgejo:3000" - WOODPECKER_HOST = "http://woodpecker:8000" - WOODPECKER_OPEN = "true" - WOODPECKER_DATABASE_DRIVER = "sqlite3" - WOODPECKER_DATABASE_DATASOURCE = "/var/lib/woodpecker/woodpecker.sqlite" - } - - # ── Vault-templated secrets env (S2.4 pattern) ───────────────────────── - # Renders `/secrets/wp.env` (per-alloc secrets dir, never on - # disk on the host root filesystem). `env = true` merges every KEY=VAL - # line into the task environment. `change_mode = "restart"` re-runs the - # task whenever a watched secret's value in Vault changes. - # - # Vault path: `kv/data/disinto/shared/woodpecker`. The literal `/data/` - # segment is required by consul-template for KV v2 mounts. - # - # Empty-Vault fallback (`with ... else ...`): on a fresh LXC where - # the KV path is absent, consul-template's `with` short-circuits to - # the `else` branch. Emitting visible placeholders means the container - # still boots, but with obviously-bad secrets. Seed the path with - # tools/vault-seed-woodpecker.sh (agent_secret) and S3.3's - # wp-oauth-register.sh (forgejo_client, forgejo_secret). - # - # Placeholder values are kept short on purpose: the repo-wide - # secret-scan flags `TOKEN=<16+ non-space chars>` as a plaintext - # secret; "seed-me" is < 16 chars and still distinctive. - template { - destination = "secrets/wp.env" - env = true - change_mode = "restart" - error_on_missing_key = false - data = </dev/null \ # wrong version or a different backend, fail loudly — silently # re-enabling would destroy existing secrets. log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──" -export DRY_RUN -hvault_ensure_kv_v2 "$KV_MOUNT" "[vault-seed-forgejo]" \ - || die "KV mount check failed" +mounts_json="$(hvault_get_or_empty "sys/mounts")" \ + || die "failed to list Vault mounts" + +mount_exists=false +if printf '%s' "$mounts_json" | jq -e --arg m "${KV_MOUNT}/" '.[$m]' >/dev/null 2>&1; then + mount_exists=true +fi + +if [ "$mount_exists" = true ]; then + mount_type="$(printf '%s' "$mounts_json" \ + | jq -r --arg m "${KV_MOUNT}/" '.[$m].type // ""')" + mount_version="$(printf '%s' "$mounts_json" \ + | jq -r --arg m "${KV_MOUNT}/" '.[$m].options.version // "1"')" + if [ "$mount_type" != "kv" ]; then + die "${KV_MOUNT}/ is mounted as type='${mount_type}', expected 'kv' — refuse to re-mount" + fi + if [ "$mount_version" != "2" ]; then + die "${KV_MOUNT}/ is KV v${mount_version}, expected v2 — refuse to upgrade in place (manual fix required)" + fi + log "${KV_MOUNT}/ already mounted (kv v2) — skipping enable" +else + if [ "$DRY_RUN" -eq 1 ]; then + log "[dry-run] would enable ${KV_MOUNT}/ as kv v2" + else + payload="$(jq -n '{type:"kv",options:{version:"2"},description:"disinto shared KV v2 (S2.4)"}')" + _hvault_request POST "sys/mounts/${KV_MOUNT}" "$payload" >/dev/null \ + || die "failed to enable ${KV_MOUNT}/ as kv v2" + log "${KV_MOUNT}/ enabled as kv v2" + fi +fi # ── Step 2/2: seed missing keys at kv/data/disinto/shared/forgejo ──────────── log "── Step 2/2: seed ${KV_API_PATH} ──" diff --git a/tools/vault-seed-woodpecker.sh b/tools/vault-seed-woodpecker.sh deleted file mode 100755 index 8437805..0000000 --- a/tools/vault-seed-woodpecker.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# tools/vault-seed-woodpecker.sh — Idempotent seed for kv/disinto/shared/woodpecker -# -# Part of the Nomad+Vault migration (S3.1, issue #934). Populates the -# `agent_secret` key at the KV v2 path that nomad/jobs/woodpecker-server.hcl -# reads from, so a clean-install factory has a pre-shared agent secret for -# woodpecker-server ↔ woodpecker-agent communication. -# -# Scope: ONLY seeds `agent_secret`. The Forgejo OAuth client/secret -# (`forgejo_client`, `forgejo_secret`) are written by S3.3's -# wp-oauth-register.sh after creating the OAuth app via the Forgejo API. -# This script preserves any existing keys it doesn't own. -# -# Idempotency contract (per key): -# - Key missing or empty in Vault → generate a random value, write it, -# log "agent_secret generated". -# - Key present with a non-empty value → leave untouched, log -# "agent_secret unchanged". -# -# Preconditions: -# - Vault reachable + unsealed at $VAULT_ADDR. -# - VAULT_TOKEN set (env) or /etc/vault.d/root.token readable. -# - The `kv/` mount is enabled as KV v2 (this script enables it on a -# fresh box; on an existing box it asserts the mount type/version). -# -# Requires: -# - VAULT_ADDR (e.g. http://127.0.0.1:8200) -# - VAULT_TOKEN (env OR /etc/vault.d/root.token, resolved by lib/hvault.sh) -# - curl, jq, openssl -# -# Usage: -# tools/vault-seed-woodpecker.sh -# tools/vault-seed-woodpecker.sh --dry-run -# -# Exit codes: -# 0 success (seed applied, or already applied) -# 1 precondition / API / mount-mismatch failure -# ============================================================================= -set -euo pipefail - -SEED_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SEED_DIR}/.." && pwd)" -# shellcheck source=../lib/hvault.sh -source "${REPO_ROOT}/lib/hvault.sh" - -KV_MOUNT="kv" -KV_LOGICAL_PATH="disinto/shared/woodpecker" -KV_API_PATH="${KV_MOUNT}/data/${KV_LOGICAL_PATH}" -AGENT_SECRET_BYTES=32 # 32 bytes → 64 hex chars - -LOG_TAG="[vault-seed-woodpecker]" -log() { printf '%s %s\n' "$LOG_TAG" "$*"; } -die() { printf '%s ERROR: %s\n' "$LOG_TAG" "$*" >&2; exit 1; } - -# ── Flag parsing ───────────────────────────────────────────────────────────── -# for-over-"$@" loop — shape distinct from vault-seed-forgejo.sh (arity:value -# case) and vault-apply-roles.sh (if/elif). -DRY_RUN=0 -for arg in "$@"; do - case "$arg" in - --dry-run) DRY_RUN=1 ;; - -h|--help) - printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")" - printf 'Seed kv/disinto/shared/woodpecker with a random agent_secret\n' - printf 'if it is missing. Idempotent: existing non-empty values are\n' - printf 'left untouched.\n\n' - printf ' --dry-run Print planned actions without writing to Vault.\n' - exit 0 - ;; - *) die "invalid argument: ${arg} (try --help)" ;; - esac -done - -# ── Preconditions — binary + Vault connectivity checks ─────────────────────── -required_bins=(curl jq openssl) -for bin in "${required_bins[@]}"; do - command -v "$bin" >/dev/null 2>&1 || die "required binary not found: ${bin}" -done -[ -n "${VAULT_ADDR:-}" ] || die "VAULT_ADDR unset — export VAULT_ADDR=http://127.0.0.1:8200" -hvault_token_lookup >/dev/null || die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN" - -# ── Step 1/2: ensure kv/ mount exists and is KV v2 ─────────────────────────── -log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──" -export DRY_RUN -hvault_ensure_kv_v2 "$KV_MOUNT" "[vault-seed-woodpecker]" \ - || die "KV mount check failed" - -# ── Step 2/2: seed agent_secret at kv/data/disinto/shared/woodpecker ───────── -log "── Step 2/2: seed ${KV_API_PATH} ──" - -existing_raw="$(hvault_get_or_empty "${KV_API_PATH}")" \ - || die "failed to read ${KV_API_PATH}" - -# Read all existing keys so we can preserve them on write (KV v2 replaces -# `.data` atomically). Missing path → empty object. -existing_data="{}" -existing_agent_secret="" -if [ -n "$existing_raw" ]; then - existing_data="$(printf '%s' "$existing_raw" | jq '.data.data // {}')" - existing_agent_secret="$(printf '%s' "$existing_raw" | jq -r '.data.data.agent_secret // ""')" -fi - -if [ -n "$existing_agent_secret" ]; then - log "agent_secret unchanged" - exit 0 -fi - -# agent_secret is missing — generate it. -if [ "$DRY_RUN" -eq 1 ]; then - log "[dry-run] would generate + write: agent_secret" - exit 0 -fi - -new_agent_secret="$(openssl rand -hex "$AGENT_SECRET_BYTES")" - -# Merge the new key into existing data to preserve any keys written by -# other seeders (e.g. S3.3's forgejo_client/forgejo_secret). -payload="$(printf '%s' "$existing_data" \ - | jq --arg as "$new_agent_secret" '{data: (. + {agent_secret: $as})}')" - -_hvault_request POST "${KV_API_PATH}" "$payload" >/dev/null \ - || die "failed to write ${KV_API_PATH}" - -log "agent_secret generated" -log "done — 1 key seeded at ${KV_API_PATH}" diff --git a/vault/roles.yaml b/vault/roles.yaml index 2109504..fdc11d2 100644 --- a/vault/roles.yaml +++ b/vault/roles.yaml @@ -55,12 +55,7 @@ roles: - name: service-woodpecker policy: service-woodpecker namespace: default - job_id: woodpecker-server - - - name: service-woodpecker-agent - policy: service-woodpecker - namespace: default - job_id: woodpecker-agent + job_id: woodpecker # ── Per-agent bots (nomad/jobs/bot-.hcl — land in later steps) ─────── # job_id placeholders match the policy name 1:1 until each bot's jobspec