fix: [nomad-step-3] S3.2 — nomad/jobs/woodpecker-agent.hcl (host-net, docker.sock) (#935)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/nomad-validate Pipeline was successful
ci/woodpecker/pr/secret-scan Pipeline was successful

This commit is contained in:
Agent 2026-04-17 05:16:15 +00:00
parent b501077352
commit 5d76cc96fb
2 changed files with 143 additions and 0 deletions

View file

@ -0,0 +1,138 @@
# =============================================================================
# nomad/jobs/woodpecker-agent.hcl Woodpecker CI agent (Nomad service job)
#
# Part of the Nomad+Vault migration (S3.2, issue #935).
# Drop-in for the current docker-compose setup with host networking +
# docker.sock mount, enabling the agent to spawn containers via the
# mounted socket.
#
# Host networking:
# Uses network_mode = "host" to match the compose setup. The Woodpecker
# server gRPC endpoint is addressed as "localhost:9000" since both
# server and agent run on the same host.
#
# Vault integration:
# - vault { role = "service-woodpecker-agent" } at the group scope the
# task's workload-identity JWT is exchanged for a Vault token carrying
# the policy named on that role. Role + policy are defined in
# vault/roles.yaml + vault/policies/service-woodpecker.hcl.
# - template stanza pulls WOODPECKER_AGENT_SECRET from Vault KV v2
# at kv/disinto/shared/woodpecker and writes it to secrets/agent.env.
# Seeded on fresh boxes by tools/vault-seed-woodpecker.sh.
# =============================================================================
job "woodpecker-agent" {
type = "service"
datacenters = ["dc1"]
group "woodpecker-agent" {
count = 1
# Vault workload identity
# `role = "service-woodpecker-agent"` is defined in vault/roles.yaml and
# applied by tools/vault-apply-roles.sh. The role's bound
# claim pins nomad_job_id = "woodpecker-agent" renaming this
# jobspec's `job "woodpecker-agent"` without updating vault/roles.yaml
# will make token exchange fail at placement with a "claim mismatch"
# error.
vault {
role = "service-woodpecker-agent"
}
# Health check port: static 3333 for Nomad service discovery. The agent
# exposes :3333/healthz for Nomad to probe.
network {
port "healthz" {
static = 3333
}
}
# Native Nomad service discovery for the health check endpoint.
service {
name = "woodpecker-agent"
port = "healthz"
provider = "nomad"
check {
type = "http"
path = "/healthz"
interval = "15s"
timeout = "3s"
}
}
# Conservative restart policy fail fast to the scheduler instead of
# spinning on a broken image/config. 3 attempts over 5m, then back off.
restart {
attempts = 3
interval = "5m"
delay = "15s"
mode = "delay"
}
task "woodpecker-agent" {
driver = "docker"
config {
image = "woodpeckerci/woodpecker-agent:v3"
network_mode = "host"
privileged = true
volumes = ["/var/run/docker.sock:/var/run/docker.sock"]
}
# Non-secret env server address, gRPC security, concurrency limit,
# and health check endpoint. Nothing sensitive here.
env {
WOODPECKER_SERVER = "localhost:9000"
WOODPECKER_GRPC_SECURE = "false"
WOODPECKER_MAX_WORKFLOWS = "1"
WOODPECKER_HEALTHCHECK_ADDR = ":3333"
}
# Vault-templated agent secret
# Renders <task-dir>/secrets/agent.env (per-alloc secrets dir,
# never on disk on the host root filesystem, never in `nomad job
# inspect` output). `env = true` merges WOODPECKER_AGENT_SECRET
# from the file into the task environment.
#
# Vault path: `kv/data/disinto/shared/woodpecker`. The literal
# `/data/` segment is required by consul-template for KV v2 mounts.
#
# Empty-Vault fallback (`with ... else ...`): on a fresh LXC where
# the KV path is absent, consul-template's `with` short-circuits to
# the `else` branch. Emitting a visible placeholder means the
# container still boots, but with an obviously-bad secret that an
# operator will spot better than the agent failing silently with
# auth errors. Seed the path with tools/vault-seed-woodpecker.sh
# to replace the placeholder.
#
# Placeholder values are kept short on purpose: the repo-wide
# secret-scan (.woodpecker/secret-scan.yml lib/secret-scan.sh)
# flags `TOKEN=<16+ non-space chars>` as a plaintext secret, so a
# descriptive long placeholder would fail CI on every PR that touched
# this file. "seed-me" is < 16 chars and still distinctive enough
# to surface in a `grep WOODPECKER` audit.
template {
destination = "secrets/agent.env"
env = true
change_mode = "restart"
error_on_missing_key = false
data = <<EOT
{{- with secret "kv/data/disinto/shared/woodpecker" -}}
WOODPECKER_AGENT_SECRET={{ .Data.data.agent_secret }}
{{- else -}}
# WARNING: kv/disinto/shared/woodpecker is empty run tools/vault-seed-woodpecker.sh
WOODPECKER_AGENT_SECRET=seed-me
{{- end -}}
EOT
}
# Baseline tune once we have real usage numbers under nomad.
# Conservative limits so an unhealthy agent can't starve the node.
resources {
cpu = 200
memory = 256
}
}
}
}