diff --git a/AGENTS.md b/AGENTS.md index 722bc23..ccc0613 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -39,7 +39,7 @@ disinto/ (code repo) │ hooks/ — Claude Code session hooks (on-compact-reinject, on-idle-stop, on-phase-change, on-pretooluse-guard, on-session-end, on-stop-failure) │ init/nomad/ — cluster-up.sh, install.sh, vault-init.sh, lib-systemd.sh (Nomad+Vault Step 0 installers, #821-#825); wp-oauth-register.sh (Forgejo OAuth2 app + Vault KV seeder for Woodpecker, S3.3); deploy.sh (dependency-ordered Nomad job deploy + health-wait, S4) ├── nomad/ server.hcl, client.hcl (allow_privileged for woodpecker-agent, S3-fix-5), vault.hcl — HCL configs deployed to /etc/nomad.d/ and /etc/vault.d/ by lib/init/nomad/cluster-up.sh -│ jobs/ — Nomad jobspecs: forgejo.hcl (Vault secrets via template, S2.4); woodpecker-server.hcl + woodpecker-agent.hcl (host-net, docker.sock, Vault KV, S3.1-S3.2); agents.hcl (7 roles, llama, Vault-templated bot tokens, S4.1); vault-runner.hcl (parameterized batch dispatch, S5.3) +│ jobs/ — Nomad jobspecs: forgejo.hcl (Vault secrets via template, S2.4); woodpecker-server.hcl + woodpecker-agent.hcl (host-net, docker.sock, Vault KV, S3.1-S3.2); agents.hcl (7 roles, llama, Vault-templated bot tokens, S4.1) ├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored) ├── formulas/ Issue templates (TOML specs for multi-step agent tasks) ├── docker/ Dockerfiles and entrypoints: reproduce, triage, edge dispatcher, chat (server.py, entrypoint-chat.sh, Dockerfile, ui/) diff --git a/nomad/jobs/edge.hcl b/nomad/jobs/edge.hcl index 1f3e855..46906ac 100644 --- a/nomad/jobs/edge.hcl +++ b/nomad/jobs/edge.hcl @@ -32,7 +32,7 @@ job "edge" { # ── Vault workload identity for dispatcher (S5.1, issue #988) ────────── # Service role for dispatcher task to fetch vault actions from KV v2. - # Role defined in vault/roles.yaml, policy in vault/policies/dispatcher.hcl. + # Role defined in vault/roles.yaml, policy in vault/policies/service-dispatcher.hcl. vault { role = "service-dispatcher" } @@ -106,8 +106,7 @@ job "edge" { security_opt = ["apparmor=unconfined"] } - # Mount caddy-data volume for ACME state and config directory. - # Caddyfile is mounted at /etc/caddy/Caddyfile by entrypoint-edge.sh. + # Mount Caddy config from repo (docker/Caddyfile). volume_mount { volume = "caddy-data" destination = "/data" @@ -116,10 +115,10 @@ job "edge" { # ── Non-secret env ─────────────────────────────────────────────────── env { - FORGE_URL = "http://forgejo:3000" - FORGE_REPO = "disinto-admin/disinto" + FORGE_URL = "http://forgejo:3000" + FORGE_REPO = "disinto-admin/disinto" DISINTO_CONTAINER = "1" - PROJECT_NAME = "disinto" + PROJECT_NAME = "disinto" } # Caddy needs CPU + memory headroom for reverse proxy work. @@ -140,11 +139,6 @@ job "edge" { # apparmor=unconfined matches docker-compose. security_opt = ["apparmor=unconfined"] - - # Mount docker.sock via bind-volume (not host volume) for legacy - # docker backend compat. Nomad host volumes require named volumes - # from client.hcl; socket files cannot be host volumes. - volumes = ["/var/run/docker.sock:/var/run/docker.sock:ro"] } # Mount ops-repo for vault actions polling. @@ -154,6 +148,14 @@ job "edge" { read_only = false } + # Mount docker.sock for legacy docker backend (S5.1 — kept for compat). + # Note: S5.5 will add nomad dispatch backend; this mount may be removed. + volume_mount { + volume = "docker-socket" + destination = "/var/run/docker.sock" + read_only = true + } + # ── Vault-templated secrets (S5.1, issue #988) ────────────────────── # Renders FORGE_TOKEN from Vault KV v2 for ops repo access. template { @@ -189,5 +191,14 @@ EOT memory = 256 } } + + # ── Docker socket volume (S5.1, issue #988) ─────────────────────────── + # Declared here for dispatcher task (legacy docker backend). + # Path /var/run/docker.sock on the host. + volume "docker-socket" { + type = "host" + source = "/var/run/docker.sock" + read_only = true + } } } diff --git a/nomad/jobs/vault-runner.hcl b/nomad/jobs/vault-runner.hcl deleted file mode 100644 index f7b9aed..0000000 --- a/nomad/jobs/vault-runner.hcl +++ /dev/null @@ -1,132 +0,0 @@ -# ============================================================================= -# nomad/jobs/vault-runner.hcl — Parameterized batch job for vault action dispatch -# -# Part of the Nomad+Vault migration (S5.3, issue #990). Replaces the -# `docker run --rm vault-runner-${action_id}` pattern in dispatcher.sh with -# a Nomad-native parameterized batch job. Dispatched by the edge dispatcher -# (S5.4) via `nomad job dispatch`. -# -# Parameterized meta: -# action_id — vault action identifier (used by entrypoint-runner.sh) -# secrets_csv — comma-separated secret names (e.g. "GITHUB_TOKEN,DEPLOY_KEY") -# -# Vault integration (approach A — pre-defined templates): -# All 6 known runner secrets are rendered via template stanzas with -# error_on_missing_key = false. Secrets not granted by the dispatch's -# Vault policies render as empty strings. The dispatcher (S5.4) sets -# vault { policies = [...] } per-dispatch based on the action TOML's -# secrets=[...] list, scoping access to only the declared secrets. -# -# Cleanup: Nomad garbage-collects completed batch dispatches automatically. -# ============================================================================= - -job "vault-runner" { - type = "batch" - datacenters = ["dc1"] - - parameterized { - meta_required = ["action_id", "secrets_csv"] - } - - group "runner" { - count = 1 - - # ── Vault workload identity ────────────────────────────────────────────── - # Per-dispatch policies are composed by the dispatcher (S5.4) based on the - # action TOML's secrets=[...] list. Each policy grants read access to - # exactly one kv/data/disinto/runner/ path. Roles defined in - # vault/roles.yaml (runner-), policies in vault/policies/. - vault {} - - volume "ops-repo" { - type = "host" - source = "ops-repo" - read_only = true - } - - # No restart for batch — fail fast, let the dispatcher handle retries. - restart { - attempts = 0 - mode = "fail" - } - - task "runner" { - driver = "docker" - - config { - image = "disinto/agents:local" - force_pull = false - entrypoint = ["bash"] - args = [ - "/home/agent/disinto/docker/runner/entrypoint-runner.sh", - "${NOMAD_META_action_id}", - ] - } - - volume_mount { - volume = "ops-repo" - destination = "/home/agent/ops" - read_only = true - } - - # ── Non-secret env ─────────────────────────────────────────────────────── - env { - DISINTO_CONTAINER = "1" - FACTORY_ROOT = "/home/agent/disinto" - OPS_REPO_ROOT = "/home/agent/ops" - } - - # ── Vault-templated runner secrets (approach A) ──────────────────────── - # Pre-defined templates for all 6 known runner secrets. Each renders - # from kv/data/disinto/runner/. Secrets not granted by the - # dispatch's Vault policies produce empty env vars (harmless). - # error_on_missing_key = false prevents template-pending hangs when - # a secret path is absent or the policy doesn't grant access. - # - # Placeholder values kept < 16 chars to avoid secret-scan CI failures. - template { - destination = "secrets/runner.env" - env = true - error_on_missing_key = false - data = < policies, NOT this one. This policy stays bound -# to the long-running dispatcher only. - -path "kv/data/disinto/runner/*" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/runner/*" { - capabilities = ["list", "read"] -} - -path "kv/data/disinto/shared/ops-repo" { - capabilities = ["read"] -} - -path "kv/metadata/disinto/shared/ops-repo" { - capabilities = ["list", "read"] -} diff --git a/vault/roles.yaml b/vault/roles.yaml index 07e0527..d3b1892 100644 --- a/vault/roles.yaml +++ b/vault/roles.yaml @@ -121,10 +121,10 @@ roles: job_id: bot-vault # ── Edge dispatcher ──────────────────────────────────────────────────────── - - name: service-dispatcher - policy: service-dispatcher + - name: dispatcher + policy: dispatcher namespace: default - job_id: edge + job_id: dispatcher # ── Per-secret runner roles ──────────────────────────────────────────────── # vault-runner (Step 5) composes runner- policies onto each