# ============================================================================= # nomad/jobs/agents.hcl — All-role agent polling loop (Nomad service job) # # Part of the Nomad+Vault migration (S4.1, issue #955). Runs the main bot # polling loop with all 7 agent roles (review, dev, gardener, architect, # planner, predictor, supervisor) against the local llama server. # # Host_volume contract: # This job mounts agent-data, project-repos, and ops-repo from # nomad/client.hcl. Paths under /srv/disinto/* are created by # lib/init/nomad/cluster-up.sh before any job references them. # # Vault integration (S4.1): # - vault { role = "service-agents" } at group scope — workload-identity # JWT exchanged for a Vault token carrying the composite service-agents # policy (vault/policies/service-agents.hcl), which grants read access # to all 7 bot KV namespaces + vault bot + shared forge config. # - template stanza renders per-bot FORGE_*_TOKEN + FORGE_PASS from Vault # KV v2 at kv/disinto/bots/. # - Seeded on fresh boxes by tools/vault-seed-agents.sh. # # Not the runtime yet: docker-compose.yml is still the factory's live stack # until cutover. This file exists so CI can validate it and S4.2 can wire # `disinto init --backend=nomad --with agents` to `nomad job run` it. # ============================================================================= job "agents" { type = "service" datacenters = ["dc1"] group "agents" { count = 1 # ── Vault workload identity (S4.1, issue #955) ─────────────────────────── # Composite role covering all 7 bot identities + vault bot. Role defined # in vault/roles.yaml, policy in vault/policies/service-agents.hcl. # Bound claim pins nomad_job_id = "agents". vault { role = "service-agents" } # No network port — agents are outbound-only (poll forgejo, call llama). # No service discovery block — nothing health-checks agents over HTTP. volume "agent-data" { type = "host" source = "agent-data" read_only = false } volume "project-repos" { type = "host" source = "project-repos" read_only = false } volume "ops-repo" { type = "host" source = "ops-repo" read_only = true } # Conservative restart — fail fast to the scheduler. restart { attempts = 3 interval = "5m" delay = "15s" mode = "delay" } # ── Service registration ──────────────────────────────────────────────── # Agents are outbound-only (poll forgejo, call llama) — no HTTP/TCP # endpoint to probe. The Nomad native provider only supports tcp/http # checks, not script checks. Registering without a check block means # Nomad tracks health via task lifecycle: task running = healthy, # task dead = service deregistered. This matches the docker-compose # pgrep healthcheck semantics (process alive = healthy). service { name = "agents" provider = "nomad" } task "agents" { driver = "docker" config { image = "disinto/agents:local" force_pull = false # apparmor=unconfined matches docker-compose — Claude Code needs # ptrace for node.js inspector and /proc access. security_opt = ["apparmor=unconfined"] } volume_mount { volume = "agent-data" destination = "/home/agent/data" read_only = false } volume_mount { volume = "project-repos" destination = "/home/agent/repos" read_only = false } volume_mount { volume = "ops-repo" destination = "/home/agent/repos/_factory/disinto-ops" read_only = true } # ── Non-secret env ───────────────────────────────────────────────────── env { FORGE_URL = "http://forgejo:3000" FORGE_REPO = "disinto-admin/disinto" ANTHROPIC_BASE_URL = "http://10.10.10.1:8081" ANTHROPIC_API_KEY = "sk-no-key-required" CLAUDE_MODEL = "unsloth/Qwen3.5-35B-A3B" AGENT_ROLES = "review,dev,gardener,architect,planner,predictor,supervisor" POLL_INTERVAL = "300" DISINTO_CONTAINER = "1" PROJECT_NAME = "project" PROJECT_REPO_ROOT = "/home/agent/repos/project" CLAUDE_TIMEOUT = "7200" # llama-specific Claude Code tuning CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC = "1" CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS = "1" CLAUDE_AUTOCOMPACT_PCT_OVERRIDE = "60" } # ── Vault-templated bot tokens (S4.1, issue #955) ───────────────────── # Renders per-bot FORGE_*_TOKEN + FORGE_PASS from Vault KV v2. # Each `with secret ...` block reads one bot's KV path; the `else` # branch emits short placeholders on fresh installs where the path # is absent. Seed with tools/vault-seed-agents.sh. # # Placeholder values kept < 16 chars to avoid secret-scan CI failures. # error_on_missing_key = false prevents template-pending hangs. template { destination = "secrets/bots.env" env = true change_mode = "restart" error_on_missing_key = false data = <