207 lines
7 KiB
HCL
207 lines
7 KiB
HCL
# =============================================================================
# nomad/jobs/agents.hcl — All-role agent polling loop (Nomad service job)
#
# Part of the Nomad+Vault migration (S4.1, issue #955). Runs the main bot
# polling loop with all 7 agent roles (review, dev, gardener, architect,
# planner, predictor, supervisor) against the local llama server.
#
# Host_volume contract:
#   This job mounts agent-data, project-repos, and ops-repo from
#   nomad/client.hcl. Paths under /srv/disinto/* are created by
#   lib/init/nomad/cluster-up.sh before any job references them.
#
# Vault integration (S4.1):
#   - vault { role = "service-agents" } at group scope — the workload-identity
#     JWT is exchanged for a Vault token carrying the composite service-agents
#     policy (vault/policies/service-agents.hcl), which grants read access
#     to all 7 bot KV namespaces + vault bot + shared forge config.
#   - template stanza renders per-bot FORGE_*_TOKEN + FORGE_PASS from Vault
#     KV v2 at kv/disinto/bots/<role>.
#   - Seeded on fresh boxes by tools/vault-seed-agents.sh.
#
# Not the runtime yet: docker-compose.yml is still the factory's live stack
# until cutover. This file exists so CI can validate it and S4.2 can wire
# `disinto init --backend=nomad --with agents` to `nomad job run` it.
# =============================================================================

job "agents" {
  # Long-running service job: a single group with a single Docker task that
  # runs the polling loop for every role listed in AGENT_ROLES.
  type        = "service"
  datacenters = ["dc1"]

  group "agents" {
    count = 1

    # ── Vault workload identity (S4.1, issue #955) ───────────────────────────
    # Composite role covering all 7 bot identities + vault bot. Role defined
    # in vault/roles.yaml, policy in vault/policies/service-agents.hcl.
    # Bound claim pins nomad_job_id = "agents".
    vault {
      role = "service-agents"
    }

    # No network block / port — agents are outbound-only (poll forgejo, call
    # llama). The job is still registered as a Nomad service in this group,
    # but without a port or a health check.

    # ── Host volumes ─────────────────────────────────────────────────────────
    # Each `source` names a host volume that must be declared on the client.
    # agent-data and project-repos are writable; ops-repo is read-only.
    volume "agent-data" {
      type      = "host"
      source    = "agent-data"
      read_only = false
    }

    volume "project-repos" {
      type      = "host"
      source    = "project-repos"
      read_only = false
    }

    volume "ops-repo" {
      type      = "host"
      source    = "ops-repo"
      read_only = true
    }

    # ── Restart policy ───────────────────────────────────────────────────────
    # Conservative: at most 3 restarts per 5m window, 15s apart.
    # NOTE(review): with mode = "delay" the client waits for the next interval
    # and then keeps retrying the same allocation once the attempts are used
    # up. Handing the allocation back to the scheduler ("fail fast") requires
    # mode = "fail" — confirm which behaviour is intended.
    restart {
      attempts = 3
      interval = "5m"
      delay    = "15s"
      mode     = "delay"
    }

    # ── Service registration ────────────────────────────────────────────────
    # Agents are outbound-only (poll forgejo, call llama) — no HTTP/TCP
    # endpoint to probe. The Nomad native provider only supports tcp/http
    # checks, not script checks. Registering without a check block means
    # Nomad tracks health via task lifecycle: task running = healthy,
    # task dead = service deregistered. This matches the docker-compose
    # pgrep healthcheck semantics (process alive = healthy).
    service {
      name     = "agents"
      provider = "nomad"
    }

    task "agents" {
      driver = "docker"

      config {
        # Locally built image tag; force_pull = false so Docker uses the copy
        # already present on the node instead of contacting a registry.
        image      = "disinto/agents:local"
        force_pull = false

        # apparmor=unconfined matches docker-compose — Claude Code needs
        # ptrace for node.js inspector and /proc access.
        security_opt = ["apparmor=unconfined"]
      }

      # ── Volume mounts ──────────────────────────────────────────────────────
      volume_mount {
        volume      = "agent-data"
        destination = "/home/agent/data"
        read_only   = false
      }

      volume_mount {
        volume      = "project-repos"
        destination = "/home/agent/repos"
        read_only   = false
      }

      # Nested inside the project-repos mount point (/home/agent/repos), but
      # mounted read-only.
      volume_mount {
        volume      = "ops-repo"
        destination = "/home/agent/repos/_factory/disinto-ops"
        read_only   = true
      }

      # ── Non-secret env ─────────────────────────────────────────────────────
      # NOTE(review): FORGE_URL relies on the hostname "forgejo" resolving from
      # inside the container — confirm this holds under Nomad networking.
      env {
        FORGE_URL          = "http://forgejo:3000"
        FORGE_REPO         = "disinto-admin/disinto"
        ANTHROPIC_BASE_URL = "http://10.10.10.1:8081"
        ANTHROPIC_API_KEY  = "sk-no-key-required"
        CLAUDE_MODEL       = "unsloth/Qwen3.5-35B-A3B"
        AGENT_ROLES        = "review,dev,gardener,architect,planner,predictor,supervisor"
        POLL_INTERVAL      = "300"
        DISINTO_CONTAINER  = "1"
        PROJECT_NAME       = "project"
        PROJECT_REPO_ROOT  = "/home/agent/repos/project"
        CLAUDE_TIMEOUT     = "7200"

        # llama-specific Claude Code tuning
        CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC = "1"
        CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS   = "1"
        CLAUDE_AUTOCOMPACT_PCT_OVERRIDE          = "60"
      }

      # ── Vault-templated bot tokens (S4.1, issue #955) ─────────────────────
      # Renders per-bot FORGE_*_TOKEN + FORGE_PASS from Vault KV v2 into
      # secrets/bots.env and exports every line as a task environment variable
      # (env = true). A change in the rendered output restarts the task.
      #
      # Each `with secret ...` block reads one bot's KV path; the `else`
      # branch emits short placeholders intended for fresh installs where the
      # path is absent. Seed with tools/vault-seed-agents.sh.
      #
      # Placeholder values kept < 16 chars to avoid secret-scan CI failures.
      #
      # error_on_missing_key = false: a key missing from an existing secret
      # renders as "<no value>" instead of failing the render.
      # NOTE(review): this setting governs missing map keys, not missing secret
      # paths — confirm on an unseeded Vault that an absent path actually
      # reaches the `else` branch rather than blocking the render.
      #
      # The heredoc body is kept at column 0 on purpose: `<<EOT` preserves
      # leading whitespace verbatim.
      template {
        destination          = "secrets/bots.env"
        env                  = true
        change_mode          = "restart"
        error_on_missing_key = false
        data                 = <<EOT
{{- with secret "kv/data/disinto/bots/dev" -}}
FORGE_TOKEN={{ .Data.data.token }}
FORGE_PASS={{ .Data.data.pass }}
{{- else -}}
# WARNING: run tools/vault-seed-agents.sh
FORGE_TOKEN=seed-me
FORGE_PASS=seed-me
{{- end }}

{{ with secret "kv/data/disinto/bots/review" -}}
FORGE_REVIEW_TOKEN={{ .Data.data.token }}
{{- else -}}
FORGE_REVIEW_TOKEN=seed-me
{{- end }}

{{ with secret "kv/data/disinto/bots/gardener" -}}
FORGE_GARDENER_TOKEN={{ .Data.data.token }}
{{- else -}}
FORGE_GARDENER_TOKEN=seed-me
{{- end }}

{{ with secret "kv/data/disinto/bots/architect" -}}
FORGE_ARCHITECT_TOKEN={{ .Data.data.token }}
{{- else -}}
FORGE_ARCHITECT_TOKEN=seed-me
{{- end }}

{{ with secret "kv/data/disinto/bots/planner" -}}
FORGE_PLANNER_TOKEN={{ .Data.data.token }}
{{- else -}}
FORGE_PLANNER_TOKEN=seed-me
{{- end }}

{{ with secret "kv/data/disinto/bots/predictor" -}}
FORGE_PREDICTOR_TOKEN={{ .Data.data.token }}
{{- else -}}
FORGE_PREDICTOR_TOKEN=seed-me
{{- end }}

{{ with secret "kv/data/disinto/bots/supervisor" -}}
FORGE_SUPERVISOR_TOKEN={{ .Data.data.token }}
{{- else -}}
FORGE_SUPERVISOR_TOKEN=seed-me
{{- end }}

{{ with secret "kv/data/disinto/bots/vault" -}}
FORGE_VAULT_TOKEN={{ .Data.data.token }}
{{- else -}}
FORGE_VAULT_TOKEN=seed-me
{{- end }}
EOT
      }

      # Agents run Claude/llama sessions — need CPU + memory headroom.
      # Nomad units: cpu is in MHz, memory is in MB.
      resources {
        cpu    = 500
        memory = 1024
      }
    }
  }
}
|