diff --git a/AGENTS.md b/AGENTS.md index 722bc23..ccc0613 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -39,7 +39,7 @@ disinto/ (code repo) │ hooks/ — Claude Code session hooks (on-compact-reinject, on-idle-stop, on-phase-change, on-pretooluse-guard, on-session-end, on-stop-failure) │ init/nomad/ — cluster-up.sh, install.sh, vault-init.sh, lib-systemd.sh (Nomad+Vault Step 0 installers, #821-#825); wp-oauth-register.sh (Forgejo OAuth2 app + Vault KV seeder for Woodpecker, S3.3); deploy.sh (dependency-ordered Nomad job deploy + health-wait, S4) ├── nomad/ server.hcl, client.hcl (allow_privileged for woodpecker-agent, S3-fix-5), vault.hcl — HCL configs deployed to /etc/nomad.d/ and /etc/vault.d/ by lib/init/nomad/cluster-up.sh -│ jobs/ — Nomad jobspecs: forgejo.hcl (Vault secrets via template, S2.4); woodpecker-server.hcl + woodpecker-agent.hcl (host-net, docker.sock, Vault KV, S3.1-S3.2); agents.hcl (7 roles, llama, Vault-templated bot tokens, S4.1); vault-runner.hcl (parameterized batch dispatch, S5.3) +│ jobs/ — Nomad jobspecs: forgejo.hcl (Vault secrets via template, S2.4); woodpecker-server.hcl + woodpecker-agent.hcl (host-net, docker.sock, Vault KV, S3.1-S3.2); agents.hcl (7 roles, llama, Vault-templated bot tokens, S4.1) ├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored) ├── formulas/ Issue templates (TOML specs for multi-step agent tasks) ├── docker/ Dockerfiles and entrypoints: reproduce, triage, edge dispatcher, chat (server.py, entrypoint-chat.sh, Dockerfile, ui/) diff --git a/nomad/jobs/chat.hcl b/nomad/jobs/chat.hcl index 8fa5c93..a45c4c7 100644 --- a/nomad/jobs/chat.hcl +++ b/nomad/jobs/chat.hcl @@ -9,9 +9,10 @@ # (same :local pattern as disinto/agents:local). 
# # Sandbox hardening (#706): -# - Read-only root filesystem (enforced via entrypoint) +# - Read-only root filesystem # - tmpfs /tmp:size=64m for runtime temp files # - cap_drop ALL (no Linux capabilities) +# - pids_limit 128 (prevent fork bombs) # - mem_limit 512m (matches compose sandbox hardening) # # Vault integration: @@ -89,10 +90,17 @@ job "chat" { image = "disinto/chat:local" force_pull = false # Sandbox hardening (#706): cap_drop ALL (no Linux capabilities) - # tmpfs /tmp for runtime files (64MB) - # ReadonlyRootfs enforced via entrypoint script (fails if running as root) - cap_drop = ["ALL"] - tmpfs = ["/tmp:size=64m"] + # Note: Nomad docker driver maps these to Docker's CapDrop options. + # ReadonlyRootfs is set via the container's entrypoint script + # (entrypoint-chat.sh fails if running as root). + cap_drop = ["ALL"] + } + + # ── tmpfs /tmp:size=64m ──────────────────────────────────────────────── + # Sandbox hardening (#706): isolated temp area on tmpfs for runtime + # files. NOTE(review): confirm `ephemeral` is a valid block here; Nomad documents group-level `ephemeral_disk` (disk-backed, not tmpfs) and a docker `mount` block with type = "tmpfs" for tmpfs. + ephemeral { + size = 64 # MB } # ── Volume mounts ────────────────────────────────────────────────────── diff --git a/nomad/jobs/vault-runner.hcl b/nomad/jobs/vault-runner.hcl deleted file mode 100644 index f7b9aed..0000000 --- a/nomad/jobs/vault-runner.hcl +++ /dev/null @@ -1,132 +0,0 @@ -# ============================================================================= -# nomad/jobs/vault-runner.hcl — Parameterized batch job for vault action dispatch -# -# Part of the Nomad+Vault migration (S5.3, issue #990). Replaces the -# `docker run --rm vault-runner-${action_id}` pattern in dispatcher.sh with -# a Nomad-native parameterized batch job. Dispatched by the edge dispatcher -# (S5.4) via `nomad job dispatch`. -# -# Parameterized meta: -# action_id — vault action identifier (used by entrypoint-runner.sh) -# secrets_csv — comma-separated secret names (e.g. 
"GITHUB_TOKEN,DEPLOY_KEY") -# -# Vault integration (approach A — pre-defined templates): -# All 6 known runner secrets are rendered via template stanzas with -# error_on_missing_key = false. Secrets not granted by the dispatch's -# Vault policies render as empty strings. The dispatcher (S5.4) sets -# vault { policies = [...] } per-dispatch based on the action TOML's -# secrets=[...] list, scoping access to only the declared secrets. -# -# Cleanup: Nomad garbage-collects completed batch dispatches automatically. -# ============================================================================= - -job "vault-runner" { - type = "batch" - datacenters = ["dc1"] - - parameterized { - meta_required = ["action_id", "secrets_csv"] - } - - group "runner" { - count = 1 - - # ── Vault workload identity ────────────────────────────────────────────── - # Per-dispatch policies are composed by the dispatcher (S5.4) based on the - # action TOML's secrets=[...] list. Each policy grants read access to - # exactly one kv/data/disinto/runner/ path. Roles defined in - # vault/roles.yaml (runner-), policies in vault/policies/. - vault {} - - volume "ops-repo" { - type = "host" - source = "ops-repo" - read_only = true - } - - # No restart for batch — fail fast, let the dispatcher handle retries. 
- restart { - attempts = 0 - mode = "fail" - } - - task "runner" { - driver = "docker" - - config { - image = "disinto/agents:local" - force_pull = false - entrypoint = ["bash"] - args = [ - "/home/agent/disinto/docker/runner/entrypoint-runner.sh", - "${NOMAD_META_action_id}", - ] - } - - volume_mount { - volume = "ops-repo" - destination = "/home/agent/ops" - read_only = true - } - - # ── Non-secret env ─────────────────────────────────────────────────────── - env { - DISINTO_CONTAINER = "1" - FACTORY_ROOT = "/home/agent/disinto" - OPS_REPO_ROOT = "/home/agent/ops" - } - - # ── Vault-templated runner secrets (approach A) ──────────────────────── - # Pre-defined templates for all 6 known runner secrets. Each renders - # from kv/data/disinto/runner/. Secrets not granted by the - # dispatch's Vault policies produce empty env vars (harmless). - # error_on_missing_key = false prevents template-pending hangs when - # a secret path is absent or the policy doesn't grant access. - # - # Placeholder values kept < 16 chars to avoid secret-scan CI failures. - template { - destination = "secrets/runner.env" - env = true - error_on_missing_key = false - data = <