Compare commits
3 commits
ebdf8e0f5e
...
72aecff8d8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
72aecff8d8 | ||
| 84d63d49b5 | |||
|
|
e17e9604c1 |
5 changed files with 358 additions and 4 deletions
|
|
@@ -39,7 +39,7 @@ disinto/ (code repo)
|
||||||
│ hooks/ — Claude Code session hooks (on-compact-reinject, on-idle-stop, on-phase-change, on-pretooluse-guard, on-session-end, on-stop-failure)
|
│ hooks/ — Claude Code session hooks (on-compact-reinject, on-idle-stop, on-phase-change, on-pretooluse-guard, on-session-end, on-stop-failure)
|
||||||
│ init/nomad/ — cluster-up.sh, install.sh, vault-init.sh, lib-systemd.sh (Nomad+Vault Step 0 installers, #821-#825); wp-oauth-register.sh (Forgejo OAuth2 app + Vault KV seeder for Woodpecker, S3.3); deploy.sh (dependency-ordered Nomad job deploy + health-wait, S4)
|
│ init/nomad/ — cluster-up.sh, install.sh, vault-init.sh, lib-systemd.sh (Nomad+Vault Step 0 installers, #821-#825); wp-oauth-register.sh (Forgejo OAuth2 app + Vault KV seeder for Woodpecker, S3.3); deploy.sh (dependency-ordered Nomad job deploy + health-wait, S4)
|
||||||
├── nomad/ server.hcl, client.hcl (allow_privileged for woodpecker-agent, S3-fix-5), vault.hcl — HCL configs deployed to /etc/nomad.d/ and /etc/vault.d/ by lib/init/nomad/cluster-up.sh
|
├── nomad/ server.hcl, client.hcl (allow_privileged for woodpecker-agent, S3-fix-5), vault.hcl — HCL configs deployed to /etc/nomad.d/ and /etc/vault.d/ by lib/init/nomad/cluster-up.sh
|
||||||
│ jobs/ — Nomad jobspecs: forgejo.hcl (Vault secrets via template, S2.4); woodpecker-server.hcl + woodpecker-agent.hcl (host-net, docker.sock, Vault KV, S3.1-S3.2); agents.hcl (7 roles, llama, Vault-templated bot tokens, S4.1)
|
│ jobs/ — Nomad jobspecs: forgejo.hcl (Vault secrets via template, S2.4); woodpecker-server.hcl + woodpecker-agent.hcl (host-net, docker.sock, Vault KV, S3.1-S3.2); agents.hcl (7 roles, llama, Vault-templated bot tokens, S4.1); vault-runner.hcl (parameterized batch dispatch, S5.3)
|
||||||
├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored)
|
├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored)
|
||||||
├── formulas/ Issue templates (TOML specs for multi-step agent tasks)
|
├── formulas/ Issue templates (TOML specs for multi-step agent tasks)
|
||||||
├── docker/ Dockerfiles and entrypoints: reproduce, triage, edge dispatcher, chat (server.py, entrypoint-chat.sh, Dockerfile, ui/)
|
├── docker/ Dockerfiles and entrypoints: reproduce, triage, edge dispatcher, chat (server.py, entrypoint-chat.sh, Dockerfile, ui/)
|
||||||
|
|
|
||||||
193
nomad/jobs/edge.hcl
Normal file
193
nomad/jobs/edge.hcl
Normal file
|
|
@@ -0,0 +1,193 @@
|
||||||
|
# =============================================================================
# nomad/jobs/edge.hcl — Edge proxy (Caddy + dispatcher sidecar) (Nomad service job)
#
# Part of the Nomad+Vault migration (S5.1, issue #988). Caddy fronts Forgejo,
# Woodpecker, staging, and chat as a reverse proxy; the dispatcher sidecar
# polls disinto-ops for vault actions and dispatches them as Nomad batch jobs.
#
# Host_volume contract:
#   This job mounts caddy-data from nomad/client.hcl. The path
#   /srv/disinto/caddy-data is created by lib/init/nomad/cluster-up.sh before
#   any job references it. Keep the `source = "caddy-data"` below in sync
#   with the host_volume stanza in client.hcl.
#
# Build step (S5.1):
#   docker/edge/Dockerfile is custom (adds bash, jq, curl, git, docker-cli,
#   python3, openssh-client, autossh to caddy:latest). Build as
#   disinto/edge:local using the same pattern as disinto/agents:local.
#   Command: docker build -t disinto/edge:local -f docker/edge/Dockerfile docker/edge
#
# Not the runtime yet: docker-compose.yml remains the factory's live stack
# until cutover. This file exists so CI can validate it and so S5.2 can wire
# `disinto init --backend=nomad --with edge` to `nomad job run` it.
# =============================================================================

job "edge" {
  type        = "service"
  datacenters = ["dc1"]

  group "edge" {
    count = 1

    # ── Vault workload identity for dispatcher (S5.1, issue #988) ──────────
    # Service role so the dispatcher task can fetch vault actions from KV v2.
    # Role defined in vault/roles.yaml, policy in vault/policies/dispatcher.hcl.
    vault {
      role = "service-dispatcher"
    }

    # ── Network ports (S5.1, issue #988) ──────────────────────────────────
    # Caddy listens on :80 and :443; both are exposed statically on the host.
    network {
      port "http" {
        static = 80
        to     = 80
      }

      port "https" {
        static = 443
        to     = 443
      }
    }

    # ── Host-volume mounts (S5.1, issue #988) ─────────────────────────────
    # caddy-data: ACME certificates and Caddy config state.
    volume "caddy-data" {
      type      = "host"
      source    = "caddy-data"
      read_only = false
    }

    # ops-repo: disinto-ops clone used for vault actions polling.
    volume "ops-repo" {
      type      = "host"
      source    = "ops-repo"
      read_only = false
    }

    # ── Conservative restart policy ───────────────────────────────────────
    # Caddy should be stable; the dispatcher may restart on errors.
    restart {
      attempts = 3
      interval = "5m"
      delay    = "15s"
      mode     = "delay"
    }

    # ── Service registration ──────────────────────────────────────────────
    # Caddy is an HTTP reverse proxy, so health is checked on port 80.
    service {
      name     = "edge"
      port     = "http"
      provider = "nomad"

      check {
        type     = "http"
        path     = "/"
        interval = "10s"
        timeout  = "3s"
      }
    }

    # ── Caddy task (S5.1, issue #988) ─────────────────────────────────────
    task "caddy" {
      driver = "docker"

      config {
        # Pre-built disinto/edge:local image (custom Dockerfile adds bash,
        # jq, curl, git, docker-cli, python3, openssh-client, autossh).
        image      = "disinto/edge:local"
        force_pull = false
        ports      = ["http", "https"]

        # apparmor=unconfined matches docker-compose — needed for autossh
        # in the entrypoint script.
        security_opt = ["apparmor=unconfined"]
      }

      # caddy-data volume holds ACME state and the config directory.
      # The Caddyfile itself is mounted at /etc/caddy/Caddyfile by
      # entrypoint-edge.sh.
      volume_mount {
        volume      = "caddy-data"
        destination = "/data"
        read_only   = false
      }

      # ── Non-secret env ───────────────────────────────────────────────────
      env {
        FORGE_URL         = "http://forgejo:3000"
        FORGE_REPO        = "disinto-admin/disinto"
        DISINTO_CONTAINER = "1"
        PROJECT_NAME      = "disinto"
      }

      # CPU + memory headroom for reverse-proxy work.
      resources {
        cpu    = 200
        memory = 256
      }
    }

    # ── Dispatcher task (S5.1, issue #988) ────────────────────────────────
    task "dispatcher" {
      driver = "docker"

      config {
        # Same disinto/agents:local image as the other agents.
        image      = "disinto/agents:local"
        force_pull = false

        # apparmor=unconfined matches docker-compose.
        security_opt = ["apparmor=unconfined"]

        # docker.sock is mounted via a bind-volume (not a host volume) for
        # legacy docker-backend compat: Nomad host volumes must be named in
        # client.hcl, and socket files cannot be host volumes.
        volumes = ["/var/run/docker.sock:/var/run/docker.sock:ro"]
      }

      # ops-repo mount for vault actions polling.
      volume_mount {
        volume      = "ops-repo"
        destination = "/home/agent/repos/disinto-ops"
        read_only   = false
      }

      # ── Vault-templated secrets (S5.1, issue #988) ──────────────────────
      # Renders FORGE_TOKEN from Vault KV v2 for ops-repo access.
      template {
        destination          = "secrets/dispatcher.env"
        env                  = true
        change_mode          = "restart"
        error_on_missing_key = false
        data                 = <<EOT
{{- with secret "kv/data/disinto/bots/vault" -}}
FORGE_TOKEN={{ .Data.data.token }}
{{- else -}}
# WARNING: kv/disinto/bots/vault is empty — run tools/vault-seed-agents.sh
FORGE_TOKEN=seed-me
{{- end }}
EOT
      }

      # ── Non-secret env ───────────────────────────────────────────────────
      env {
        DISPATCHER_BACKEND = "nomad"
        FORGE_URL          = "http://forgejo:3000"
        FORGE_REPO         = "disinto-admin/disinto"
        FORGE_OPS_REPO     = "disinto-admin/disinto-ops"
        PRIMARY_BRANCH     = "main"
        DISINTO_CONTAINER  = "1"
        OPS_REPO_ROOT      = "/home/agent/repos/disinto-ops"
        FORGE_ADMIN_USERS  = "vault-bot,admin"
      }

      # Dispatcher is lightweight — minimal CPU + memory.
      resources {
        cpu    = 100
        memory = 256
      }
    }
  }
}
|
||||||
132
nomad/jobs/vault-runner.hcl
Normal file
132
nomad/jobs/vault-runner.hcl
Normal file
|
|
@@ -0,0 +1,132 @@
|
||||||
|
# =============================================================================
# nomad/jobs/vault-runner.hcl — Parameterized batch job for vault action dispatch
#
# Part of the Nomad+Vault migration (S5.3, issue #990). Replaces the
# `docker run --rm vault-runner-${action_id}` pattern in dispatcher.sh with
# a Nomad-native parameterized batch job, dispatched by the edge dispatcher
# (S5.4) via `nomad job dispatch`.
#
# Parameterized meta:
#   action_id   — vault action identifier (used by entrypoint-runner.sh)
#   secrets_csv — comma-separated secret names (e.g. "GITHUB_TOKEN,DEPLOY_KEY")
#
# Vault integration (approach A — pre-defined templates):
#   All 6 known runner secrets are rendered via template stanzas with
#   error_on_missing_key = false. Secrets not granted by the dispatch's
#   Vault policies render as empty strings. The dispatcher (S5.4) sets
#   vault { policies = [...] } per-dispatch based on the action TOML's
#   secrets=[...] list, scoping access to only the declared secrets.
#
# Cleanup: Nomad garbage-collects completed batch dispatches automatically.
# =============================================================================

job "vault-runner" {
  type        = "batch"
  datacenters = ["dc1"]

  parameterized {
    meta_required = ["action_id", "secrets_csv"]
  }

  group "runner" {
    count = 1

    # ── Vault workload identity ──────────────────────────────────────────────
    # Per-dispatch policies are composed by the dispatcher (S5.4) from the
    # action TOML's secrets=[...] list; each grants read access to exactly one
    # kv/data/disinto/runner/<NAME> path. Roles live in vault/roles.yaml
    # (runner-<NAME>), policies in vault/policies/.
    vault {}

    volume "ops-repo" {
      type      = "host"
      source    = "ops-repo"
      read_only = true
    }

    # No restart for batch — fail fast and let the dispatcher handle retries.
    restart {
      attempts = 0
      mode     = "fail"
    }

    task "runner" {
      driver = "docker"

      config {
        image      = "disinto/agents:local"
        force_pull = false
        entrypoint = ["bash"]
        args = [
          "/home/agent/disinto/docker/runner/entrypoint-runner.sh",
          "${NOMAD_META_action_id}",
        ]
      }

      volume_mount {
        volume      = "ops-repo"
        destination = "/home/agent/ops"
        read_only   = true
      }

      # ── Non-secret env ───────────────────────────────────────────────────────
      env {
        DISINTO_CONTAINER = "1"
        FACTORY_ROOT      = "/home/agent/disinto"
        OPS_REPO_ROOT     = "/home/agent/ops"
      }

      # ── Vault-templated runner secrets (approach A) ────────────────────────
      # Pre-defined templates for all 6 known runner secrets, each rendered
      # from kv/data/disinto/runner/<NAME>. Secrets not granted by the
      # dispatch's Vault policies produce empty env vars (harmless).
      # error_on_missing_key = false prevents template-pending hangs when a
      # secret path is absent or the policy doesn't grant access.
      #
      # Placeholder values kept < 16 chars to avoid secret-scan CI failures.
      template {
        destination          = "secrets/runner.env"
        env                  = true
        error_on_missing_key = false
        data                 = <<EOT
{{- with secret "kv/data/disinto/runner/GITHUB_TOKEN" -}}
GITHUB_TOKEN={{ .Data.data.value }}
{{- else -}}
GITHUB_TOKEN=
{{- end }}
{{- with secret "kv/data/disinto/runner/CODEBERG_TOKEN" -}}
CODEBERG_TOKEN={{ .Data.data.value }}
{{- else -}}
CODEBERG_TOKEN=
{{- end }}
{{- with secret "kv/data/disinto/runner/CLAWHUB_TOKEN" -}}
CLAWHUB_TOKEN={{ .Data.data.value }}
{{- else -}}
CLAWHUB_TOKEN=
{{- end }}
{{- with secret "kv/data/disinto/runner/DEPLOY_KEY" -}}
DEPLOY_KEY={{ .Data.data.value }}
{{- else -}}
DEPLOY_KEY=
{{- end }}
{{- with secret "kv/data/disinto/runner/NPM_TOKEN" -}}
NPM_TOKEN={{ .Data.data.value }}
{{- else -}}
NPM_TOKEN=
{{- end }}
{{- with secret "kv/data/disinto/runner/DOCKER_HUB_TOKEN" -}}
DOCKER_HUB_TOKEN={{ .Data.data.value }}
{{- else -}}
DOCKER_HUB_TOKEN=
{{- end }}
EOT
      }

      # Formula execution headroom — matches the agents.hcl baseline.
      resources {
        cpu    = 500
        memory = 1024
      }
    }
  }
}
|
||||||
29
vault/policies/service-dispatcher.hcl
Normal file
29
vault/policies/service-dispatcher.hcl
Normal file
|
|
@@ -0,0 +1,29 @@
|
||||||
|
# vault/policies/service-dispatcher.hcl
#
# Edge dispatcher policy: enumerates the runner secret namespace (to check
# secret presence before dispatching) and reads the shared ops-repo
# credentials (token + clone URL) used to fetch action TOMLs.
#
# Scope:
#   - kv/disinto/runner/*          — read all per-secret values + list keys
#   - kv/disinto/shared/ops-repo/* — read the ops-repo creds bundle
#
# The ephemeral runner container created per dispatch gets the narrow
# runner-<NAME> policies, NOT this one. This policy stays bound to the
# long-running dispatcher only.

path "kv/data/disinto/runner/*" {
  capabilities = ["read"]
}

path "kv/metadata/disinto/runner/*" {
  capabilities = ["list", "read"]
}

path "kv/data/disinto/shared/ops-repo" {
  capabilities = ["read"]
}

path "kv/metadata/disinto/shared/ops-repo" {
  capabilities = ["list", "read"]
}
|
||||||
|
|
@@ -121,10 +121,10 @@ roles:
|
||||||
job_id: bot-vault
|
job_id: bot-vault
|
||||||
|
|
||||||
# ── Edge dispatcher ────────────────────────────────────────────────────────
|
# ── Edge dispatcher ────────────────────────────────────────────────────────
|
||||||
- name: dispatcher
|
- name: service-dispatcher
|
||||||
policy: dispatcher
|
policy: service-dispatcher
|
||||||
namespace: default
|
namespace: default
|
||||||
job_id: dispatcher
|
job_id: edge
|
||||||
|
|
||||||
# ── Per-secret runner roles ────────────────────────────────────────────────
|
# ── Per-secret runner roles ────────────────────────────────────────────────
|
||||||
# vault-runner (Step 5) composes runner-<NAME> policies onto each
|
# vault-runner (Step 5) composes runner-<NAME> policies onto each
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue