fix: [nomad-prep] P1 — run all 7 bot roles on llama backend (gates migration) (#801) #816
5 changed files with 178 additions and 9 deletions
|
|
@ -119,6 +119,7 @@ bash dev/phase-test.sh
|
|||
| Triage | `docker/reproduce/` | Deep root cause analysis | `formulas/triage.toml` |
|
||||
| Edge dispatcher | `docker/edge/` | Polls ops repo for vault actions, executes via Claude sessions | `docker/edge/dispatcher.sh` |
|
||||
| agents-llama | `docker/agents/` (same image) | Local-Qwen dev agent (`AGENT_ROLES=dev`), gated on `ENABLE_LLAMA_AGENT=1` | [docs/agents-llama.md](docs/agents-llama.md) |
|
||||
| agents-llama-all | `docker/agents/` (same image) | Local-Qwen all-roles agent (all 7 roles), profile `agents-llama-all` | [docs/agents-llama.md](docs/agents-llama.md) |
|
||||
|
||||
> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77).
|
||||
> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow details.
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ services:
|
|||
- GARDENER_INTERVAL=${GARDENER_INTERVAL:-21600}
|
||||
- ARCHITECT_INTERVAL=${ARCHITECT_INTERVAL:-21600}
|
||||
- PLANNER_INTERVAL=${PLANNER_INTERVAL:-43200}
|
||||
- SUPERVISOR_INTERVAL=${SUPERVISOR_INTERVAL:-1200}
|
||||
healthcheck:
|
||||
test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
|
||||
interval: 60s
|
||||
|
|
@ -123,6 +124,74 @@ services:
|
|||
networks:
|
||||
- disinto-net
|
||||
|
||||
agents-llama-all:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/agents/Dockerfile
|
||||
image: disinto/agents-llama:latest
|
||||
container_name: disinto-agents-llama-all
|
||||
restart: unless-stopped
|
||||
profiles: ["agents-llama-all"]
|
||||
security_opt:
|
||||
- apparmor=unconfined
|
||||
volumes:
|
||||
- agent-data:/home/agent/data
|
||||
- project-repos:/home/agent/repos
|
||||
- ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
|
||||
- ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro
|
||||
- ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
|
||||
- ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro
|
||||
- ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro
|
||||
- woodpecker-data:/woodpecker-data:ro
|
||||
environment:
|
||||
- FORGE_URL=http://forgejo:3000
|
||||
- FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
|
||||
- FORGE_TOKEN=${FORGE_TOKEN_LLAMA:-}
|
||||
- FORGE_PASS=${FORGE_PASS_LLAMA:-}
|
||||
- FORGE_REVIEW_TOKEN=${FORGE_REVIEW_TOKEN:-}
|
||||
- FORGE_PLANNER_TOKEN=${FORGE_PLANNER_TOKEN:-}
|
||||
- FORGE_GARDENER_TOKEN=${FORGE_GARDENER_TOKEN:-}
|
||||
- FORGE_VAULT_TOKEN=${FORGE_VAULT_TOKEN:-}
|
||||
- FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-}
|
||||
- FORGE_PREDICTOR_TOKEN=${FORGE_PREDICTOR_TOKEN:-}
|
||||
- FORGE_ARCHITECT_TOKEN=${FORGE_ARCHITECT_TOKEN:-}
|
||||
- FORGE_FILER_TOKEN=${FORGE_FILER_TOKEN:-}
|
||||
- FORGE_BOT_USERNAMES=${FORGE_BOT_USERNAMES:-}
|
||||
- WOODPECKER_TOKEN=${WOODPECKER_TOKEN:-}
|
||||
- CLAUDE_TIMEOUT=${CLAUDE_TIMEOUT:-7200}
|
||||
- CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
|
||||
- CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=60
|
||||
- CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-}
|
||||
- FORGE_ADMIN_PASS=${FORGE_ADMIN_PASS:-}
|
||||
- DISINTO_CONTAINER=1
|
||||
- PROJECT_TOML=projects/disinto.toml
|
||||
- PROJECT_NAME=${PROJECT_NAME:-project}
|
||||
- PROJECT_REPO_ROOT=/home/agent/repos/${PROJECT_NAME:-project}
|
||||
- WOODPECKER_DATA_DIR=/woodpecker-data
|
||||
- WOODPECKER_REPO_ID=${WOODPECKER_REPO_ID:-}
|
||||
- CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
|
||||
- POLL_INTERVAL=${POLL_INTERVAL:-300}
|
||||
- GARDENER_INTERVAL=${GARDENER_INTERVAL:-21600}
|
||||
- ARCHITECT_INTERVAL=${ARCHITECT_INTERVAL:-21600}
|
||||
- PLANNER_INTERVAL=${PLANNER_INTERVAL:-43200}
|
||||
- SUPERVISOR_INTERVAL=${SUPERVISOR_INTERVAL:-1200}
|
||||
- AGENT_ROLES=review,dev,gardener,architect,planner,predictor,supervisor
|
||||
healthcheck:
|
||||
test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
|
||||
interval: 60s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
depends_on:
|
||||
forgejo:
|
||||
condition: service_healthy
|
||||
woodpecker:
|
||||
condition: service_started
|
||||
networks:
|
||||
- disinto-net
|
||||
|
||||
reproduce:
|
||||
build:
|
||||
context: .
|
||||
|
|
|
|||
|
|
@ -7,14 +7,15 @@ set -euo pipefail
|
|||
# poll scripts. All Docker Compose env vars are inherited (PATH, FORGE_TOKEN,
|
||||
# ANTHROPIC_API_KEY, etc.).
|
||||
#
|
||||
# AGENT_ROLES env var controls which scripts run: "review,dev,gardener,architect,planner,predictor"
|
||||
# (default: all six). Uses while-true loop with staggered intervals:
|
||||
# AGENT_ROLES env var controls which scripts run: "review,dev,gardener,architect,planner,predictor,supervisor"
|
||||
# (default: all seven). Uses while-true loop with staggered intervals:
|
||||
# - review-poll: every 5 minutes (offset by 0s)
|
||||
# - dev-poll: every 5 minutes (offset by 2 minutes)
|
||||
# - gardener: every GARDENER_INTERVAL seconds (default: 21600 = 6 hours)
|
||||
# - architect: every ARCHITECT_INTERVAL seconds (default: 21600 = 6 hours)
|
||||
# - planner: every PLANNER_INTERVAL seconds (default: 43200 = 12 hours)
|
||||
# - predictor: every 24 hours (288 iterations * 5 min)
|
||||
# - supervisor: every SUPERVISOR_INTERVAL seconds (default: 1200 = 20 min)
|
||||
|
||||
DISINTO_BAKED="/home/agent/disinto"
|
||||
DISINTO_LIVE="/home/agent/repos/_factory"
|
||||
|
|
@ -328,7 +329,7 @@ init_state_dir
|
|||
|
||||
# Parse AGENT_ROLES env var (default: all agents)
|
||||
# Expected format: comma-separated list like "review,dev,gardener"
|
||||
AGENT_ROLES="${AGENT_ROLES:-review,dev,gardener,architect,planner,predictor}"
|
||||
AGENT_ROLES="${AGENT_ROLES:-review,dev,gardener,architect,planner,predictor,supervisor}"
|
||||
log "Agent roles configured: ${AGENT_ROLES}"
|
||||
|
||||
# Poll interval in seconds (5 minutes default)
|
||||
|
|
@ -338,9 +339,10 @@ POLL_INTERVAL="${POLL_INTERVAL:-300}"
|
|||
GARDENER_INTERVAL="${GARDENER_INTERVAL:-21600}"
|
||||
ARCHITECT_INTERVAL="${ARCHITECT_INTERVAL:-21600}"
|
||||
PLANNER_INTERVAL="${PLANNER_INTERVAL:-43200}"
|
||||
SUPERVISOR_INTERVAL="${SUPERVISOR_INTERVAL:-1200}"
|
||||
|
||||
log "Entering polling loop (interval: ${POLL_INTERVAL}s, roles: ${AGENT_ROLES})"
|
||||
log "Gardener interval: ${GARDENER_INTERVAL}s, Architect interval: ${ARCHITECT_INTERVAL}s, Planner interval: ${PLANNER_INTERVAL}s"
|
||||
log "Gardener interval: ${GARDENER_INTERVAL}s, Architect interval: ${ARCHITECT_INTERVAL}s, Planner interval: ${PLANNER_INTERVAL}s, Supervisor interval: ${SUPERVISOR_INTERVAL}s"
|
||||
|
||||
# Main polling loop using iteration counter for interval-based role scheduling (e.g. gardener, supervisor)
|
||||
iteration=0
|
||||
|
|
@ -463,6 +465,19 @@ print(cfg.get('primary_branch', 'main'))
|
|||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Supervisor (interval configurable via SUPERVISOR_INTERVAL env var, default 20 min)
|
||||
if [[ ",${AGENT_ROLES}," == *",supervisor,"* ]]; then
|
||||
supervisor_iteration=$((iteration * POLL_INTERVAL))
|
||||
if [ $((supervisor_iteration % SUPERVISOR_INTERVAL)) -eq 0 ] && [ "$now" -ge "$supervisor_iteration" ]; then
|
||||
if ! pgrep -f "supervisor-run.sh" >/dev/null; then
|
||||
log "Running supervisor (iteration ${iteration}, ${SUPERVISOR_INTERVAL}s interval) for ${toml}"
|
||||
gosu agent bash -c "cd ${DISINTO_DIR} && bash supervisor/supervisor-run.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/supervisor.log" 2>&1 &
|
||||
else
|
||||
log "Skipping supervisor — already running"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
sleep "${POLL_INTERVAL}"
|
||||
|
|
|
|||
|
|
@ -1,10 +1,17 @@
|
|||
# agents-llama — Local-Qwen Dev Agent
|
||||
# agents-llama — Local-Qwen Agents
|
||||
|
||||
The `agents-llama` service is an optional compose service that runs a dev agent
|
||||
The `agents-llama` service is an optional compose service that runs agents
|
||||
backed by a local llama-server instance (e.g. Qwen) instead of the Anthropic
|
||||
API. It uses the same Docker image as the main `agents` service but connects to
|
||||
a local inference endpoint via `ANTHROPIC_BASE_URL`.
|
||||
|
||||
Two profiles are available:
|
||||
|
||||
| Profile | Service | Roles | Use case |
|
||||
|---------|---------|-------|----------|
|
||||
| _(none — enabled via `ENABLE_LLAMA_AGENT=1`)_ | `agents-llama` | `dev` only | Conservative: single-role soak test |
|
||||
| `agents-llama-all` | `agents-llama-all` | all 7 (review, dev, gardener, architect, planner, predictor, supervisor) | Pre-migration: validate every role on llama before Nomad cutover |
|
||||
|
||||
## Enabling
|
||||
|
||||
Set `ENABLE_LLAMA_AGENT=1` in `.env` (or `.env.enc`) and provide the required
|
||||
|
|
@ -19,6 +26,17 @@ ANTHROPIC_BASE_URL=http://host.docker.internal:8081 # llama-server endpoint
|
|||
|
||||
Then regenerate the compose file (`disinto init ...`) and bring the stack up.
|
||||
|
||||
### Running all 7 roles (agents-llama-all)
|
||||
|
||||
```bash
|
||||
docker compose --profile agents-llama-all up -d
|
||||
```
|
||||
|
||||
This starts the `agents-llama-all` container with all 7 bot roles against the
|
||||
local llama endpoint. The per-role forge tokens (`FORGE_REVIEW_TOKEN`,
|
||||
`FORGE_GARDENER_TOKEN`, etc.) must be set in `.env` — they are the same tokens
|
||||
used by the Claude-backed `agents` container.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- **llama-server** (or compatible OpenAI-API endpoint) running on the host,
|
||||
|
|
@ -28,11 +46,10 @@ Then regenerate the compose file (`disinto init ...`) and bring the stack up.
|
|||
|
||||
## Behaviour
|
||||
|
||||
- `AGENT_ROLES=dev` — the llama agent only picks up dev work.
|
||||
- `agents-llama`: `AGENT_ROLES=dev` — only picks up dev work.
|
||||
- `agents-llama-all`: `AGENT_ROLES=review,dev,gardener,architect,planner,predictor,supervisor` — runs all 7 roles.
|
||||
- `CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=60` — more aggressive compaction for smaller
|
||||
context windows.
|
||||
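- `agents-llama`: `depends_on: forgejo (service_healthy)` — does **not** depend on Woodpecker
|
||||
(the dev-only llama agent doesn't need CI). `agents-llama-all` additionally depends on `woodpecker` (`service_started`).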
|
||||
- Serialises on the llama-server's single KV cache (AD-002).
|
||||
|
||||
## Disabling
|
||||
|
|
|
|||
|
|
@ -140,6 +140,7 @@ _generate_local_model_services() {
|
|||
GARDENER_INTERVAL: "${GARDENER_INTERVAL:-21600}"
|
||||
ARCHITECT_INTERVAL: "${ARCHITECT_INTERVAL:-21600}"
|
||||
PLANNER_INTERVAL: "${PLANNER_INTERVAL:-43200}"
|
||||
SUPERVISOR_INTERVAL: "${SUPERVISOR_INTERVAL:-1200}"
|
||||
depends_on:
|
||||
forgejo:
|
||||
condition: service_healthy
|
||||
|
|
@ -451,6 +452,72 @@ COMPOSEEOF
|
|||
condition: service_healthy
|
||||
networks:
|
||||
- disinto-net
|
||||
|
||||
agents-llama-all:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/agents/Dockerfile
|
||||
container_name: disinto-agents-llama-all
|
||||
restart: unless-stopped
|
||||
profiles: ["agents-llama-all"]
|
||||
security_opt:
|
||||
- apparmor=unconfined
|
||||
volumes:
|
||||
- agent-data:/home/agent/data
|
||||
- project-repos:/home/agent/repos
|
||||
- ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
|
||||
- ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro
|
||||
- ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
|
||||
- ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro
|
||||
- ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro
|
||||
- woodpecker-data:/woodpecker-data:ro
|
||||
environment:
|
||||
FORGE_URL: http://forgejo:3000
|
||||
FORGE_REPO: ${FORGE_REPO:-disinto-admin/disinto}
|
||||
FORGE_TOKEN: ${FORGE_TOKEN_LLAMA:-}
|
||||
FORGE_PASS: ${FORGE_PASS_LLAMA:-}
|
||||
FORGE_REVIEW_TOKEN: ${FORGE_REVIEW_TOKEN:-}
|
||||
FORGE_PLANNER_TOKEN: ${FORGE_PLANNER_TOKEN:-}
|
||||
FORGE_GARDENER_TOKEN: ${FORGE_GARDENER_TOKEN:-}
|
||||
FORGE_VAULT_TOKEN: ${FORGE_VAULT_TOKEN:-}
|
||||
FORGE_SUPERVISOR_TOKEN: ${FORGE_SUPERVISOR_TOKEN:-}
|
||||
FORGE_PREDICTOR_TOKEN: ${FORGE_PREDICTOR_TOKEN:-}
|
||||
FORGE_ARCHITECT_TOKEN: ${FORGE_ARCHITECT_TOKEN:-}
|
||||
FORGE_FILER_TOKEN: ${FORGE_FILER_TOKEN:-}
|
||||
FORGE_BOT_USERNAMES: ${FORGE_BOT_USERNAMES:-}
|
||||
WOODPECKER_TOKEN: ${WOODPECKER_TOKEN:-}
|
||||
CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200}
|
||||
CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
|
||||
CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "60"
|
||||
CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1"
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
ANTHROPIC_BASE_URL: ${ANTHROPIC_BASE_URL:-}
|
||||
FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-}
|
||||
DISINTO_CONTAINER: "1"
|
||||
PROJECT_NAME: ${PROJECT_NAME:-project}
|
||||
PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
|
||||
WOODPECKER_DATA_DIR: /woodpecker-data
|
||||
WOODPECKER_REPO_ID: "PLACEHOLDER_WP_REPO_ID"
|
||||
CLAUDE_CONFIG_DIR: ${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
|
||||
POLL_INTERVAL: ${POLL_INTERVAL:-300}
|
||||
GARDENER_INTERVAL: ${GARDENER_INTERVAL:-21600}
|
||||
ARCHITECT_INTERVAL: ${ARCHITECT_INTERVAL:-21600}
|
||||
PLANNER_INTERVAL: ${PLANNER_INTERVAL:-43200}
|
||||
SUPERVISOR_INTERVAL: ${SUPERVISOR_INTERVAL:-1200}
|
||||
AGENT_ROLES: review,dev,gardener,architect,planner,predictor,supervisor
|
||||
healthcheck:
|
||||
test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
|
||||
interval: 60s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
depends_on:
|
||||
forgejo:
|
||||
condition: service_healthy
|
||||
woodpecker:
|
||||
condition: service_started
|
||||
networks:
|
||||
- disinto-net
|
||||
LLAMAEOF
|
||||
fi
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue