diff --git a/AGENTS.md b/AGENTS.md index d76df7c..735879f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -119,6 +119,7 @@ bash dev/phase-test.sh | Triage | `docker/reproduce/` | Deep root cause analysis | `formulas/triage.toml` | | Edge dispatcher | `docker/edge/` | Polls ops repo for vault actions, executes via Claude sessions | `docker/edge/dispatcher.sh` | | agents-llama | `docker/agents/` (same image) | Local-Qwen dev agent (`AGENT_ROLES=dev`), gated on `ENABLE_LLAMA_AGENT=1` | [docs/agents-llama.md](docs/agents-llama.md) | +| agents-llama-all | `docker/agents/` (same image) | Local-Qwen all-roles agent (all 7 roles), profile `agents-llama-all` | [docs/agents-llama.md](docs/agents-llama.md) | > **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77). > See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow details. diff --git a/docker-compose.yml b/docker-compose.yml index ba6a1fd..ba8c77c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -49,6 +49,7 @@ services: - GARDENER_INTERVAL=${GARDENER_INTERVAL:-21600} - ARCHITECT_INTERVAL=${ARCHITECT_INTERVAL:-21600} - PLANNER_INTERVAL=${PLANNER_INTERVAL:-43200} + - SUPERVISOR_INTERVAL=${SUPERVISOR_INTERVAL:-1200} healthcheck: test: ["CMD", "pgrep", "-f", "entrypoint.sh"] interval: 60s @@ -123,6 +124,74 @@ services: networks: - disinto-net + agents-llama-all: + build: + context: . 
+ dockerfile: docker/agents/Dockerfile + image: disinto/agents-llama:latest + container_name: disinto-agents-llama-all + restart: unless-stopped + profiles: ["agents-llama-all"] # opt-in: not started by a plain "docker compose up"; enable with --profile agents-llama-all + security_opt: + - apparmor=unconfined + volumes: + - agent-data:/home/agent/data + - project-repos:/home/agent/repos + - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared} + - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro + - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro + - ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro + - ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro + - woodpecker-data:/woodpecker-data:ro + environment: + - FORGE_URL=http://forgejo:3000 + - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto} + - FORGE_TOKEN=${FORGE_TOKEN_LLAMA:-} + - FORGE_PASS=${FORGE_PASS_LLAMA:-} + - FORGE_REVIEW_TOKEN=${FORGE_REVIEW_TOKEN:-} + - FORGE_PLANNER_TOKEN=${FORGE_PLANNER_TOKEN:-} + - FORGE_GARDENER_TOKEN=${FORGE_GARDENER_TOKEN:-} + - FORGE_VAULT_TOKEN=${FORGE_VAULT_TOKEN:-} + - FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-} + - FORGE_PREDICTOR_TOKEN=${FORGE_PREDICTOR_TOKEN:-} + - FORGE_ARCHITECT_TOKEN=${FORGE_ARCHITECT_TOKEN:-} + - FORGE_FILER_TOKEN=${FORGE_FILER_TOKEN:-} + - FORGE_BOT_USERNAMES=${FORGE_BOT_USERNAMES:-} + - WOODPECKER_TOKEN=${WOODPECKER_TOKEN:-} + - CLAUDE_TIMEOUT=${CLAUDE_TIMEOUT:-7200} + - CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1} + - CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=60 # more aggressive compaction for smaller context windows (see docs/agents-llama.md) + - CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} + - ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-} + - FORGE_ADMIN_PASS=${FORGE_ADMIN_PASS:-} + - DISINTO_CONTAINER=1 + - PROJECT_TOML=projects/disinto.toml + - PROJECT_NAME=${PROJECT_NAME:-project} + - PROJECT_REPO_ROOT=/home/agent/repos/${PROJECT_NAME:-project} + - WOODPECKER_DATA_DIR=/woodpecker-data + - WOODPECKER_REPO_ID=${WOODPECKER_REPO_ID:-} + - 
CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config} + - POLL_INTERVAL=${POLL_INTERVAL:-300} + - GARDENER_INTERVAL=${GARDENER_INTERVAL:-21600} + - ARCHITECT_INTERVAL=${ARCHITECT_INTERVAL:-21600} + - PLANNER_INTERVAL=${PLANNER_INTERVAL:-43200} + - SUPERVISOR_INTERVAL=${SUPERVISOR_INTERVAL:-1200} + - AGENT_ROLES=review,dev,gardener,architect,planner,predictor,supervisor # all seven roles understood by docker/agents/entrypoint.sh + healthcheck: + test: ["CMD", "pgrep", "-f", "entrypoint.sh"] + interval: 60s + timeout: 5s + retries: 3 + start_period: 30s + depends_on: + forgejo: + condition: service_healthy + woodpecker: + condition: service_started + networks: + - disinto-net + reproduce: build: context: . diff --git a/docker/agents/entrypoint.sh b/docker/agents/entrypoint.sh index 9df6d01..b7593a2 100644 --- a/docker/agents/entrypoint.sh +++ b/docker/agents/entrypoint.sh @@ -7,14 +7,15 @@ set -euo pipefail # poll scripts. All Docker Compose env vars are inherited (PATH, FORGE_TOKEN, # ANTHROPIC_API_KEY, etc.). # -# AGENT_ROLES env var controls which scripts run: "review,dev,gardener,architect,planner,predictor" -# (default: all six). Uses while-true loop with staggered intervals: +# AGENT_ROLES env var controls which scripts run: "review,dev,gardener,architect,planner,predictor,supervisor" +# (default: all seven). 
Uses while-true loop with staggered intervals: # - review-poll: every 5 minutes (offset by 0s) # - dev-poll: every 5 minutes (offset by 2 minutes) # - gardener: every GARDENER_INTERVAL seconds (default: 21600 = 6 hours) # - architect: every ARCHITECT_INTERVAL seconds (default: 21600 = 6 hours) # - planner: every PLANNER_INTERVAL seconds (default: 43200 = 12 hours) # - predictor: every 24 hours (288 iterations * 5 min) +# - supervisor: every SUPERVISOR_INTERVAL seconds (default: 1200 = 20 min) DISINTO_BAKED="/home/agent/disinto" DISINTO_LIVE="/home/agent/repos/_factory" @@ -328,7 +329,7 @@ init_state_dir # Parse AGENT_ROLES env var (default: all agents) # Expected format: comma-separated list like "review,dev,gardener" -AGENT_ROLES="${AGENT_ROLES:-review,dev,gardener,architect,planner,predictor}" +AGENT_ROLES="${AGENT_ROLES:-review,dev,gardener,architect,planner,predictor,supervisor}" log "Agent roles configured: ${AGENT_ROLES}" # Poll interval in seconds (5 minutes default) @@ -338,9 +339,10 @@ POLL_INTERVAL="${POLL_INTERVAL:-300}" GARDENER_INTERVAL="${GARDENER_INTERVAL:-21600}" ARCHITECT_INTERVAL="${ARCHITECT_INTERVAL:-21600}" PLANNER_INTERVAL="${PLANNER_INTERVAL:-43200}" +SUPERVISOR_INTERVAL="${SUPERVISOR_INTERVAL:-1200}" log "Entering polling loop (interval: ${POLL_INTERVAL}s, roles: ${AGENT_ROLES})" -log "Gardener interval: ${GARDENER_INTERVAL}s, Architect interval: ${ARCHITECT_INTERVAL}s, Planner interval: ${PLANNER_INTERVAL}s" +log "Gardener interval: ${GARDENER_INTERVAL}s, Architect interval: ${ARCHITECT_INTERVAL}s, Planner interval: ${PLANNER_INTERVAL}s, Supervisor interval: ${SUPERVISOR_INTERVAL}s" # Main polling loop using iteration counter for gardener scheduling iteration=0 @@ -463,6 +465,19 @@ print(cfg.get('primary_branch', 'main')) fi fi fi + + # Supervisor (interval configurable via SUPERVISOR_INTERVAL env var, default 20 min). Fires on iterations where iteration * POLL_INTERVAL is a multiple of SUPERVISOR_INTERVAL, so SUPERVISOR_INTERVAL must be non-zero (it is the modulo divisor) and should be a multiple of POLL_INTERVAL; the run is skipped while a supervisor-run.sh process is already running. + if [[ ",${AGENT_ROLES}," == *",supervisor,"* ]]; then + supervisor_iteration=$((iteration * POLL_INTERVAL)) + if [ 
$((supervisor_iteration % SUPERVISOR_INTERVAL)) -eq 0 ] && [ "$now" -ge "$supervisor_iteration" ]; then + if ! pgrep -f "supervisor-run.sh" >/dev/null; then + log "Running supervisor (iteration ${iteration}, ${SUPERVISOR_INTERVAL}s interval) for ${toml}" + gosu agent bash -c "cd ${DISINTO_DIR} && bash supervisor/supervisor-run.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/supervisor.log" 2>&1 & + else + log "Skipping supervisor — already running" + fi + fi + fi done sleep "${POLL_INTERVAL}" diff --git a/docs/agents-llama.md b/docs/agents-llama.md index 6764360..88622a7 100644 --- a/docs/agents-llama.md +++ b/docs/agents-llama.md @@ -1,10 +1,17 @@ -# agents-llama — Local-Qwen Dev Agent +# agents-llama — Local-Qwen Agents -The `agents-llama` service is an optional compose service that runs a dev agent +The `agents-llama` service is an optional compose service that runs agents backed by a local llama-server instance (e.g. Qwen) instead of the Anthropic API. It uses the same Docker image as the main `agents` service but connects to a local inference endpoint via `ANTHROPIC_BASE_URL`. +Two profiles are available: + +| Profile | Service | Roles | Use case | +|---------|---------|-------|----------| +| _(default)_ | `agents-llama` | `dev` only | Conservative: single-role soak test (still opt-in via `ENABLE_LLAMA_AGENT=1`, see Enabling) | +| `agents-llama-all` | `agents-llama-all` | all 7 (review, dev, gardener, architect, planner, predictor, supervisor) | Pre-migration: validate every role on llama before Nomad cutover | + ## Enabling Set `ENABLE_LLAMA_AGENT=1` in `.env` (or `.env.enc`) and provide the required @@ -19,6 +26,17 @@ ANTHROPIC_BASE_URL=http://host.docker.internal:8081 # llama-server endpoint Then regenerate the compose file (`disinto init ...`) and bring the stack up. +### Running all 7 roles (agents-llama-all) + +```bash +docker compose --profile agents-llama-all up -d +``` + +This starts the `agents-llama-all` container with all 7 bot roles against the +local llama endpoint. 
The per-role forge tokens (`FORGE_REVIEW_TOKEN`, +`FORGE_GARDENER_TOKEN`, etc.) must be set in `.env` — they are the same tokens +used by the Claude-backed `agents` container. + ## Prerequisites - **llama-server** (or compatible OpenAI-API endpoint) running on the host, @@ -28,11 +46,10 @@ Then regenerate the compose file (`disinto init ...`) and bring the stack up. ## Behaviour -- `AGENT_ROLES=dev` — the llama agent only picks up dev work. +- `agents-llama`: `AGENT_ROLES=dev` — only picks up dev work. +- `agents-llama-all`: `AGENT_ROLES=review,dev,gardener,architect,planner,predictor,supervisor` — runs all 7 roles. - `CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=60` — more aggressive compaction for smaller context windows. -- `depends_on: forgejo (service_healthy)` — does **not** depend on Woodpecker - (the llama agent doesn't need CI). - Serialises on the llama-server's single KV cache (AD-002). ## Disabling diff --git a/lib/generators.sh b/lib/generators.sh index a4598e1..02af667 100644 --- a/lib/generators.sh +++ b/lib/generators.sh @@ -140,6 +140,7 @@ _generate_local_model_services() { GARDENER_INTERVAL: "${GARDENER_INTERVAL:-21600}" ARCHITECT_INTERVAL: "${ARCHITECT_INTERVAL:-21600}" PLANNER_INTERVAL: "${PLANNER_INTERVAL:-43200}" + SUPERVISOR_INTERVAL: "${SUPERVISOR_INTERVAL:-1200}" depends_on: forgejo: condition: service_healthy @@ -451,6 +452,72 @@ COMPOSEEOF condition: service_healthy networks: - disinto-net + + agents-llama-all: + build: + context: . 
+ dockerfile: docker/agents/Dockerfile + container_name: disinto-agents-llama-all + restart: unless-stopped + profiles: ["agents-llama-all"] + security_opt: + - apparmor=unconfined + volumes: + - agent-data:/home/agent/data + - project-repos:/home/agent/repos + - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared} + - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro + - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro + - ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro + - ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro + - woodpecker-data:/woodpecker-data:ro + environment: + FORGE_URL: http://forgejo:3000 + FORGE_REPO: ${FORGE_REPO:-disinto-admin/disinto} + FORGE_TOKEN: ${FORGE_TOKEN_LLAMA:-} + FORGE_PASS: ${FORGE_PASS_LLAMA:-} + FORGE_REVIEW_TOKEN: ${FORGE_REVIEW_TOKEN:-} + FORGE_PLANNER_TOKEN: ${FORGE_PLANNER_TOKEN:-} + FORGE_GARDENER_TOKEN: ${FORGE_GARDENER_TOKEN:-} + FORGE_VAULT_TOKEN: ${FORGE_VAULT_TOKEN:-} + FORGE_SUPERVISOR_TOKEN: ${FORGE_SUPERVISOR_TOKEN:-} + FORGE_PREDICTOR_TOKEN: ${FORGE_PREDICTOR_TOKEN:-} + FORGE_ARCHITECT_TOKEN: ${FORGE_ARCHITECT_TOKEN:-} + FORGE_FILER_TOKEN: ${FORGE_FILER_TOKEN:-} + FORGE_BOT_USERNAMES: ${FORGE_BOT_USERNAMES:-} + WOODPECKER_TOKEN: ${WOODPECKER_TOKEN:-} + CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200} + CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1} + CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "60" + CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1" + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} + ANTHROPIC_BASE_URL: ${ANTHROPIC_BASE_URL:-} + FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-} + DISINTO_CONTAINER: "1" + PROJECT_NAME: ${PROJECT_NAME:-project} + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + WOODPECKER_DATA_DIR: /woodpecker-data + WOODPECKER_REPO_ID: "PLACEHOLDER_WP_REPO_ID" + CLAUDE_CONFIG_DIR: ${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config} + POLL_INTERVAL: 
${POLL_INTERVAL:-300} + GARDENER_INTERVAL: ${GARDENER_INTERVAL:-21600} + ARCHITECT_INTERVAL: ${ARCHITECT_INTERVAL:-21600} + PLANNER_INTERVAL: ${PLANNER_INTERVAL:-43200} + SUPERVISOR_INTERVAL: ${SUPERVISOR_INTERVAL:-1200} + AGENT_ROLES: review,dev,gardener,architect,planner,predictor,supervisor + healthcheck: + test: ["CMD", "pgrep", "-f", "entrypoint.sh"] + interval: 60s + timeout: 5s + retries: 3 + start_period: 30s + depends_on: + forgejo: + condition: service_healthy + woodpecker: + condition: service_started + networks: + - disinto-net LLAMAEOF fi