Compare commits
402 commits
fix/issue-
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 10be72f5ce | |||
|
|
5c4ea7373a | ||
| d076528193 | |||
|
|
398c618cc4 | ||
| 532ce257d5 | |||
|
|
7fa0b564df | ||
| 4a35c2bba0 | |||
|
|
dedd29045b | ||
| 05311fa8da | |||
|
|
594677a040 | ||
| 7406b8950d | |||
|
|
73fded12c8 | ||
| 506a00151b | |||
|
|
55156fbac1 | ||
| 8ce9cb9803 | |||
|
|
3405879d8b | ||
|
|
d190296af1 | ||
|
|
57a177a37d | ||
|
|
d60a3da1b1 | ||
| 0612bb25d0 | |||
|
|
6dc42c3d1a | ||
|
|
c7e43e091a | ||
| 316f9fd64b | |||
|
|
cecfb3374d | ||
| 6b858c9c43 | |||
|
|
e58caa5dfd | ||
| 6305597156 | |||
|
|
817d691e4d | ||
| 31639b95f4 | |||
|
|
c753bebb14 | ||
| 7c8f734d6c | |||
|
|
0b7a41c3a1 | ||
| 56a4700e16 | |||
|
|
af74eedad9 | ||
| b591e38153 | |||
|
|
5997667cb5 | ||
| dbf1340027 | |||
|
|
2b7edfaf1a | ||
| 1499eb04df | |||
|
|
c7168b58e5 | ||
| 05954191ae | |||
|
|
c096373ef6 | ||
| e46c367bd5 | |||
|
|
95aba008ac | ||
|
|
e092158fb0 | ||
| 2d372679d4 | |||
|
|
99d430a0c2 | ||
| fda647a4d9 | |||
|
|
6df0476808 | ||
| d29a19612e | |||
|
|
f700c33a1b | ||
| 42d4367fe1 | |||
|
|
934bf9876c | ||
| 56f21b0362 | |||
| afeaffbeae | |||
|
|
fde7d1170e | ||
|
|
098c19cb3a | ||
|
|
37c44d7ac4 | ||
| 8dca5c7eb3 | |||
|
|
f38e3e0d0d | ||
| 06eb806566 | |||
|
|
0a5b54ff4f | ||
| b7e8fdc9ac | |||
|
|
f0c3c773ff | ||
| da4d9077dd | |||
|
|
6a6d2d0774 | ||
| 84ab6ef0a8 | |||
|
|
3f76b3495a | ||
| a8b96d8211 | |||
|
|
f299bae77b | ||
| be5957f127 | |||
|
|
58fd3cbde1 | ||
|
|
fe043f4368 | ||
| 596875de3c | |||
|
|
dba3adf1bb | ||
| a844350609 | |||
|
|
2a1c974c92 | ||
| 5115c9fef9 | |||
|
|
48c97a9b09 | ||
| 3a9ee5dc55 | |||
|
|
af9b8134d9 | ||
| ad77edd207 | |||
|
|
a0280aa454 | ||
| bba7585ce1 | |||
|
|
c419768871 | ||
| ec950f1a78 | |||
|
|
ff25e5a084 | ||
| 31fde3d471 | |||
|
|
3a4f2c0101 | ||
|
|
43af38046c | ||
| 91fcf70889 | |||
|
|
33f1eebd64 | ||
|
|
000ccb17c2 | ||
| cb832f5bf6 | |||
|
|
35885fa30c | ||
|
|
1e4754675d | ||
| aeaef880ec | |||
| b26c5e6400 | |||
|
|
1e23362721 | ||
|
|
3e9ac2b261 | ||
| a4e7dcc5d7 | |||
| 3ac6cf7bf3 | |||
|
|
3b41643c76 | ||
|
|
c7ca745233 | ||
|
|
09719aa635 | ||
| 471b0b053a | |||
|
|
fbf1a6dcc2 | ||
|
|
3c8b61168d | ||
|
|
77de5ef4c5 | ||
|
|
e70da015db | ||
| 0db21e70a1 | |||
| 3c4ba5ff82 | |||
|
|
ac1b49767d | ||
| 449d83f233 | |||
| 2ad515d53e | |||
|
|
a72ab8b121 | ||
|
|
96aeb549c0 | ||
|
|
8679332756 | ||
| 9d0b7f2b07 | |||
|
|
46a87c5798 | ||
|
|
6971371e27 | ||
| 7069b729f7 | |||
|
|
f3f6b22b0d | ||
| 31b55ff594 | |||
|
|
cfb4ba5fb3 | ||
| 18a3d19d51 | |||
|
|
3c443322ca | ||
| d61ef88c06 | |||
|
|
da65518f07 | ||
| 2478765dfa | |||
| e159327d2e | |||
|
|
8eaad3a998 | ||
|
|
def09c441c | ||
|
|
b11c4cca15 | ||
| 80e19f8e51 | |||
|
|
03a119d16c | ||
|
|
e31a2d5c88 | ||
| f78ed10064 | |||
|
|
2d04ef9406 | ||
| a951b08e34 | |||
|
|
1426b1710f | ||
| 1e1bb12d66 | |||
|
|
045df63d07 | ||
| 8452bc35b3 | |||
|
|
0987b9ed2f | ||
| 52091a8c54 | |||
|
|
87a0036baa | ||
| ddd3651426 | |||
| 8b3aeb1698 | |||
|
|
4436136797 | ||
|
|
a61955182a | ||
| bc5b126485 | |||
| dd2fc47140 | |||
|
|
03962dd1d2 | ||
|
|
655c383046 | ||
| 4582da63ba | |||
|
|
7c688bc196 | ||
| b79484d581 | |||
|
|
fa87f59f7e | ||
| c52e5d35a2 | |||
|
|
faaaeb0a1f | ||
| d5e63a801e | |||
|
|
52ea11be66 | ||
|
|
63bfed949e | ||
| bd229a5d75 | |||
|
|
7158bb23d4 | ||
|
|
32e05be543 | ||
| 72f97285e5 | |||
|
|
33c20cc78d | ||
|
|
bf62e95986 | ||
| 6008697355 | |||
|
|
f0102d5501 | ||
| 90f8c00e85 | |||
|
|
7c2d1e139e | ||
| 76f17f2400 | |||
|
|
34c6d43805 | ||
| 28d48b1a60 | |||
| 6861ea0880 | |||
|
|
3a9b42bca3 | ||
|
|
605fc136ae | ||
| 16c917bdf2 | |||
| a4776c35b4 | |||
|
|
2d896c82ae | ||
|
|
9b11940f38 | ||
| 61700b5bbc | |||
|
|
2b9ebe8ac0 | ||
|
|
367b845857 | ||
|
|
daa62f28c6 | ||
|
|
894c635783 | ||
| dd07047635 | |||
|
|
25433eaf67 | ||
| f278e8fb14 | |||
|
|
0d78dae5a8 | ||
|
|
29f3d451c7 | ||
| 6e9bb5348c | |||
| 60617b6f29 | |||
|
|
81b89259c3 | ||
|
|
0c68421e6f | ||
| eb45ad4943 | |||
|
|
93efc6e435 | ||
|
|
887bc7bbea | ||
| ebadff09a1 | |||
|
|
d341acee2a | ||
|
|
fe1ef3d5ef | ||
| b544da603a | |||
|
|
ce94a74c5f | ||
| fa47653f1d | |||
|
|
2164991313 | ||
| a704acb7ba | |||
|
|
28376495bf | ||
| 01911d0f9f | |||
|
|
b7f346cf33 | ||
| 540c5bce44 | |||
|
|
72df9bd327 | ||
| 1d75a65d8f | |||
|
|
a5d7a1961c | ||
| 5ac170f31f | |||
|
|
07aa61322b | ||
| 682edc6ec5 | |||
|
|
0697f7537b | ||
|
|
7a1ea91530 | ||
|
|
083c734390 | ||
| 4b4eb741e6 | |||
|
|
b633ce66df | ||
| c7835c188a | |||
|
|
6e350c0838 | ||
| fc9e52224e | |||
|
|
9d7139afe3 | ||
|
|
4af309721e | ||
| 07ea934fd3 | |||
|
|
e27602e144 | ||
|
|
ee001534eb | ||
| aa1ae7a7cd | |||
|
|
4f4158d1e1 | ||
| 1dbb382d2f | |||
|
|
0721ec6cd4 | ||
| 7915b8c685 | |||
|
|
d8d9acd730 | ||
| 192be70950 | |||
|
|
19dd7e61f4 | ||
| f7e36e76fe | |||
|
|
9a22e407a4 | ||
| 01f97ed6e5 | |||
|
|
d653680d64 | ||
| e871070942 | |||
|
|
cbc2a0ca4e | ||
| f19f38f16b | |||
|
|
6adb4895c2 | ||
| f686d47a98 | |||
|
|
7db129aba2 | ||
| e8b77b1055 | |||
|
|
630344900d | ||
| 2014eab1c4 | |||
| b495138850 | |||
|
|
514de48f58 | ||
|
|
cfe96f365c | ||
| ac2beac361 | |||
|
|
684501e385 | ||
| 83e92946d4 | |||
|
|
7e7fafd234 | ||
| 78c92dbdc4 | |||
|
|
c35d57a045 | ||
| fb27997e74 | |||
|
|
8480308d1d | ||
| 863925cb1c | |||
|
|
daf9151b9a | ||
| b4cc5d649e | |||
|
|
718327754a | ||
| ce250e3d1a | |||
|
|
ea64aa65d1 | ||
|
|
cc7dc6ccd7 | ||
|
|
a4bd8e8398 | ||
|
|
934cde7675 | ||
| 9830e6ce53 | |||
|
|
6d0eaf2687 | ||
| 8f58f834d5 | |||
|
|
f499de7c9d | ||
|
|
bba7665e09 | ||
| 8a10d6e26c | |||
|
|
96d1aa7a29 | ||
|
|
13a35f8355 | ||
| 9c199cdd6f | |||
| 113bc422cb | |||
|
|
e6ac67811a | ||
|
|
ae826f935b | ||
|
|
da70badb6d | ||
| 65ae5c908d | |||
|
|
c29d49cd5c | ||
| 064366678b | |||
|
|
fb23dcab41 | ||
| 205e28c66f | |||
| e2fbe9b718 | |||
|
|
52294a2efc | ||
|
|
5189b70dd3 | ||
| b0e789470e | |||
|
|
4aa824c203 | ||
| fcd892dce0 | |||
|
|
12ca3fe214 | ||
| 38acca0df4 | |||
|
|
b7bba15037 | ||
| 5c76d4beb0 | |||
|
|
3606d66a51 | ||
| ba5621f8f4 | |||
|
|
1d201fc9f6 | ||
| ffe763fcaa | |||
|
|
2b0f4f01d7 | ||
| 3775697e4f | |||
|
|
f637b53d3e | ||
| ef2cd16e3b | |||
|
|
e2e4ca5579 | ||
| c9e9c887db | |||
|
|
f2c7c806a1 | ||
| eaaecfc22b | |||
|
|
507e41a926 | ||
|
|
e22863eb60 | ||
| 84d74ce541 | |||
|
|
786c818509 | ||
| 3c76a5aac7 | |||
|
|
ce561b3745 | ||
|
|
7574bb7b3b | ||
| fcf72ccf7a | |||
|
|
47215a85aa | ||
| e65e091d3c | |||
|
|
c7e7fd00ea | ||
|
|
8c42303943 | ||
| 6d29dcf7d7 | |||
| 48a0826f4b | |||
|
|
3b1ebb4a3f | ||
|
|
7be56819be | ||
| 5e935e746b | |||
| 7f6a558681 | |||
|
|
5f6235e1f1 | ||
| a36f0a1b28 | |||
|
|
b21408e668 | ||
|
|
33f04a2976 | ||
| f10cdf2c9e | |||
| 141e44d423 | |||
|
|
b2be163808 | ||
|
|
7977e2562c | ||
| c01c27c04e | |||
|
|
b1695d8329 | ||
| 8d32168121 | |||
|
|
5b1a3b2091 | ||
| 8cdf92bd9d | |||
|
|
20778d3f06 | ||
| 6a05d8881b | |||
|
|
7dbd6c2352 | ||
| 5cf058b04b | |||
| 29e8cb0969 | |||
|
|
dd678737c7 | ||
|
|
a7eb051996 | ||
| c2ed7955e0 | |||
|
|
e7b11b22da | ||
| 8ad6e16829 | |||
| 94d5467ffe | |||
|
|
0098695644 | ||
|
|
26fa11efff | ||
| b23bb9f695 | |||
|
|
a97474d3f2 | ||
| a12346fe93 | |||
| b5e97b106c | |||
|
|
580de95f9e | ||
|
|
20de8e5d3a | ||
| f04a57e6db | |||
|
|
1cb7e4b8aa | ||
| 784a1ca1d5 | |||
|
|
300f335179 | ||
| ca3459ec61 | |||
|
|
bf2842eff8 | ||
|
|
a5d3f238bf | ||
|
|
81adad21e5 | ||
|
|
1053e02f67 | ||
|
|
139f77fdf5 | ||
| bc7d8d1df9 | |||
|
|
7ad1c63de3 | ||
| 410a5ee948 | |||
|
|
a5c34a5eba | ||
|
|
979e1210b4 | ||
| dcf348e486 | |||
|
|
4b47ca3c46 | ||
| fa0e5afd79 | |||
|
|
2381a24eaa | ||
| e3e809cd3b | |||
|
|
bd7a4d6d03 | ||
| e72168abee | |||
|
|
fc937d6904 | ||
|
|
d1fc528707 | ||
|
|
0883b1a5eb | ||
| 6d1b464bbd | |||
|
|
05022740ac | ||
| 1dce91664f | |||
| 4a94370215 | |||
|
|
8cbfbf102b | ||
|
|
67d66b3e7a | ||
|
|
3351bf06f0 | ||
| a8f13e1ac3 | |||
|
|
cbfbfef0bb | ||
| 6327f4d4d5 | |||
|
|
8f193eb40b | ||
| 076f6655df | |||
|
|
e4acd032f0 | ||
|
|
2b4c8be245 |
108 changed files with 10005 additions and 5999 deletions
20
.dockerignore
Normal file
20
.dockerignore
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Secrets — prevent .env files from being baked into the image
|
||||||
|
.env
|
||||||
|
.env.enc
|
||||||
|
.env.vault
|
||||||
|
.env.vault.enc
|
||||||
|
|
||||||
|
# Version control — .git is huge and not needed in image
|
||||||
|
.git
|
||||||
|
|
||||||
|
# Archives — not needed at runtime
|
||||||
|
*.tar.gz
|
||||||
|
|
||||||
|
# Prometheus data — large, ephemeral data
|
||||||
|
prometheus-data/
|
||||||
|
|
||||||
|
# Compose files — only needed at runtime via volume mount
|
||||||
|
docker-compose.yml
|
||||||
|
|
||||||
|
# Project TOML files — gitignored anyway, won't be in build context
|
||||||
|
projects/*.toml
|
||||||
22
.env.example
22
.env.example
|
|
@ -19,14 +19,32 @@ FORGE_URL=http://localhost:3000 # [CONFIG] local Forgejo instance
|
||||||
# ── Auth tokens ───────────────────────────────────────────────────────────
|
# ── Auth tokens ───────────────────────────────────────────────────────────
|
||||||
# Each agent has its own Forgejo account and API token (#747).
|
# Each agent has its own Forgejo account and API token (#747).
|
||||||
# Per-agent tokens fall back to FORGE_TOKEN if not set.
|
# Per-agent tokens fall back to FORGE_TOKEN if not set.
|
||||||
|
#
|
||||||
|
# Tokens and passwords are auto-generated by `disinto init` and stored in .env.
|
||||||
|
# Each bot user gets:
|
||||||
|
# - FORGE_TOKEN_<BOT> = API token for REST calls (user identity via /api/v1/user)
|
||||||
|
# - FORGE_PASS_<BOT> = password for git HTTP push (#361, Forgejo 11.x limitation)
|
||||||
|
#
|
||||||
|
# Local-model agents (agents-llama) use FORGE_TOKEN_LLAMA / FORGE_PASS_LLAMA
|
||||||
|
# with FORGE_BOT_USER_LLAMA=dev-qwen to ensure correct attribution (#563).
|
||||||
FORGE_TOKEN= # [SECRET] dev-bot API token (default for all agents)
|
FORGE_TOKEN= # [SECRET] dev-bot API token (default for all agents)
|
||||||
|
FORGE_PASS= # [SECRET] dev-bot password for git HTTP push (#361)
|
||||||
|
FORGE_TOKEN_LLAMA= # [SECRET] dev-qwen API token (for agents-llama)
|
||||||
|
FORGE_PASS_LLAMA= # [SECRET] dev-qwen password for git HTTP push
|
||||||
FORGE_REVIEW_TOKEN= # [SECRET] review-bot API token
|
FORGE_REVIEW_TOKEN= # [SECRET] review-bot API token
|
||||||
|
FORGE_REVIEW_PASS= # [SECRET] review-bot password for git HTTP push
|
||||||
FORGE_PLANNER_TOKEN= # [SECRET] planner-bot API token
|
FORGE_PLANNER_TOKEN= # [SECRET] planner-bot API token
|
||||||
|
FORGE_PLANNER_PASS= # [SECRET] planner-bot password for git HTTP push
|
||||||
FORGE_GARDENER_TOKEN= # [SECRET] gardener-bot API token
|
FORGE_GARDENER_TOKEN= # [SECRET] gardener-bot API token
|
||||||
|
FORGE_GARDENER_PASS= # [SECRET] gardener-bot password for git HTTP push
|
||||||
FORGE_VAULT_TOKEN= # [SECRET] vault-bot API token
|
FORGE_VAULT_TOKEN= # [SECRET] vault-bot API token
|
||||||
|
FORGE_VAULT_PASS= # [SECRET] vault-bot password for git HTTP push
|
||||||
FORGE_SUPERVISOR_TOKEN= # [SECRET] supervisor-bot API token
|
FORGE_SUPERVISOR_TOKEN= # [SECRET] supervisor-bot API token
|
||||||
|
FORGE_SUPERVISOR_PASS= # [SECRET] supervisor-bot password for git HTTP push
|
||||||
FORGE_PREDICTOR_TOKEN= # [SECRET] predictor-bot API token
|
FORGE_PREDICTOR_TOKEN= # [SECRET] predictor-bot API token
|
||||||
|
FORGE_PREDICTOR_PASS= # [SECRET] predictor-bot password for git HTTP push
|
||||||
FORGE_ARCHITECT_TOKEN= # [SECRET] architect-bot API token
|
FORGE_ARCHITECT_TOKEN= # [SECRET] architect-bot API token
|
||||||
|
FORGE_ARCHITECT_PASS= # [SECRET] architect-bot password for git HTTP push
|
||||||
FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot
|
FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot
|
||||||
|
|
||||||
# ── Backwards compatibility ───────────────────────────────────────────────
|
# ── Backwards compatibility ───────────────────────────────────────────────
|
||||||
|
|
@ -34,6 +52,10 @@ FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,superv
|
||||||
# CODEBERG_TOKEN automatically (same for REVIEW_BOT_TOKEN, CODEBERG_REPO,
|
# CODEBERG_TOKEN automatically (same for REVIEW_BOT_TOKEN, CODEBERG_REPO,
|
||||||
# CODEBERG_BOT_USERNAMES). No action needed for existing deployments.
|
# CODEBERG_BOT_USERNAMES). No action needed for existing deployments.
|
||||||
# Per-agent tokens default to FORGE_TOKEN when unset (single-token setups).
|
# Per-agent tokens default to FORGE_TOKEN when unset (single-token setups).
|
||||||
|
#
|
||||||
|
# Note: `disinto init` auto-generates all bot tokens/passwords when you
|
||||||
|
# configure [agents.llama] in a project TOML. The credentials are stored
|
||||||
|
# in .env.enc (encrypted) or .env (plaintext fallback).
|
||||||
|
|
||||||
# ── Woodpecker CI ─────────────────────────────────────────────────────────
|
# ── Woodpecker CI ─────────────────────────────────────────────────────────
|
||||||
WOODPECKER_TOKEN= # [SECRET] Woodpecker API token
|
WOODPECKER_TOKEN= # [SECRET] Woodpecker API token
|
||||||
|
|
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -28,3 +28,6 @@ secrets/
|
||||||
|
|
||||||
# Pre-built binaries for Docker builds (avoid network calls during build)
|
# Pre-built binaries for Docker builds (avoid network calls during build)
|
||||||
docker/agents/bin/
|
docker/agents/bin/
|
||||||
|
|
||||||
|
# Generated docker-compose.yml (run 'bin/disinto init' to regenerate)
|
||||||
|
docker-compose.yml
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,6 @@
|
||||||
# 2. Every custom function called by agent scripts is defined in lib/ or the script itself
|
# 2. Every custom function called by agent scripts is defined in lib/ or the script itself
|
||||||
#
|
#
|
||||||
# Fast (<10s): no network, no tmux, no Claude needed.
|
# Fast (<10s): no network, no tmux, no Claude needed.
|
||||||
# Would have caught: kill_tmux_session (renamed), create_agent_session (missing),
|
|
||||||
# read_phase (missing from dev-agent.sh scope)
|
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
|
@ -21,12 +19,16 @@ FAILED=0
|
||||||
# Uses awk instead of grep -Eo for busybox/Alpine compatibility (#296).
|
# Uses awk instead of grep -Eo for busybox/Alpine compatibility (#296).
|
||||||
get_fns() {
|
get_fns() {
|
||||||
local f="$1"
|
local f="$1"
|
||||||
# BRE mode (no -E). Use [(][)] for literal parens — unambiguous across
|
# Pure-awk implementation: avoids grep/sed cross-platform differences
|
||||||
# GNU grep and BusyBox grep (some BusyBox builds treat bare () as grouping
|
# (BusyBox grep BRE quirks, sed ; separator issues on Alpine).
|
||||||
# even in BRE). BRE one-or-more via [X][X]* instead of +.
|
awk '
|
||||||
grep '^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]' "$f" 2>/dev/null \
|
/^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]/ {
|
||||||
| sed 's/^[[:space:]]*//; s/[[:space:]]*[(][)].*$//' \
|
line = $0
|
||||||
| sort -u || true
|
gsub(/^[[:space:]]+/, "", line)
|
||||||
|
sub(/[[:space:]]*[(].*/, "", line)
|
||||||
|
print line
|
||||||
|
}
|
||||||
|
' "$f" 2>/dev/null | sort -u || true
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract call-position identifiers that look like custom function calls:
|
# Extract call-position identifiers that look like custom function calls:
|
||||||
|
|
@ -95,15 +97,14 @@ echo "=== 2/2 Function resolution ==="
|
||||||
#
|
#
|
||||||
# Included — these are inline-sourced by agent scripts:
|
# Included — these are inline-sourced by agent scripts:
|
||||||
# lib/env.sh — sourced by every agent (log, forge_api, etc.)
|
# lib/env.sh — sourced by every agent (log, forge_api, etc.)
|
||||||
# lib/agent-session.sh — sourced by orchestrators (create_agent_session, monitor_phase_loop, etc.)
|
|
||||||
# lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session)
|
# lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session)
|
||||||
# lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.)
|
# lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.)
|
||||||
# lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set
|
# lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set
|
||||||
# lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue)
|
# lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue)
|
||||||
# lib/secret-scan.sh — sourced by file-action-issue.sh, phase-handler.sh (scan_for_secrets, redact_secrets)
|
# lib/secret-scan.sh — sourced by file-action-issue.sh (scan_for_secrets, redact_secrets)
|
||||||
# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, run_formula_and_monitor, etc.)
|
# lib/formula-session.sh — sourced by formula-driven agents (acquire_run_lock, check_memory, etc.)
|
||||||
# lib/mirrors.sh — sourced by merge sites (mirror_push)
|
# lib/mirrors.sh — sourced by merge sites (mirror_push)
|
||||||
# lib/guard.sh — sourced by all cron entry points (check_active)
|
# lib/guard.sh — sourced by all polling-loop entry points (check_active)
|
||||||
# lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps
|
# lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps
|
||||||
# lib/worktree.sh — sourced by agents for worktree create/recover/cleanup/preserve
|
# lib/worktree.sh — sourced by agents for worktree create/recover/cleanup/preserve
|
||||||
#
|
#
|
||||||
|
|
@ -116,7 +117,7 @@ echo "=== 2/2 Function resolution ==="
|
||||||
# If a new lib file is added and sourced by agents, add it to LIB_FUNS below
|
# If a new lib file is added and sourced by agents, add it to LIB_FUNS below
|
||||||
# and add a check_script call for it in the lib files section further down.
|
# and add a check_script call for it in the lib files section further down.
|
||||||
LIB_FUNS=$(
|
LIB_FUNS=$(
|
||||||
for f in lib/agent-session.sh lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do
|
for f in lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do
|
||||||
if [ -f "$f" ]; then get_fns "$f"; fi
|
if [ -f "$f" ]; then get_fns "$f"; fi
|
||||||
done | sort -u
|
done | sort -u
|
||||||
)
|
)
|
||||||
|
|
@ -180,13 +181,12 @@ check_script() {
|
||||||
# These are already in LIB_FUNS (their definitions are available to agents),
|
# These are already in LIB_FUNS (their definitions are available to agents),
|
||||||
# but this verifies calls *within* each lib file are also resolvable.
|
# but this verifies calls *within* each lib file are also resolvable.
|
||||||
check_script lib/env.sh lib/mirrors.sh
|
check_script lib/env.sh lib/mirrors.sh
|
||||||
check_script lib/agent-session.sh
|
|
||||||
check_script lib/agent-sdk.sh
|
check_script lib/agent-sdk.sh
|
||||||
check_script lib/ci-helpers.sh
|
check_script lib/ci-helpers.sh
|
||||||
check_script lib/secret-scan.sh
|
check_script lib/secret-scan.sh
|
||||||
check_script lib/file-action-issue.sh lib/secret-scan.sh
|
check_script lib/file-action-issue.sh lib/secret-scan.sh
|
||||||
check_script lib/tea-helpers.sh lib/secret-scan.sh
|
check_script lib/tea-helpers.sh lib/secret-scan.sh
|
||||||
check_script lib/formula-session.sh lib/agent-session.sh
|
check_script lib/formula-session.sh
|
||||||
check_script lib/load-project.sh
|
check_script lib/load-project.sh
|
||||||
check_script lib/mirrors.sh lib/env.sh
|
check_script lib/mirrors.sh lib/env.sh
|
||||||
check_script lib/guard.sh
|
check_script lib/guard.sh
|
||||||
|
|
@ -199,18 +199,16 @@ check_script lib/ci-debug.sh
|
||||||
check_script lib/parse-deps.sh
|
check_script lib/parse-deps.sh
|
||||||
|
|
||||||
# Agent scripts — list cross-sourced files where function scope flows across files.
|
# Agent scripts — list cross-sourced files where function scope flows across files.
|
||||||
# phase-handler.sh defines default callback stubs; sourcing agents may override.
|
|
||||||
check_script dev/dev-agent.sh
|
check_script dev/dev-agent.sh
|
||||||
check_script dev/phase-handler.sh lib/secret-scan.sh
|
|
||||||
check_script dev/dev-poll.sh
|
check_script dev/dev-poll.sh
|
||||||
check_script dev/phase-test.sh
|
check_script dev/phase-test.sh
|
||||||
check_script gardener/gardener-run.sh
|
check_script gardener/gardener-run.sh lib/formula-session.sh
|
||||||
check_script review/review-pr.sh lib/agent-sdk.sh
|
check_script review/review-pr.sh lib/agent-sdk.sh
|
||||||
check_script review/review-poll.sh
|
check_script review/review-poll.sh
|
||||||
check_script planner/planner-run.sh lib/agent-session.sh lib/formula-session.sh
|
check_script planner/planner-run.sh lib/formula-session.sh
|
||||||
check_script supervisor/supervisor-poll.sh
|
check_script supervisor/supervisor-poll.sh
|
||||||
check_script supervisor/update-prompt.sh
|
check_script supervisor/update-prompt.sh
|
||||||
check_script supervisor/supervisor-run.sh
|
check_script supervisor/supervisor-run.sh lib/formula-session.sh
|
||||||
check_script supervisor/preflight.sh
|
check_script supervisor/preflight.sh
|
||||||
check_script predictor/predictor-run.sh
|
check_script predictor/predictor-run.sh
|
||||||
check_script architect/architect-run.sh
|
check_script architect/architect-run.sh
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,19 @@
|
||||||
when:
|
when:
|
||||||
event: [push, pull_request]
|
event: [push, pull_request]
|
||||||
|
|
||||||
|
# Override default clone to authenticate against Forgejo using FORGE_TOKEN.
|
||||||
|
# Required because Forgejo is configured with REQUIRE_SIGN_IN, so anonymous
|
||||||
|
# git clones fail with exit code 128. FORGE_TOKEN is injected globally via
|
||||||
|
# WOODPECKER_ENVIRONMENT in docker-compose.yml (generated by lib/generators.sh).
|
||||||
|
clone:
|
||||||
|
git:
|
||||||
|
image: alpine/git
|
||||||
|
commands:
|
||||||
|
- AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
|
||||||
|
- git clone --depth 1 "$AUTH_URL" .
|
||||||
|
- git fetch --depth 1 origin "$CI_COMMIT_REF"
|
||||||
|
- git checkout FETCH_HEAD
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: shellcheck
|
- name: shellcheck
|
||||||
image: koalaman/shellcheck-alpine:stable
|
image: koalaman/shellcheck-alpine:stable
|
||||||
|
|
|
||||||
|
|
@ -267,41 +267,31 @@ def main() -> int:
|
||||||
"2653705045fdf65072cccfd16eb04900": "Standard prompt template (GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT)",
|
"2653705045fdf65072cccfd16eb04900": "Standard prompt template (GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT)",
|
||||||
"93726a3c799b72ed2898a55552031921": "Standard prompt template continuation (SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION)",
|
"93726a3c799b72ed2898a55552031921": "Standard prompt template continuation (SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION)",
|
||||||
"c11eaaacab69c9a2d3c38c75215eca84": "Standard prompt template end (FORMULA_CONTENT, SCRATCH_INSTRUCTION)",
|
"c11eaaacab69c9a2d3c38c75215eca84": "Standard prompt template end (FORMULA_CONTENT, SCRATCH_INSTRUCTION)",
|
||||||
# install_project_crons function in entrypoint.sh and entrypoint-llama.sh (intentional duplicate)
|
# Appears in stack_lock_acquire (lib/stack-lock.sh) and lib/pr-lifecycle.sh
|
||||||
"007e1390498374c68ab5d66aa6d277b2": "install_project_crons function in entrypoints (window 007e1390)",
|
"29d4f34b703f44699237713cc8d8065b": "Structural end-of-while-loop+case (return 1, esac, done, closing brace)",
|
||||||
"04143957d4c63e8a16ac28bddaff589b": "install_project_crons function in entrypoints (window 04143957)",
|
# Forgejo org-creation API call pattern shared between forge-setup.sh and ops-setup.sh
|
||||||
"076a19221cde674b2fce20a17292fa78": "install_project_crons function in entrypoints (window 076a1922)",
|
# Extracted from bin/disinto (not a .sh file, excluded from prior scans) into lib/forge-setup.sh
|
||||||
"0d498287626e105f16b24948aed53584": "install_project_crons function in entrypoints (window 0d498287)",
|
"059b11945140c172465f9126b829ed7f": "Forgejo org-creation curl pattern (forge-setup.sh + ops-setup.sh)",
|
||||||
"137b746928011acd758c7a9c690810b2": "install_project_crons function in entrypoints (window 137b7469)",
|
# Docker compose environment block for agents service (generators.sh + hire-agent.sh)
|
||||||
"287d33d98d21e3e07e0869e56ad94527": "install_project_crons function in entrypoints (window 287d33d9)",
|
# Intentional duplicate - both generate the same docker-compose.yml template
|
||||||
"325a3d54a15e59d333ec2a20c062cc8c": "install_project_crons function in entrypoints (window 325a3d54)",
|
"8066210169a462fe565f18b6a26a57e0": "Docker compose environment block (generators.sh + hire-agent.sh) - old",
|
||||||
"34e1943d5738f540d67c5c6bd3e60b20": "install_project_crons function in entrypoints (window 34e1943d)",
|
"fd978fcd726696e0f280eba2c5198d50": "Docker compose environment block continuation (generators.sh + hire-agent.sh) - old",
|
||||||
"3dabd19698f9705b05376c38042ccce8": "install_project_crons function in entrypoints (window 3dabd196)",
|
"e2760ccc2d4b993a3685bd8991594eb2": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old",
|
||||||
"446b420f7f9821a2553bc4995d1fac25": "install_project_crons function in entrypoints (window 446b420f)",
|
# The hash shown in output is 161a80f7 - need to match exactly what the script finds
|
||||||
"4826cf4896b792368c7b4d77573d0f8b": "install_project_crons function in entrypoints (window 4826cf48)",
|
"161a80f7296d6e9d45895607b7f5b9c9": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old",
|
||||||
"4e564d3bbda0ef33962af6042736dc1e": "install_project_crons function in entrypoints (window 4e564d3b)",
|
# New hash after explicit environment fix (#381)
|
||||||
"5a3d92b22e5d5bca8cce17d581ac6803": "install_project_crons function in entrypoints (window 5a3d92b2)",
|
"83fa229b86a7fdcb1d3591ab8e718f9d": "Docker compose explicit environment block (generators.sh + hire-agent.sh) - #381",
|
||||||
"63c20c5a31cf5e08f3a901ddf6db98af": "install_project_crons function in entrypoints (window 63c20c5a)",
|
# Verification mode helper functions - intentionally duplicated in dispatcher and entrypoint
|
||||||
"77547751325562fac397bbfd3a21c88e": "install_project_crons function in entrypoints (window 77547751)",
|
# These functions check if bug-report parent issues have all sub-issues closed
|
||||||
"80bdff63e54b4a260043d264b83d8eb0": "install_project_crons function in entrypoints (window 80bdff63)",
|
"b783d403276f78b49ad35840845126a1": "Verification helper: sub_issues variable declaration",
|
||||||
"84e55706393f731b293890dd6d830316": "install_project_crons function in entrypoints (window 84e55706)",
|
"4b19b9a1bdfbc62f003fc237ed270ed9": "Verification helper: python3 -c invocation",
|
||||||
"85f8a9d029ee9efecca73fd30449ccf4": "install_project_crons function in entrypoints (window 85f8a9d0)",
|
"cc1d0a9f85dfe0cc32e9ef6361cb8c3a": "Verification helper: Python imports and args",
|
||||||
"86e28dae676c905c5aa0035128e20e46": "install_project_crons function in entrypoints (window 86e28dae)",
|
"768926748b811ebd30f215f57db5de40": "Verification helper: json.load from /dev/stdin",
|
||||||
"a222b73bcd6a57adb2315726e81ab6cf": "install_project_crons function in entrypoints (window a222b73b)",
|
"4c58586a30bcf6b009c02010ed8f6256": "Verification helper: sub_issues list initialization",
|
||||||
"abd6c7efe66f533c48c883c2a6998886": "install_project_crons function in entrypoints (window abd6c7ef)",
|
"53ea3d6359f51d622467bd77b079cc88": "Verification helper: iterate issues in data",
|
||||||
"bcfeb67ce4939181330afea4949a95cf": "install_project_crons function in entrypoints (window bcfeb67c)",
|
"21aec56a99d5252b23fb9a38b895e8e8": "Verification helper: check body for Decomposed from pattern",
|
||||||
"c1248c98f978c48e4a1e5009a1440917": "install_project_crons function in entrypoints (window c1248c98)",
|
"60ea98b3604557d539193b2a6624e232": "Verification helper: append sub-issue number",
|
||||||
"c40571185b3306345ecf9ac33ab352a6": "install_project_crons function in entrypoints (window c4057118)",
|
"9f6ae8e7811575b964279d8820494eb0": "Verification helper: for loop done pattern",
|
||||||
"c566639b237036a7a385982274d3d271": "install_project_crons function in entrypoints (window c566639b)",
|
|
||||||
"d9cd2f3d874c32366d577ea0d334cd1a": "install_project_crons function in entrypoints (window d9cd2f3d)",
|
|
||||||
"df4d3e905b12f2c68b206e45dddf9214": "install_project_crons function in entrypoints (window df4d3e90)",
|
|
||||||
"e8e65ccf867fc6cbe49695ecdce2518e": "install_project_crons function in entrypoints (window e8e65ccf)",
|
|
||||||
"eb8b298f06cda4359cc171206e0014bf": "install_project_crons function in entrypoints (window eb8b298f)",
|
|
||||||
"ecdf0daa2f2845359a6a4aa12d327246": "install_project_crons function in entrypoints (window ecdf0daa)",
|
|
||||||
"eeac93b2fba4de4589d36ca20845ec9f": "install_project_crons function in entrypoints (window eeac93b2)",
|
|
||||||
"f08a7139db9c96cd3526549c499c0332": "install_project_crons function in entrypoints (window f08a7139)",
|
|
||||||
"f0917809bdf28ff93fff0749e7e7fea0": "install_project_crons function in entrypoints (window f0917809)",
|
|
||||||
"f0e4101f9b90c2fa921e088057a96db7": "install_project_crons function in entrypoints (window f0e4101f)",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if not sh_files:
|
if not sh_files:
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ when:
|
||||||
- "bin/disinto"
|
- "bin/disinto"
|
||||||
- "lib/load-project.sh"
|
- "lib/load-project.sh"
|
||||||
- "lib/env.sh"
|
- "lib/env.sh"
|
||||||
|
- "lib/generators.sh"
|
||||||
- "tests/**"
|
- "tests/**"
|
||||||
- ".woodpecker/smoke-init.yml"
|
- ".woodpecker/smoke-init.yml"
|
||||||
|
|
||||||
|
|
|
||||||
88
AGENTS.md
88
AGENTS.md
|
|
@ -1,13 +1,13 @@
|
||||||
<!-- last-reviewed: 33eb565d7e0c5b7e0159e1720ba7f79126a7e25e -->
|
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
|
||||||
# Disinto — Agent Instructions
|
# Disinto — Agent Instructions
|
||||||
|
|
||||||
## What this repo is
|
## What this repo is
|
||||||
|
|
||||||
Disinto is an autonomous code factory. It manages seven agents (dev, review,
|
Disinto is an autonomous code factory. It manages ten agents (dev, review,
|
||||||
gardener, supervisor, planner, predictor, architect) that pick up issues from
|
gardener, supervisor, planner, predictor, architect, reproduce, triage, edge
|
||||||
forge, implement them, review PRs, plan from the vision, and keep the system
|
dispatcher) that pick up issues from forge, implement them, review PRs, plan
|
||||||
healthy — all via cron and `claude -p`. The dispatcher executes formula-based
|
from the vision, and keep the system healthy — all via a polling loop (`docker/agents/entrypoint.sh`) and `claude -p`.
|
||||||
operational tasks.
|
The dispatcher executes formula-based operational tasks.
|
||||||
|
|
||||||
Each agent has a `.profile` repository on Forgejo that stores lessons learned
|
Each agent has a `.profile` repository on Forgejo that stores lessons learned
|
||||||
from prior sessions, providing continuous improvement across runs.
|
from prior sessions, providing continuous improvement across runs.
|
||||||
|
|
@ -21,27 +21,45 @@ See `README.md` for the full architecture and `disinto-factory/SKILL.md` for set
|
||||||
|
|
||||||
```
|
```
|
||||||
disinto/ (code repo)
|
disinto/ (code repo)
|
||||||
├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh — issue implementation
|
├── dev/ dev-poll.sh, dev-agent.sh, phase-test.sh — issue implementation
|
||||||
├── review/ review-poll.sh, review-pr.sh — PR review
|
├── review/ review-poll.sh, review-pr.sh — PR review
|
||||||
├── gardener/ gardener-run.sh — direct cron executor for run-gardener formula
|
├── gardener/ gardener-run.sh — polling-loop executor for run-gardener formula
|
||||||
├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula
|
│ best-practices.md — gardener best-practice reference
|
||||||
├── planner/ planner-run.sh — direct cron executor for run-planner formula
|
│ pending-actions.json — queued gardener actions
|
||||||
├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper)
|
├── predictor/ predictor-run.sh — polling-loop executor for run-predictor formula
|
||||||
|
├── planner/ planner-run.sh — polling-loop executor for run-planner formula
|
||||||
|
├── supervisor/ supervisor-run.sh — formula-driven health monitoring (polling-loop executor)
|
||||||
│ preflight.sh — pre-flight data collection for supervisor formula
|
│ preflight.sh — pre-flight data collection for supervisor formula
|
||||||
│ supervisor-poll.sh — legacy bash orchestrator (superseded)
|
|
||||||
├── architect/ architect-run.sh — strategic decomposition of vision into sprints
|
├── architect/ architect-run.sh — strategic decomposition of vision into sprints
|
||||||
├── vault/ vault-env.sh — shared env setup (vault redesign in progress, see #73-#77)
|
├── vault/ vault-env.sh — shared env setup (vault redesign in progress, see #73-#77)
|
||||||
├── lib/ env.sh, agent-session.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, profile.sh, build-graph.py
|
│ SCHEMA.md — vault item schema documentation
|
||||||
|
│ validate.sh — vault item validator
|
||||||
|
│ examples/ — example vault action TOMLs (promote, publish, release, webhook-call)
|
||||||
|
├── lib/ env.sh, agent-sdk.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py,
|
||||||
|
│ branch-protection.sh, secret-scan.sh, tea-helpers.sh, vault.sh, ci-log-reader.py
|
||||||
|
│ hooks/ — Claude Code session hooks (on-compact-reinject, on-idle-stop, on-phase-change, on-pretooluse-guard, on-session-end, on-stop-failure)
|
||||||
├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored)
|
├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored)
|
||||||
├── formulas/ Issue templates (TOML specs for multi-step agent tasks)
|
├── formulas/ Issue templates (TOML specs for multi-step agent tasks)
|
||||||
└── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md)
|
├── docker/ Dockerfiles and entrypoints for reproduce, triage, and edge dispatcher agents
|
||||||
|
├── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md)
|
||||||
|
├── site/ disinto.ai website content
|
||||||
|
├── tests/ Test files (mock-forgejo.py, smoke-init.sh)
|
||||||
|
├── templates/ Issue templates
|
||||||
|
├── bin/ The `disinto` CLI script
|
||||||
|
├── disinto-factory/ Setup documentation and skill
|
||||||
|
├── state/ Runtime state
|
||||||
|
├── .woodpecker/ Woodpecker CI pipeline configs
|
||||||
|
├── VISION.md High-level project vision
|
||||||
|
└── CLAUDE.md Claude Code project instructions
|
||||||
|
|
||||||
disinto-ops/ (ops repo — {project}-ops)
|
disinto-ops/ (ops repo — {project}-ops)
|
||||||
├── vault/
|
├── vault/
|
||||||
|
│ ├── actions/ where vault action TOMLs land (core of vault workflow)
|
||||||
│ ├── pending/ vault items awaiting approval
|
│ ├── pending/ vault items awaiting approval
|
||||||
│ ├── approved/ approved vault items
|
│ ├── approved/ approved vault items
|
||||||
│ ├── fired/ executed vault items
|
│ ├── fired/ executed vault items
|
||||||
│ └── rejected/ rejected vault items
|
│ └── rejected/ rejected vault items
|
||||||
|
├── sprints/ sprint planning artifacts
|
||||||
├── knowledge/ shared agent knowledge + best practices
|
├── knowledge/ shared agent knowledge + best practices
|
||||||
├── evidence/ engagement data, experiment results
|
├── evidence/ engagement data, experiment results
|
||||||
├── portfolio.md addressables + observables
|
├── portfolio.md addressables + observables
|
||||||
|
|
@ -49,11 +67,9 @@ disinto-ops/ (ops repo — {project}-ops)
|
||||||
└── RESOURCES.md accounts, tokens (refs), infra inventory
|
└── RESOURCES.md accounts, tokens (refs), infra inventory
|
||||||
```
|
```
|
||||||
|
|
||||||
> **Note:** Journal directories (`journal/planner/` and `journal/supervisor/`) have been removed from the ops repo. Agent journals are now stored in each agent's `.profile` repo on Forgejo.
|
|
||||||
|
|
||||||
## Agent .profile Model
|
## Agent .profile Model
|
||||||
|
|
||||||
Each agent has a `.profile` repository on Forgejo storing `knowledge/lessons-learned.md` (injected into each session prompt) and `journal/` reflection entries (digested into lessons). Pre-session: `formula_prepare_profile_context()` loads lessons. Post-session: `profile_write_journal` records reflections. See `lib/profile.sh`.
|
Each agent has a `.profile` repository on Forgejo storing `knowledge/lessons-learned.md` (injected into each session prompt) and `journal/` reflection entries (digested into lessons). Pre-session: `formula_prepare_profile_context()` loads lessons. Post-session: `profile_write_journal` records reflections. See `lib/formula-session.sh`.
|
||||||
|
|
||||||
> **Terminology note:** "Formulas" are TOML issue templates in `formulas/` that orchestrate multi-step agent tasks. Distinct from "processes" in `docs/EVIDENCE-ARCHITECTURE.md`.
|
> **Terminology note:** "Formulas" are TOML issue templates in `formulas/` that orchestrate multi-step agent tasks. Distinct from "processes" in `docs/EVIDENCE-ARCHITECTURE.md`.
|
||||||
|
|
||||||
|
|
@ -98,6 +114,9 @@ bash dev/phase-test.sh
|
||||||
| Planner | `planner/` | Strategic planning | [planner/AGENTS.md](planner/AGENTS.md) |
|
| Planner | `planner/` | Strategic planning | [planner/AGENTS.md](planner/AGENTS.md) |
|
||||||
| Predictor | `predictor/` | Infrastructure pattern detection | [predictor/AGENTS.md](predictor/AGENTS.md) |
|
| Predictor | `predictor/` | Infrastructure pattern detection | [predictor/AGENTS.md](predictor/AGENTS.md) |
|
||||||
| Architect | `architect/` | Strategic decomposition | [architect/AGENTS.md](architect/AGENTS.md) |
|
| Architect | `architect/` | Strategic decomposition | [architect/AGENTS.md](architect/AGENTS.md) |
|
||||||
|
| Reproduce | `docker/reproduce/` | Bug reproduction using Playwright MCP | `formulas/reproduce.toml` |
|
||||||
|
| Triage | `docker/reproduce/` | Deep root cause analysis | `formulas/triage.toml` |
|
||||||
|
| Edge dispatcher | `docker/edge/` | Polls ops repo for vault actions, executes via Claude sessions | `docker/edge/dispatcher.sh` |
|
||||||
|
|
||||||
> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77).
|
> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77).
|
||||||
> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow details.
|
> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow details.
|
||||||
|
|
@ -120,30 +139,24 @@ Issues flow: `backlog` → `in-progress` → PR → CI → review → merge →
|
||||||
| `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, dev-poll.sh (on failure) |
|
| `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, dev-poll.sh (on failure) |
|
||||||
| `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) |
|
| `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) |
|
||||||
| `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) |
|
| `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) |
|
||||||
|
| `bug-report` | Issue describes user-facing broken behavior with reproduction steps. Separate triage track for reproduction automation. | Gardener (bug-report detection in grooming) |
|
||||||
|
| `in-triage` | Bug reproduced but root cause not obvious — triage agent investigates. Set alongside `bug-report`. | reproduce-agent (when reproduction succeeds but cause unclear) |
|
||||||
|
| `rejected` | Issue formally rejected — cannot reproduce, out of scope, or invalid. | reproduce-agent, humans |
|
||||||
| `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans |
|
| `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans |
|
||||||
| `prediction/unreviewed` | Unprocessed prediction filed by predictor. | predictor-run.sh |
|
| `prediction/unreviewed` | Unprocessed prediction filed by predictor. | predictor-run.sh |
|
||||||
| `prediction/dismissed` | Prediction triaged as DISMISS — planner disagrees, closed with reason. | Planner (triage-predictions step) |
|
| `prediction/dismissed` | Prediction triaged as DISMISS — planner disagrees, closed with reason. | Planner (triage-predictions step) |
|
||||||
| `prediction/actioned` | Prediction promoted or dismissed by planner. | Planner (triage-predictions step) |
|
| `prediction/actioned` | Prediction promoted or dismissed by planner. | Planner (triage-predictions step) |
|
||||||
|
| `formula` | Issue is a formula-based operational task. Dev-poll skips these; dispatcher handles them. | Dispatcher (when dispatching formula tasks) |
|
||||||
|
|
||||||
### Dependency conventions
|
### Dependency conventions
|
||||||
|
|
||||||
Issues declare dependencies in their body using a `## Dependencies` or
|
Issues declare dependencies via `## Dependencies` / `## Depends on` sections listing `#N` refs. `lib/parse-deps.sh` extracts these; dev-poll only picks issues whose deps are all closed. See AD-002 for concurrency bounds per LLM backend.
|
||||||
`## Depends on` section listing `#N` references. The dev-poll scheduler uses
|
|
||||||
`lib/parse-deps.sh` to extract these and only picks issues whose dependencies
|
|
||||||
are all closed.
|
|
||||||
|
|
||||||
### Single-threaded pipeline
|
|
||||||
|
|
||||||
Each project processes one issue at a time. Dev-poll will not start new work
|
|
||||||
while an open PR is waiting for CI or review. This keeps context clear and
|
|
||||||
prevents merge conflicts between concurrent changes.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Addressables
|
## Addressables and Observables
|
||||||
|
|
||||||
Concrete artifacts the factory has produced or is building. The gardener
|
Concrete artifacts the factory has produced or is building. Observables have measurement wired — the gardener promotes addressables once an evidence process is connected.
|
||||||
maintains this table during grooming — see `formulas/run-gardener.toml`.
|
|
||||||
|
|
||||||
| Artifact | Location | Observable? |
|
| Artifact | Location | Observable? |
|
||||||
|----------|----------|-------------|
|
|----------|----------|-------------|
|
||||||
|
|
@ -152,14 +165,6 @@ maintains this table during grooming — see `formulas/run-gardener.toml`.
|
||||||
| Skill | ClawHub (in progress) | No |
|
| Skill | ClawHub (in progress) | No |
|
||||||
| GitHub org | github.com/Disinto | No |
|
| GitHub org | github.com/Disinto | No |
|
||||||
|
|
||||||
## Observables
|
|
||||||
|
|
||||||
Addressables with measurement wired — the factory can read structured
|
|
||||||
feedback from these. The gardener promotes addressables here once an
|
|
||||||
evidence process is connected.
|
|
||||||
|
|
||||||
None yet.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Architecture Decisions
|
## Architecture Decisions
|
||||||
|
|
@ -168,17 +173,18 @@ Humans write these. Agents read and enforce them.
|
||||||
|
|
||||||
| ID | Decision | Rationale |
|
| ID | Decision | Rationale |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| AD-001 | Nervous system runs from cron, not PR-based actions. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) |
|
| AD-001 | Nervous system runs from a polling loop (`docker/agents/entrypoint.sh`), not PR-based actions. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) |
|
||||||
| AD-002 | Single-threaded pipeline per project. | One dev issue at a time. No new work while a PR awaits CI or review. Prevents merge conflicts and keeps context clear. |
|
| AD-002 | **Concurrency is bounded per LLM backend, not per project.** One concurrent Claude session per OAuth credential pool; one concurrent session per llama-server instance. Containers with disjoint backends may run in parallel. | The single-thread invariant is about *backends*, not pipelines. **(a) Anthropic OAuth credentials race on token refresh** — two sessions sharing one mounted `~/.claude` will trip over each other during rotation and 401. All agents inside an OAuth-mounted container serialize on `flock session.lock`. **(b) llama-server has finite VRAM and one KV cache** — parallel inference thrashes the cache and risks OOM. All llama-backed agents serialize on the same lock. **(c) Disjoint backends are free to parallelize.** Today `disinto-agents` (Anthropic OAuth, runs `review,gardener`) runs concurrently with `disinto-agents-llama` (llama, runs `dev`) on the same project — they share neither OAuth state nor llama VRAM. **(d) Per-project work-conflict safety** (no duplicate dev work, no merge conflicts on the same branch) is enforced by `issue_claim` (assignee + `in-progress` label) and per-issue worktrees — that's a separate guard that does NOT depend on this AD. |
|
||||||
| AD-003 | The runtime creates and destroys, the formula preserves. | Runtime manages worktrees/sessions/temp. Formulas commit knowledge to git before signaling done. |
|
| AD-003 | The runtime creates and destroys, the formula preserves. | Runtime manages worktrees/sessions/temp. Formulas commit knowledge to git before signaling done. |
|
||||||
| AD-004 | Event-driven > polling > fixed delays. | Never `waitForTimeout` or hardcoded sleep. Use phase files, webhooks, or poll loops with backoff. |
|
| AD-004 | Event-driven > polling > fixed delays. | Never `waitForTimeout` or hardcoded sleep. Use phase files, webhooks, or poll loops with backoff. |
|
||||||
| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (both SOPS-encrypted). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. |
|
| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (SOPS-encrypted when available; plaintext `.env`/`.env.vault` fallback supported). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. |
|
||||||
| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. (Vault redesign in progress: PR-based approval on ops repo, see #73-#77) |
|
| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. (Vault redesign in progress: PR-based approval on ops repo, see #73-#77) |
|
||||||
|
|
||||||
**Who enforces what:**
|
**Who enforces what:**
|
||||||
- **Gardener** checks open backlog issues against ADs during grooming; closes violations with a comment referencing the AD number.
|
- **Gardener** checks open backlog issues against ADs during grooming; closes violations with a comment referencing the AD number.
|
||||||
- **Planner** plans within the architecture; does not create issues that violate ADs.
|
- **Planner** plans within the architecture; does not create issues that violate ADs.
|
||||||
- **Dev-agent** reads AGENTS.md before implementing; refuses work that violates ADs.
|
- **Dev-agent** reads AGENTS.md before implementing; refuses work that violates ADs.
|
||||||
|
- **AD-002 is a runtime invariant; nothing for the gardener to check at issue-groom time.** Concurrency is enforced by `flock session.lock` within each container and by `issue_claim` for per-issue work. A violation manifests as a 401 or VRAM OOM in agent logs, not as a malformed issue.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
|
||||||
69
README.md
69
README.md
|
|
@ -21,22 +21,29 @@ Point it at a git repo with a Woodpecker CI pipeline and it will pick up issues,
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
```
|
```
|
||||||
cron (*/10) ──→ supervisor-poll.sh ← supervisor (bash checks, zero tokens)
|
entrypoint.sh (while-true polling loop, 5 min base interval)
|
||||||
|
│
|
||||||
|
├── every 5 min ──→ review-poll.sh ← finds unreviewed PRs, spawns review
|
||||||
|
│ └── review-pr.sh ← claude -p: review → approve/request changes
|
||||||
|
│
|
||||||
|
├── every 5 min ──→ dev-poll.sh ← pulls ready issues, spawns dev-agent
|
||||||
|
│ └── dev-agent.sh ← claude -p: implement → PR → CI → review → merge
|
||||||
|
│
|
||||||
|
├── every 6h ────→ gardener-run.sh ← backlog grooming (duplicates, stale, tech-debt)
|
||||||
|
│ └── claude -p: triage → promote/close/escalate
|
||||||
|
│
|
||||||
|
├── every 6h ────→ architect-run.sh ← strategic decomposition of vision into sprints
|
||||||
|
│
|
||||||
|
├── every 12h ───→ planner-run.sh ← gap-analyse VISION.md, create backlog issues
|
||||||
|
│ └── claude -p: update AGENTS.md → create issues
|
||||||
|
│
|
||||||
|
└── every 24h ───→ predictor-run.sh ← infrastructure pattern detection
|
||||||
|
|
||||||
|
entrypoint-edge.sh (edge container)
|
||||||
|
├── dispatcher.sh ← polls ops repo for vault actions
|
||||||
|
└── every 20 min → supervisor-run.sh ← health checks (bash checks, zero tokens)
|
||||||
├── all clear? → exit 0
|
├── all clear? → exit 0
|
||||||
└── problem? → claude -p (diagnose, fix, or escalate)
|
└── problem? → claude -p (diagnose, fix, or escalate)
|
||||||
|
|
||||||
cron (*/10) ──→ dev-poll.sh ← pulls ready issues, spawns dev-agent
|
|
||||||
└── dev-agent.sh ← claude -p: implement → PR → CI → review → merge
|
|
||||||
|
|
||||||
cron (*/10) ──→ review-poll.sh ← finds unreviewed PRs, spawns review
|
|
||||||
└── review-pr.sh ← claude -p: review → approve/request changes
|
|
||||||
|
|
||||||
cron (daily) ──→ gardener-poll.sh ← backlog grooming (duplicates, stale, tech-debt)
|
|
||||||
└── claude -p: triage → promote/close/escalate
|
|
||||||
|
|
||||||
cron (weekly) ──→ planner-poll.sh ← gap-analyse VISION.md, create backlog issues
|
|
||||||
└── claude -p: update AGENTS.md → create issues
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
@ -65,6 +72,8 @@ cd disinto
|
||||||
disinto init https://github.com/yourorg/yourproject
|
disinto init https://github.com/yourorg/yourproject
|
||||||
```
|
```
|
||||||
|
|
||||||
|
This will generate a `docker-compose.yml` file.
|
||||||
|
|
||||||
Or configure manually — edit `.env` with your values:
|
Or configure manually — edit `.env` with your values:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
@ -86,17 +95,11 @@ CLAUDE_TIMEOUT=7200 # max seconds per Claude invocation (default: 2h)
|
||||||
```
|
```
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 3. Install cron (staggered to avoid overlap)
|
# 3. Start the agent and edge containers
|
||||||
crontab -e
|
docker compose up -d
|
||||||
# Add:
|
|
||||||
# 0,10,20,30,40,50 * * * * /path/to/disinto/supervisor/supervisor-poll.sh
|
|
||||||
# 3,13,23,33,43,53 * * * * /path/to/disinto/review/review-poll.sh
|
|
||||||
# 6,16,26,36,46,56 * * * * /path/to/disinto/dev/dev-poll.sh
|
|
||||||
# 15 8 * * * /path/to/disinto/gardener/gardener-poll.sh
|
|
||||||
# 0 9 * * 1 /path/to/disinto/planner/planner-poll.sh
|
|
||||||
|
|
||||||
# 4. Verify
|
# 4. Verify the entrypoint loop is running
|
||||||
bash supervisor/supervisor-poll.sh # should log "all clear"
|
docker exec disinto-agents tail -f /home/agent/data/agent-entrypoint.log
|
||||||
```
|
```
|
||||||
|
|
||||||
## Directory Structure
|
## Directory Structure
|
||||||
|
|
@ -109,16 +112,16 @@ disinto/
|
||||||
│ ├── env.sh # Shared: load .env, PATH, API helpers
|
│ ├── env.sh # Shared: load .env, PATH, API helpers
|
||||||
│ └── ci-debug.sh # Woodpecker CI log/failure helper
|
│ └── ci-debug.sh # Woodpecker CI log/failure helper
|
||||||
├── dev/
|
├── dev/
|
||||||
│ ├── dev-poll.sh # Cron entry: find ready issues
|
│ ├── dev-poll.sh # Poll: find ready issues
|
||||||
│ └── dev-agent.sh # Implementation agent (claude -p)
|
│ └── dev-agent.sh # Implementation agent (claude -p)
|
||||||
├── review/
|
├── review/
|
||||||
│ ├── review-poll.sh # Cron entry: find unreviewed PRs
|
│ ├── review-poll.sh # Poll: find unreviewed PRs
|
||||||
│ └── review-pr.sh # Review agent (claude -p)
|
│ └── review-pr.sh # Review agent (claude -p)
|
||||||
├── gardener/
|
├── gardener/
|
||||||
│ ├── gardener-poll.sh # Cron entry: backlog grooming
|
│ ├── gardener-run.sh # Executor: backlog grooming
|
||||||
│ └── best-practices.md # Gardener knowledge base
|
│ └── best-practices.md # Gardener knowledge base
|
||||||
├── planner/
|
├── planner/
|
||||||
│ ├── planner-poll.sh # Cron entry: weekly vision gap analysis
|
│ ├── planner-run.sh # Executor: vision gap analysis
|
||||||
│ └── (formula-driven) # run-planner.toml executed by dispatcher
|
│ └── (formula-driven) # run-planner.toml executed by dispatcher
|
||||||
├── vault/
|
├── vault/
|
||||||
│ └── vault-env.sh # Shared env setup (vault redesign in progress, see #73-#77)
|
│ └── vault-env.sh # Shared env setup (vault redesign in progress, see #73-#77)
|
||||||
|
|
@ -141,11 +144,11 @@ disinto/
|
||||||
|
|
||||||
| Agent | Trigger | Job |
|
| Agent | Trigger | Job |
|
||||||
|-------|---------|-----|
|
|-------|---------|-----|
|
||||||
| **Supervisor** | Every 10 min | Health checks (RAM, disk, CI, git). Calls Claude only when something is broken. Self-improving via `best-practices/`. |
|
| **Supervisor** | Every 20 min | Health checks (RAM, disk, CI, git). Calls Claude only when something is broken. Self-improving via `best-practices/`. |
|
||||||
| **Dev** | Every 10 min | Picks up `backlog`-labeled issues, creates a branch, implements, opens a PR, monitors CI, responds to review, merges. |
|
| **Dev** | Every 5 min | Picks up `backlog`-labeled issues, creates a branch, implements, opens a PR, monitors CI, responds to review, merges. |
|
||||||
| **Review** | Every 10 min | Finds PRs without review, runs Claude-powered code review, approves or requests changes. |
|
| **Review** | Every 5 min | Finds PRs without review, runs Claude-powered code review, approves or requests changes. |
|
||||||
| **Gardener** | Daily | Grooms the issue backlog: detects duplicates, promotes `tech-debt` to `backlog`, closes stale issues, escalates ambiguous items. |
|
| **Gardener** | Every 6h | Grooms the issue backlog: detects duplicates, promotes `tech-debt` to `backlog`, closes stale issues, escalates ambiguous items. |
|
||||||
| **Planner** | Weekly | Updates AGENTS.md documentation to reflect recent code changes, then gap-analyses VISION.md vs current state and creates up to 5 backlog issues for the highest-leverage gaps. |
|
| **Planner** | Every 12h | Updates AGENTS.md documentation to reflect recent code changes, then gap-analyses VISION.md vs current state and creates up to 5 backlog issues for the highest-leverage gaps. |
|
||||||
|
|
||||||
> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77).
|
> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77).
|
||||||
> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow and branch protection details.
|
> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow and branch protection details.
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
<!-- last-reviewed: auto-generated -->
|
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
|
||||||
# Architect — Agent Instructions
|
# Architect — Agent Instructions
|
||||||
|
|
||||||
## What this agent is
|
## What this agent is
|
||||||
|
|
@ -11,7 +11,7 @@ converses with humans through PR comments.
|
||||||
|
|
||||||
- **Input**: Vision issues from VISION.md, prerequisite tree from ops repo
|
- **Input**: Vision issues from VISION.md, prerequisite tree from ops repo
|
||||||
- **Output**: Sprint proposals as PRs on the ops repo, sub-issue files
|
- **Output**: Sprint proposals as PRs on the ops repo, sub-issue files
|
||||||
- **Mechanism**: Formula-driven execution via `formulas/run-architect.toml`
|
- **Mechanism**: Bash-driven orchestration in `architect-run.sh`, pitching formula via `formulas/run-architect.toml`
|
||||||
- **Identity**: `architect-bot` on Forgejo
|
- **Identity**: `architect-bot` on Forgejo
|
||||||
|
|
||||||
## Responsibilities
|
## Responsibilities
|
||||||
|
|
@ -29,28 +29,71 @@ converses with humans through PR comments.
|
||||||
|
|
||||||
## Formula
|
## Formula
|
||||||
|
|
||||||
The architect is driven by `formulas/run-architect.toml`. This formula defines
|
The architect pitching is driven by `formulas/run-architect.toml`. This formula defines
|
||||||
the steps for:
|
the steps for:
|
||||||
- Research: analyzing vision items and prerequisite tree
|
- Research: analyzing vision items and prerequisite tree
|
||||||
- Design: identifying implementation approaches and forks
|
- Pitch: creating structured sprint PRs
|
||||||
- Sprint proposal: creating structured sprint PRs
|
|
||||||
- Sub-issue filing: creating concrete implementation issues
|
- Sub-issue filing: creating concrete implementation issues
|
||||||
|
|
||||||
|
## Bash-driven orchestration
|
||||||
|
|
||||||
|
Bash in `architect-run.sh` handles state detection and orchestration:
|
||||||
|
|
||||||
|
- **Deterministic state detection**: Bash reads the Forgejo reviews API to detect
|
||||||
|
ACCEPT/REJECT decisions — no model-dependent API parsing
|
||||||
|
- **Human guidance injection**: Review body text from ACCEPT reviews is injected
|
||||||
|
directly into the research prompt as context
|
||||||
|
- **Response processing**: When ACCEPT/REJECT responses are detected, bash invokes
|
||||||
|
the agent with appropriate context (session resumed for questions phase)
|
||||||
|
|
||||||
|
### State transitions
|
||||||
|
|
||||||
|
```
|
||||||
|
New vision issue → pitch PR (model generates pitch, bash creates PR)
|
||||||
|
↓
|
||||||
|
APPROVED review → start design questions (model posts Q1:, adds Design forks section)
|
||||||
|
↓
|
||||||
|
Answers received → continue Q&A (model processes answers, posts follow-ups)
|
||||||
|
↓
|
||||||
|
All forks resolved → sub-issue filing (model files implementation issues)
|
||||||
|
↓
|
||||||
|
REJECT review → close PR + journal (model processes rejection, bash merges PR)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Session management
|
||||||
|
|
||||||
|
The agent maintains a global session file at `/tmp/architect-session-{project}.sid`.
|
||||||
|
When processing responses, bash checks if the PR is in the questions phase and
|
||||||
|
resumes the session using `--resume session_id` to preserve codebase context.
|
||||||
|
|
||||||
## Execution
|
## Execution
|
||||||
|
|
||||||
Run via `architect/architect-run.sh`, which:
|
Run via `architect/architect-run.sh`, which:
|
||||||
- Acquires a cron lock and checks available memory
|
- Acquires a poll-loop lock (via `acquire_lock`) and checks available memory
|
||||||
|
- Cleans up per-issue scratch files from previous runs (`/tmp/architect-{project}-scratch-*.md`)
|
||||||
- Sources shared libraries (env.sh, formula-session.sh)
|
- Sources shared libraries (env.sh, formula-session.sh)
|
||||||
- Uses FORGE_ARCHITECT_TOKEN for authentication
|
- Uses FORGE_ARCHITECT_TOKEN for authentication
|
||||||
|
- Processes existing architect PRs via bash-driven design phase
|
||||||
- Loads the formula and builds context from VISION.md, AGENTS.md, and ops repo
|
- Loads the formula and builds context from VISION.md, AGENTS.md, and ops repo
|
||||||
- Executes the formula via `agent_run`
|
- Bash orchestrates state management:
|
||||||
|
- Fetches open vision issues, open architect PRs, and merged sprint PRs from Forgejo API
|
||||||
|
- Filters out visions already with open PRs, in-progress label, sub-issues, or merged sprint PRs
|
||||||
|
- Selects up to `pitch_budget` (3 - open architect PRs) remaining vision issues
|
||||||
|
- For each selected issue, invokes stateless `claude -p` with issue body + context
|
||||||
|
- Creates PRs directly from pitch content (no scratch files)
|
||||||
|
- Agent is invoked only for response processing (ACCEPT/REJECT handling)
|
||||||
|
|
||||||
## Cron
|
**Multi-sprint pitching**: The architect pitches up to 3 sprints per run. Bash handles all state management:
|
||||||
|
- Fetches Forgejo API data (vision issues, open PRs, merged PRs)
|
||||||
|
- Filters and deduplicates (no model-level dedup or journal-based memory)
|
||||||
|
- For each selected vision issue, bash invokes stateless `claude -p` to generate pitch markdown
|
||||||
|
- Bash creates the PR with pitch content and posts ACCEPT/REJECT footer comment
|
||||||
|
- Branch names use issue number (architect/sprint-vision-{issue_number}) to avoid collisions
|
||||||
|
|
||||||
Suggested cron entry (every 6 hours):
|
## Schedule
|
||||||
```cron
|
|
||||||
0 */6 * * * cd /path/to/disinto && bash architect/architect-run.sh
|
The architect runs every 6 hours as part of the polling loop in
|
||||||
```
|
`docker/agents/entrypoint.sh` (iteration math at line 196-208).
|
||||||
|
|
||||||
## State
|
## State
|
||||||
|
|
||||||
|
|
@ -63,3 +106,4 @@ empty file not created, just document it).
|
||||||
- #100: Architect formula — research + design fork identification
|
- #100: Architect formula — research + design fork identification
|
||||||
- #101: Architect formula — sprint PR creation with questions
|
- #101: Architect formula — sprint PR creation with questions
|
||||||
- #102: Architect formula — answer parsing + sub-issue filing
|
- #102: Architect formula — answer parsing + sub-issue filing
|
||||||
|
- #491: Refactor — bash-driven design phase with stateful session resumption
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,31 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# architect-run.sh — Cron wrapper: architect execution via SDK + formula
|
# architect-run.sh — Polling-loop wrapper: architect execution via SDK + formula
|
||||||
#
|
#
|
||||||
# Synchronous bash loop using claude -p (one-shot invocation).
|
# Synchronous bash loop using claude -p (one-shot invocation).
|
||||||
# No tmux sessions, no phase files — the bash script IS the state machine.
|
# No tmux sessions, no phase files — the bash script IS the state machine.
|
||||||
#
|
#
|
||||||
# Flow:
|
# Flow:
|
||||||
# 1. Guards: cron lock, memory check
|
# 1. Guards: run lock, memory check
|
||||||
# 2. Load formula (formulas/run-architect.toml)
|
# 2. Precondition checks: skip if no work (no vision issues, no responses)
|
||||||
# 3. Context: VISION.md, AGENTS.md, ops:prerequisites.md, structural graph
|
# 3. Load formula (formulas/run-architect.toml)
|
||||||
# 4. agent_run(worktree, prompt) → Claude decomposes vision into sprints
|
# 4. Context: VISION.md, AGENTS.md, ops:prerequisites.md, structural graph
|
||||||
|
# 5. Stateless pitch generation: for each selected issue:
|
||||||
|
# - Fetch issue body from Forgejo API (bash)
|
||||||
|
# - Invoke claude -p with issue body + context (stateless, no API calls)
|
||||||
|
# - Create PR with pitch content (bash)
|
||||||
|
# - Post footer comment (bash)
|
||||||
|
# 6. Response processing: handle ACCEPT/REJECT on existing PRs
|
||||||
|
#
|
||||||
|
# Precondition checks (bash before model):
|
||||||
|
# - Skip if no vision issues AND no open architect PRs
|
||||||
|
# - Skip if 3+ architect PRs open AND no ACCEPT/REJECT responses to process
|
||||||
|
# - Only invoke model when there's actual work: new pitches or response processing
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# architect-run.sh [projects/disinto.toml] # project config (default: disinto)
|
# architect-run.sh [projects/disinto.toml] # project config (default: disinto)
|
||||||
#
|
#
|
||||||
# Cron: 0 */6 * * * # every 6 hours
|
# Called by: entrypoint.sh polling loop (every 6 hours)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
|
@ -36,23 +47,42 @@ source "$FACTORY_ROOT/lib/guard.sh"
|
||||||
# shellcheck source=../lib/agent-sdk.sh
|
# shellcheck source=../lib/agent-sdk.sh
|
||||||
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
||||||
|
|
||||||
LOG_FILE="$SCRIPT_DIR/architect.log"
|
LOG_FILE="${DISINTO_LOG_DIR}/architect/architect.log"
|
||||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||||
LOGFILE="$LOG_FILE"
|
LOGFILE="$LOG_FILE"
|
||||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||||
SID_FILE="/tmp/architect-session-${PROJECT_NAME}.sid"
|
SID_FILE="/tmp/architect-session-${PROJECT_NAME}.sid"
|
||||||
|
# Per-PR session files for stateful resumption across runs
|
||||||
|
SID_DIR="/tmp/architect-sessions-${PROJECT_NAME}"
|
||||||
|
mkdir -p "$SID_DIR"
|
||||||
SCRATCH_FILE="/tmp/architect-${PROJECT_NAME}-scratch.md"
|
SCRATCH_FILE="/tmp/architect-${PROJECT_NAME}-scratch.md"
|
||||||
|
SCRATCH_FILE_PREFIX="/tmp/architect-${PROJECT_NAME}-scratch"
|
||||||
WORKTREE="/tmp/${PROJECT_NAME}-architect-run"
|
WORKTREE="/tmp/${PROJECT_NAME}-architect-run"
|
||||||
|
|
||||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
# Override LOG_AGENT for consistent agent identification
|
||||||
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh
|
||||||
|
LOG_AGENT="architect"
|
||||||
|
|
||||||
|
# Override log() to append to architect-specific log file
|
||||||
|
# shellcheck disable=SC2034
|
||||||
|
log() {
|
||||||
|
local agent="${LOG_AGENT:-architect}"
|
||||||
|
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
|
||||||
|
}
|
||||||
|
|
||||||
# ── Guards ────────────────────────────────────────────────────────────────
|
# ── Guards ────────────────────────────────────────────────────────────────
|
||||||
check_active architect
|
check_active architect
|
||||||
acquire_cron_lock "/tmp/architect-run.lock"
|
acquire_run_lock "/tmp/architect-run.lock"
|
||||||
check_memory 2000
|
memory_guard 2000
|
||||||
|
|
||||||
log "--- Architect run start ---"
|
log "--- Architect run start ---"
|
||||||
|
|
||||||
|
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||||
|
# Run git operations from the project checkout, not the baked code dir
|
||||||
|
cd "$PROJECT_REPO_ROOT"
|
||||||
|
|
||||||
|
resolve_forge_remote
|
||||||
|
|
||||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_ARCHITECT_TOKEN:-}" ]; then
|
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_ARCHITECT_TOKEN:-}" ]; then
|
||||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_ARCHITECT_TOKEN}" \
|
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_ARCHITECT_TOKEN}" \
|
||||||
|
|
@ -102,18 +132,773 @@ ${PROMPT_FOOTER}
|
||||||
_PROMPT_EOF_
|
_PROMPT_EOF_
|
||||||
}
|
}
|
||||||
|
|
||||||
PROMPT=$(build_architect_prompt)
|
# ── Build prompt for specific session mode ───────────────────────────────
|
||||||
|
# Args: session_mode (pitch / questions_phase / start_questions)
|
||||||
|
# Returns: prompt text via stdout
|
||||||
|
build_architect_prompt_for_mode() {
|
||||||
|
local session_mode="$1"
|
||||||
|
|
||||||
|
case "$session_mode" in
|
||||||
|
"start_questions")
|
||||||
|
cat <<_PROMPT_EOF_
|
||||||
|
You are the architect agent for ${FORGE_REPO}. Work through the formula below.
|
||||||
|
|
||||||
|
Your role: strategic decomposition of vision issues into development sprints.
|
||||||
|
Propose sprints via PRs on the ops repo, converse with humans through PR comments,
|
||||||
|
and file sub-issues after design forks are resolved.
|
||||||
|
|
||||||
|
## CURRENT STATE: Approved PR awaiting initial design questions
|
||||||
|
|
||||||
|
A sprint pitch PR has been approved by the human (via APPROVED review), but the
|
||||||
|
design conversation has not yet started. Your task is to:
|
||||||
|
|
||||||
|
1. Read the approved sprint pitch from the PR body
|
||||||
|
2. Identify the key design decisions that need human input
|
||||||
|
3. Post initial design questions (Q1:, Q2:, etc.) as comments on the PR
|
||||||
|
4. Add a `## Design forks` section to the PR body documenting the design decisions
|
||||||
|
5. File sub-issues for each design fork path if applicable
|
||||||
|
|
||||||
|
This is NOT a pitch phase — the pitch is already approved. This is the START
|
||||||
|
of the design Q&A phase.
|
||||||
|
|
||||||
|
## Project context
|
||||||
|
${CONTEXT_BLOCK}
|
||||||
|
${GRAPH_SECTION}
|
||||||
|
${SCRATCH_CONTEXT}
|
||||||
|
$(formula_lessons_block)
|
||||||
|
## Formula
|
||||||
|
${FORMULA_CONTENT}
|
||||||
|
|
||||||
|
${SCRATCH_INSTRUCTION}
|
||||||
|
${PROMPT_FOOTER}
|
||||||
|
_PROMPT_EOF_
|
||||||
|
;;
|
||||||
|
"questions_phase")
|
||||||
|
cat <<_PROMPT_EOF_
|
||||||
|
You are the architect agent for ${FORGE_REPO}. Work through the formula below.
|
||||||
|
|
||||||
|
Your role: strategic decomposition of vision issues into development sprints.
|
||||||
|
Propose sprints via PRs on the ops repo, converse with humans through PR comments,
|
||||||
|
and file sub-issues after design forks are resolved.
|
||||||
|
|
||||||
|
## CURRENT STATE: Design Q&A in progress
|
||||||
|
|
||||||
|
A sprint pitch PR is in the questions phase:
|
||||||
|
- The PR has a `## Design forks` section
|
||||||
|
- Initial questions (Q1:, Q2:, etc.) have been posted
|
||||||
|
- Humans may have posted answers or follow-up questions
|
||||||
|
|
||||||
|
Your task is to:
|
||||||
|
1. Read the existing questions and the PR body
|
||||||
|
2. Read human answers from PR comments
|
||||||
|
3. Parse the answers and determine next steps
|
||||||
|
4. Post follow-up questions if needed (Q3:, Q4:, etc.)
|
||||||
|
5. If all design forks are resolved, file sub-issues for each path
|
||||||
|
6. Update the `## Design forks` section as you progress
|
||||||
|
|
||||||
|
## Project context
|
||||||
|
${CONTEXT_BLOCK}
|
||||||
|
${GRAPH_SECTION}
|
||||||
|
${SCRATCH_CONTEXT}
|
||||||
|
$(formula_lessons_block)
|
||||||
|
## Formula
|
||||||
|
${FORMULA_CONTENT}
|
||||||
|
|
||||||
|
${SCRATCH_INSTRUCTION}
|
||||||
|
${PROMPT_FOOTER}
|
||||||
|
_PROMPT_EOF_
|
||||||
|
;;
|
||||||
|
"pitch"|*)
|
||||||
|
# Default: pitch new sprints (original behavior)
|
||||||
|
build_architect_prompt
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
# ── Create worktree ──────────────────────────────────────────────────────
|
# ── Create worktree ──────────────────────────────────────────────────────
|
||||||
formula_worktree_setup "$WORKTREE"
|
formula_worktree_setup "$WORKTREE"
|
||||||
|
|
||||||
# ── Run agent ─────────────────────────────────────────────────────────────
|
# ── Detect if PR is in questions-awaiting-answers phase ──────────────────
|
||||||
export CLAUDE_MODEL="sonnet"
|
# A PR is in the questions phase if it has a `## Design forks` section and
|
||||||
|
# question comments. We check this to decide whether to resume the session
|
||||||
|
# from the research/questions run (preserves codebase context for answer parsing).
|
||||||
|
detect_questions_phase() {
|
||||||
|
local pr_number=""
|
||||||
|
local pr_body=""
|
||||||
|
|
||||||
agent_run --worktree "$WORKTREE" "$PROMPT"
|
# Get open architect PRs on ops repo
|
||||||
log "agent_run complete"
|
local ops_repo="${OPS_REPO_ROOT:-/home/agent/data/ops}"
|
||||||
|
if [ ! -d "${ops_repo}/.git" ]; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Use Forgejo API to find open architect PRs
|
||||||
|
local response
|
||||||
|
response=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open" 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
# Check each open PR for architect markers
|
||||||
|
pr_number=$(printf '%s' "$response" | jq -r '.[] | select(.title | contains("architect:")) | .number' 2>/dev/null | head -1) || return 1
|
||||||
|
|
||||||
|
if [ -z "$pr_number" ]; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Fetch PR body
|
||||||
|
pr_body=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_number}" 2>/dev/null | jq -r '.body // empty') || return 1
|
||||||
|
|
||||||
|
# Check for `## Design forks` section (added by #101 after ACCEPT)
|
||||||
|
if ! printf '%s' "$pr_body" | grep -q "## Design forks"; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check for question comments (Q1:, Q2:, etc.)
|
||||||
|
# Use jq to extract body text before grepping (handles JSON escaping properly)
|
||||||
|
local comments
|
||||||
|
comments=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
if ! printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qE 'Q[0-9]+:'; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# PR is in questions phase
|
||||||
|
log "Detected PR #${pr_number} in questions-awaiting-answers phase"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Detect if PR is approved and awaiting initial design questions ────────
|
||||||
|
# A PR is in this state when:
|
||||||
|
# - It's an open architect PR on ops repo
|
||||||
|
# - It has an APPROVED review (from human acceptance)
|
||||||
|
# - It has NO `## Design forks` section yet
|
||||||
|
# - It has NO Q1:, Q2:, etc. comments yet
|
||||||
|
# This means the human accepted the pitch and we need to start the design
|
||||||
|
# conversation by posting initial questions and adding the Design forks section.
|
||||||
|
detect_approved_pending_questions() {
|
||||||
|
local pr_number=""
|
||||||
|
local pr_body=""
|
||||||
|
|
||||||
|
# Get open architect PRs on ops repo
|
||||||
|
local ops_repo="${OPS_REPO_ROOT:-/home/agent/data/ops}"
|
||||||
|
if [ ! -d "${ops_repo}/.git" ]; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Use Forgejo API to find open architect PRs
|
||||||
|
local response
|
||||||
|
response=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open" 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
# Check each open PR for architect markers
|
||||||
|
pr_number=$(printf '%s' "$response" | jq -r '.[] | select(.title | contains("architect:")) | .number' 2>/dev/null | head -1) || return 1
|
||||||
|
|
||||||
|
if [ -z "$pr_number" ]; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Fetch PR body
|
||||||
|
pr_body=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_number}" 2>/dev/null | jq -r '.body // empty') || return 1
|
||||||
|
|
||||||
|
# Check for APPROVED review
|
||||||
|
local reviews
|
||||||
|
reviews=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/reviews" 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
if ! printf '%s' "$reviews" | jq -e '.[] | select(.state == "APPROVED")' >/dev/null 2>&1; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check that PR does NOT have `## Design forks` section yet
|
||||||
|
# (we're in the "start questions" phase, not "process answers" phase)
|
||||||
|
if printf '%s' "$pr_body" | grep -q "## Design forks"; then
|
||||||
|
# Has design forks section — this is either in questions phase or past it
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check that PR has NO question comments yet (Q1:, Q2:, etc.)
|
||||||
|
local comments
|
||||||
|
comments=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
if printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qE 'Q[0-9]+:'; then
|
||||||
|
# Has question comments — this is either in questions phase or past it
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# PR is approved and awaiting initial design questions
|
||||||
|
log "Detected PR #${pr_number} approved and awaiting initial design questions"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Sub-issue existence check ────────────────────────────────────────────
|
||||||
|
# Check if a vision issue already has sub-issues filed from it.
|
||||||
|
# Returns 0 if sub-issues exist and are open, 1 otherwise.
|
||||||
|
# Args: vision_issue_number
|
||||||
|
has_open_subissues() {
|
||||||
|
local vision_issue="$1"
|
||||||
|
local subissue_count=0
|
||||||
|
|
||||||
|
# Search for issues whose body contains 'Decomposed from #N' pattern
|
||||||
|
# Fetch all open issues with bodies in one API call (avoids N+1 calls)
|
||||||
|
local issues_json
|
||||||
|
issues_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues?state=open&limit=100" 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
# Check each issue for the decomposition pattern using jq to extract bodies
|
||||||
|
subissue_count=$(printf '%s' "$issues_json" | jq -r --arg vid "$vision_issue" '
|
||||||
|
[.[] | select(.number != ($vid | tonumber)) | select(.body // "" | contains("Decomposed from #" + $vid))] | length
|
||||||
|
' 2>/dev/null) || subissue_count=0
|
||||||
|
|
||||||
|
if [ "$subissue_count" -gt 0 ]; then
|
||||||
|
log "Vision issue #${vision_issue} has ${subissue_count} open sub-issue(s) — skipping"
|
||||||
|
return 0 # Has open sub-issues
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "Vision issue #${vision_issue} has no open sub-issues"
|
||||||
|
return 1 # No open sub-issues
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Merged sprint PR check ───────────────────────────────────────────────
|
||||||
|
# Check if a vision issue already has a merged sprint PR on the ops repo.
|
||||||
|
# Returns 0 if a merged sprint PR exists, 1 otherwise.
|
||||||
|
# Args: vision_issue_number
|
||||||
|
has_merged_sprint_pr() {
|
||||||
|
local vision_issue="$1"
|
||||||
|
|
||||||
|
# Get closed PRs from ops repo
|
||||||
|
local prs_json
|
||||||
|
prs_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=closed&limit=100" 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
# Check each closed PR for architect markers and vision issue reference
|
||||||
|
local pr_numbers
|
||||||
|
pr_numbers=$(printf '%s' "$prs_json" | jq -r '.[] | select(.title | contains("architect:")) | .number' 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
local pr_num
|
||||||
|
while IFS= read -r pr_num; do
|
||||||
|
[ -z "$pr_num" ] && continue
|
||||||
|
|
||||||
|
# Get PR details including merged status
|
||||||
|
local pr_details
|
||||||
|
pr_details=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}" 2>/dev/null) || continue
|
||||||
|
|
||||||
|
# Check if PR is actually merged (not just closed)
|
||||||
|
local is_merged
|
||||||
|
is_merged=$(printf '%s' "$pr_details" | jq -r '.merged // false') || continue
|
||||||
|
|
||||||
|
if [ "$is_merged" != "true" ]; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Get PR body and check for vision issue reference
|
||||||
|
local pr_body
|
||||||
|
pr_body=$(printf '%s' "$pr_details" | jq -r '.body // ""') || continue
|
||||||
|
|
||||||
|
# Check if PR body references the vision issue number
|
||||||
|
# Look for patterns like "#N" where N is the vision issue number
|
||||||
|
if printf '%s' "$pr_body" | grep -qE "(#|refs|references)[[:space:]]*#${vision_issue}|#${vision_issue}[^0-9]|#${vision_issue}$"; then
|
||||||
|
log "Found merged sprint PR #${pr_num} referencing vision issue #${vision_issue} — skipping"
|
||||||
|
return 0 # Has merged sprint PR
|
||||||
|
fi
|
||||||
|
done <<< "$pr_numbers"
|
||||||
|
|
||||||
|
log "Vision issue #${vision_issue} has no merged sprint PR"
|
||||||
|
return 1 # No merged sprint PR
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Helper: Fetch all open vision issues from Forgejo API ─────────────────
|
||||||
|
# Returns: JSON array of vision issue objects
|
||||||
|
fetch_vision_issues() {
|
||||||
|
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues?labels=vision&state=open&limit=100" 2>/dev/null || echo '[]'
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Helper: Fetch open architect PRs from ops repo Forgejo API ───────────
|
||||||
|
# Returns: JSON array of architect PR objects
|
||||||
|
fetch_open_architect_prs() {
|
||||||
|
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null || echo '[]'
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Helper: Get vision issue body by number ──────────────────────────────
|
||||||
|
# Args: issue_number
|
||||||
|
# Returns: issue body text
|
||||||
|
get_vision_issue_body() {
|
||||||
|
local issue_num="$1"
|
||||||
|
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues/${issue_num}" 2>/dev/null | jq -r '.body // ""'
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Helper: Get vision issue title by number ─────────────────────────────
|
||||||
|
# Args: issue_number
|
||||||
|
# Returns: issue title
|
||||||
|
get_vision_issue_title() {
|
||||||
|
local issue_num="$1"
|
||||||
|
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues/${issue_num}" 2>/dev/null | jq -r '.title // ""'
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Helper: Create a sprint pitch via stateless claude -p call ───────────
|
||||||
|
# The model NEVER calls Forgejo API. It only reads context and generates pitch.
|
||||||
|
# Args: vision_issue_number vision_issue_title vision_issue_body
|
||||||
|
# Returns: pitch markdown to stdout
|
||||||
|
#
|
||||||
|
# This is a stateless invocation: the model has no memory between calls.
|
||||||
|
# All state management (which issues to pitch, dedup logic, etc.) happens in bash.
|
||||||
|
generate_pitch() {
|
||||||
|
local issue_num="$1"
|
||||||
|
local issue_title="$2"
|
||||||
|
local issue_body="$3"
|
||||||
|
|
||||||
|
# Build context block with vision issue details
|
||||||
|
local pitch_context
|
||||||
|
pitch_context="
|
||||||
|
## Vision Issue #${issue_num}
|
||||||
|
### Title
|
||||||
|
${issue_title}
|
||||||
|
|
||||||
|
### Description
|
||||||
|
${issue_body}
|
||||||
|
|
||||||
|
## Project Context
|
||||||
|
${CONTEXT_BLOCK}
|
||||||
|
${GRAPH_SECTION}
|
||||||
|
$(formula_lessons_block)
|
||||||
|
## Formula
|
||||||
|
${FORMULA_CONTENT}
|
||||||
|
|
||||||
|
${SCRATCH_INSTRUCTION}
|
||||||
|
${PROMPT_FOOTER}
|
||||||
|
"
|
||||||
|
|
||||||
|
# Prompt: model generates pitch markdown only, no API calls
|
||||||
|
local pitch_prompt="You are the architect agent for ${FORGE_REPO}. Write a sprint pitch for the vision issue above.
|
||||||
|
|
||||||
|
Instructions:
|
||||||
|
1. Output ONLY the pitch markdown (no explanations, no preamble, no postscript)
|
||||||
|
2. Use this exact format:
|
||||||
|
|
||||||
|
# Sprint: <sprint-name>
|
||||||
|
|
||||||
|
## Vision issues
|
||||||
|
- #${issue_num} — ${issue_title}
|
||||||
|
|
||||||
|
## What this enables
|
||||||
|
<what the project can do after this sprint that it can't do now>
|
||||||
|
|
||||||
|
## What exists today
|
||||||
|
<current state — infrastructure, interfaces, code that can be reused>
|
||||||
|
|
||||||
|
## Complexity
|
||||||
|
<number of files/subsystems, estimated sub-issues>
|
||||||
|
<gluecode vs greenfield ratio>
|
||||||
|
|
||||||
|
## Risks
|
||||||
|
<what could go wrong, what breaks if this is done badly>
|
||||||
|
|
||||||
|
## Cost — new infra to maintain
|
||||||
|
<what ongoing maintenance burden does this sprint add>
|
||||||
|
<new services, scheduled tasks, formulas, agent roles>
|
||||||
|
|
||||||
|
## Recommendation
|
||||||
|
<architect's assessment: worth it / defer / alternative approach>
|
||||||
|
|
||||||
|
IMPORTANT: Do NOT include design forks or questions. This is a go/no-go pitch.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
${pitch_context}
|
||||||
|
"
|
||||||
|
|
||||||
|
# Execute stateless claude -p call
|
||||||
|
local pitch_output
|
||||||
|
pitch_output=$(agent_run -p "$pitch_prompt" --output-format json --dangerously-skip-permissions --max-turns 200 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") || true
|
||||||
|
|
||||||
|
# Extract pitch content from JSON response
|
||||||
|
local pitch
|
||||||
|
pitch=$(printf '%s' "$pitch_output" | jq -r '.content // empty' 2>/dev/null) || pitch=""
|
||||||
|
|
||||||
|
if [ -z "$pitch" ]; then
|
||||||
|
log "WARNING: empty pitch generated for vision issue #${issue_num}"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Output pitch to stdout for caller to use
|
||||||
|
printf '%s' "$pitch"
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Helper: Create PR on ops repo via Forgejo API ────────────────────────
|
||||||
|
# Args: sprint_title sprint_body branch_name
|
||||||
|
# Returns: PR number on success, empty on failure
|
||||||
|
create_sprint_pr() {
|
||||||
|
local sprint_title="$1"
|
||||||
|
local sprint_body="$2"
|
||||||
|
local branch_name="$3"
|
||||||
|
|
||||||
|
# Create branch on ops repo
|
||||||
|
if ! curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/branches" \
|
||||||
|
-d "{\"new_branch_name\": \"${branch_name}\", \"old_branch_name\": \"${PRIMARY_BRANCH:-main}\"}" >/dev/null 2>&1; then
|
||||||
|
log "WARNING: failed to create branch ${branch_name}"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract sprint name from title for filename
|
||||||
|
local sprint_name
|
||||||
|
sprint_name=$(printf '%s' "$sprint_title" | sed 's/^architect: *//; s/ *$//')
|
||||||
|
local sprint_slug
|
||||||
|
sprint_slug=$(printf '%s' "$sprint_name" | tr '[:upper:]' '[:lower:]' | tr ' ' '-' | sed 's/--*/-/g')
|
||||||
|
|
||||||
|
# Prepare sprint spec content
|
||||||
|
local sprint_spec="# Sprint: ${sprint_name}
|
||||||
|
|
||||||
|
${sprint_body}
|
||||||
|
"
|
||||||
|
# Base64 encode the content
|
||||||
|
local sprint_spec_b64
|
||||||
|
sprint_spec_b64=$(printf '%s' "$sprint_spec" | base64 -w 0)
|
||||||
|
|
||||||
|
# Write sprint spec file to branch
|
||||||
|
if ! curl -sf -X PUT \
|
||||||
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/contents/sprints/${sprint_slug}.md" \
|
||||||
|
-d "{\"message\": \"sprint: add ${sprint_slug}.md\", \"content\": \"${sprint_spec_b64}\", \"branch\": \"${branch_name}\"}" >/dev/null 2>&1; then
|
||||||
|
log "WARNING: failed to write sprint spec file"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create PR - use jq to build JSON payload safely (prevents injection from markdown)
|
||||||
|
local pr_payload
|
||||||
|
pr_payload=$(jq -n \
|
||||||
|
--arg title "$sprint_title" \
|
||||||
|
--arg body "$sprint_body" \
|
||||||
|
--arg head "$branch_name" \
|
||||||
|
--arg base "${PRIMARY_BRANCH:-main}" \
|
||||||
|
'{title: $title, body: $body, head: $head, base: $base}')
|
||||||
|
|
||||||
|
local pr_response
|
||||||
|
pr_response=$(curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls" \
|
||||||
|
-d "$pr_payload" 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
# Extract PR number
|
||||||
|
local pr_number
|
||||||
|
pr_number=$(printf '%s' "$pr_response" | jq -r '.number // empty')
|
||||||
|
|
||||||
|
log "Created sprint PR #${pr_number}: ${sprint_title}"
|
||||||
|
printf '%s' "$pr_number"
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Helper: Post footer comment on PR ────────────────────────────────────
|
||||||
|
# Args: pr_number
|
||||||
|
post_pr_footer() {
|
||||||
|
local pr_number="$1"
|
||||||
|
local footer="Reply \`ACCEPT\` to proceed with design questions, or \`REJECT: <reason>\` to decline."
|
||||||
|
|
||||||
|
if curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" \
|
||||||
|
-d "{\"body\": \"${footer}\"}" >/dev/null 2>&1; then
|
||||||
|
log "Posted footer comment on PR #${pr_number}"
|
||||||
|
return 0
|
||||||
|
else
|
||||||
|
log "WARNING: failed to post footer comment on PR #${pr_number}"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Helper: Add in-progress label to vision issue ────────────────────────
|
||||||
|
# Args: vision_issue_number
|
||||||
|
add_inprogress_label() {
|
||||||
|
local issue_num="$1"
|
||||||
|
|
||||||
|
# Get label ID for 'in-progress'
|
||||||
|
local labels_json
|
||||||
|
labels_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/labels" 2>/dev/null) || return 1
|
||||||
|
|
||||||
|
local inprogress_label_id
|
||||||
|
inprogress_label_id=$(printf '%s' "$labels_json" | jq -r --arg label "in-progress" '.[] | select(.name == $label) | .id' 2>/dev/null) || true
|
||||||
|
|
||||||
|
if [ -z "$inprogress_label_id" ]; then
|
||||||
|
log "WARNING: in-progress label not found"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Add label to issue
|
||||||
|
if curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_REPO}/issues/${issue_num}/labels" \
|
||||||
|
-d "{\"labels\": [${inprogress_label_id}]}" >/dev/null 2>&1; then
|
||||||
|
log "Added in-progress label to vision issue #${issue_num}"
|
||||||
|
return 0
|
||||||
|
else
|
||||||
|
log "WARNING: failed to add in-progress label to vision issue #${issue_num}"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Precondition checks in bash before invoking the model ─────────────────
|
||||||
|
|
||||||
|
# Check 1: Skip if no vision issues exist and no open architect PRs to handle
|
||||||
|
vision_count=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"$FORGE_API/issues?labels=vision&state=open&limit=1" 2>/dev/null | jq length) || vision_count=0
|
||||||
|
if [ "${vision_count:-0}" -eq 0 ]; then
|
||||||
|
# Check for open architect PRs that need handling (ACCEPT/REJECT responses)
|
||||||
|
open_arch_prs=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=10" 2>/dev/null | jq '[.[] | select(.title | startswith("architect:"))] | length') || open_arch_prs=0
|
||||||
|
if [ "${open_arch_prs:-0}" -eq 0 ]; then
|
||||||
|
log "no vision issues and no open architect PRs — skipping"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check 2: Scan for ACCEPT/REJECT responses on open architect PRs (unconditional)
|
||||||
|
# This ensures responses are processed regardless of open_arch_prs count
|
||||||
|
has_responses_to_process=false
|
||||||
|
pr_numbers=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq -r '.[] | select(.title | startswith("architect:")) | .number') || pr_numbers=""
|
||||||
|
for pr_num in $pr_numbers; do
|
||||||
|
comments=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || continue
|
||||||
|
if printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qE '(ACCEPT|REJECT):'; then
|
||||||
|
has_responses_to_process=true
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Check 2 (continued): Skip if already at max open pitches (3), unless there are responses to process
|
||||||
|
open_arch_prs=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq '[.[] | select(.title | startswith("architect:"))] | length') || open_arch_prs=0
|
||||||
|
if [ "${open_arch_prs:-0}" -ge 3 ]; then
|
||||||
|
if [ "$has_responses_to_process" = false ]; then
|
||||||
|
log "already 3 open architect PRs with no responses to process — skipping"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
log "3 open architect PRs found but responses detected — processing"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Bash-driven state management: Select vision issues for pitching ───────
|
||||||
|
# This logic is also documented in formulas/run-architect.toml preflight step
|
||||||
|
|
||||||
|
# Fetch all data from Forgejo API upfront (bash handles state, not model)
|
||||||
|
vision_issues_json=$(fetch_vision_issues)
|
||||||
|
open_arch_prs_json=$(fetch_open_architect_prs)
|
||||||
|
|
||||||
|
# Build list of vision issues that already have open architect PRs
|
||||||
|
declare -A _arch_vision_issues_with_open_prs
|
||||||
|
while IFS= read -r pr_num; do
|
||||||
|
[ -z "$pr_num" ] && continue
|
||||||
|
pr_body=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}" 2>/dev/null | jq -r '.body // ""') || continue
|
||||||
|
# Extract vision issue numbers referenced in PR body (e.g., "refs #419" or "#419")
|
||||||
|
while IFS= read -r ref_issue; do
|
||||||
|
[ -z "$ref_issue" ] && continue
|
||||||
|
_arch_vision_issues_with_open_prs["$ref_issue"]=1
|
||||||
|
done <<< "$(printf '%s' "$pr_body" | grep -oE '#[0-9]+' | tr -d '#' | sort -u)"
|
||||||
|
done <<< "$(printf '%s' "$open_arch_prs_json" | jq -r '.[] | select(.title | startswith("architect:")) | .number')"
|
||||||
|
|
||||||
|
# Get all open vision issues
|
||||||
|
vision_issues_json=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/issues?labels=vision&state=open&limit=100" 2>/dev/null) || vision_issues_json='[]'
|
||||||
|
|
||||||
|
# Get issues with in-progress label
|
||||||
|
in_progress_issues=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/issues?labels=in-progress&state=open&limit=100" 2>/dev/null | jq -r '.[].number' 2>/dev/null) || in_progress_issues=""
|
||||||
|
|
||||||
|
# Select vision issues for pitching
|
||||||
|
ARCHITECT_TARGET_ISSUES=()
|
||||||
|
vision_issue_count=0
|
||||||
|
pitch_budget=$((3 - open_arch_prs))
|
||||||
|
|
||||||
|
# Get all vision issue numbers
|
||||||
|
vision_issue_nums=$(printf '%s' "$vision_issues_json" | jq -r '.[].number' 2>/dev/null) || vision_issue_nums=""
|
||||||
|
|
||||||
|
while IFS= read -r vision_issue; do
|
||||||
|
[ -z "$vision_issue" ] && continue
|
||||||
|
vision_issue_count=$((vision_issue_count + 1))
|
||||||
|
|
||||||
|
# Skip if pitch budget exhausted
|
||||||
|
if [ "${pitch_budget}" -le 0 ] || [ ${#ARCHITECT_TARGET_ISSUES[@]} -ge "$pitch_budget" ]; then
|
||||||
|
log "Pitch budget exhausted (${#ARCHITECT_TARGET_ISSUES[@]}/${pitch_budget})"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Skip if vision issue already has open architect PR
|
||||||
|
if [ "${_arch_vision_issues_with_open_prs[$vision_issue]:-}" = "1" ]; then
|
||||||
|
log "Vision issue #${vision_issue} already has open architect PR — skipping"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Skip if vision issue has in-progress label
|
||||||
|
if printf '%s\n' "$in_progress_issues" | grep -q "^${vision_issue}$"; then
|
||||||
|
log "Vision issue #${vision_issue} has in-progress label — skipping"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Skip if vision issue has open sub-issues (already being worked on)
|
||||||
|
if has_open_subissues "$vision_issue"; then
|
||||||
|
log "Vision issue #${vision_issue} has open sub-issues — skipping"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Skip if vision issue has merged sprint PR (decomposition already done)
|
||||||
|
if has_merged_sprint_pr "$vision_issue"; then
|
||||||
|
log "Vision issue #${vision_issue} has merged sprint PR — skipping"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Add to target issues
|
||||||
|
ARCHITECT_TARGET_ISSUES+=("$vision_issue")
|
||||||
|
log "Selected vision issue #${vision_issue} for pitching"
|
||||||
|
done <<< "$vision_issue_nums"
|
||||||
|
|
||||||
|
# If no issues selected, decide whether to exit or process responses
|
||||||
|
if [ ${#ARCHITECT_TARGET_ISSUES[@]} -eq 0 ]; then
|
||||||
|
if [ "${has_responses_to_process:-false}" = "true" ]; then
|
||||||
|
log "No new pitches needed — responses to process"
|
||||||
|
# Fall through to response processing block below
|
||||||
|
else
|
||||||
|
log "No vision issues available for pitching (all have open PRs, sub-issues, or merged sprint PRs) — signaling PHASE:done"
|
||||||
|
# Signal PHASE:done by writing to phase file if it exists
|
||||||
|
if [ -f "/tmp/architect-${PROJECT_NAME}.phase" ]; then
|
||||||
|
echo "PHASE:done" > "/tmp/architect-${PROJECT_NAME}.phase"
|
||||||
|
fi
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "Selected ${#ARCHITECT_TARGET_ISSUES[@]} vision issue(s) for pitching: ${ARCHITECT_TARGET_ISSUES[*]}"
|
||||||
|
|
||||||
|
# ── Stateless pitch generation and PR creation (bash-driven, no model API calls) ──
|
||||||
|
# For each target issue:
|
||||||
|
# 1. Fetch issue body from Forgejo API (bash)
|
||||||
|
# 2. Invoke claude -p with issue body + context (stateless, no API calls)
|
||||||
|
# 3. Create PR with pitch content (bash)
|
||||||
|
# 4. Post footer comment (bash)
|
||||||
|
|
||||||
|
pitch_count=0
|
||||||
|
for vision_issue in "${ARCHITECT_TARGET_ISSUES[@]}"; do
|
||||||
|
log "Processing vision issue #${vision_issue}"
|
||||||
|
|
||||||
|
# Fetch vision issue details from Forgejo API (bash, not model)
|
||||||
|
issue_title=$(get_vision_issue_title "$vision_issue")
|
||||||
|
issue_body=$(get_vision_issue_body "$vision_issue")
|
||||||
|
|
||||||
|
if [ -z "$issue_title" ] || [ -z "$issue_body" ]; then
|
||||||
|
log "WARNING: failed to fetch vision issue #${vision_issue} details"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Generate pitch via stateless claude -p call (model has no API access)
|
||||||
|
log "Generating pitch for vision issue #${vision_issue}"
|
||||||
|
pitch=$(generate_pitch "$vision_issue" "$issue_title" "$issue_body") || true
|
||||||
|
|
||||||
|
if [ -z "$pitch" ]; then
|
||||||
|
log "WARNING: failed to generate pitch for vision issue #${vision_issue}"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create sprint PR (bash, not model)
|
||||||
|
# Use issue number in branch name to avoid collisions across runs
|
||||||
|
branch_name="architect/sprint-vision-${vision_issue}"
|
||||||
|
pr_number=$(create_sprint_pr "architect: ${issue_title}" "$pitch" "$branch_name")
|
||||||
|
|
||||||
|
if [ -z "$pr_number" ]; then
|
||||||
|
log "WARNING: failed to create PR for vision issue #${vision_issue}"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Post footer comment
|
||||||
|
post_pr_footer "$pr_number"
|
||||||
|
|
||||||
|
# Add in-progress label to vision issue
|
||||||
|
add_inprogress_label "$vision_issue"
|
||||||
|
|
||||||
|
pitch_count=$((pitch_count + 1))
|
||||||
|
log "Completed pitch for vision issue #${vision_issue} — PR #${pr_number}"
|
||||||
|
done
|
||||||
|
|
||||||
|
log "Generated ${pitch_count} sprint pitch(es)"
|
||||||
|
|
||||||
|
# ── Run agent for response processing if needed ───────────────────────────
|
||||||
|
# Always process ACCEPT/REJECT responses when present, regardless of new pitches
|
||||||
|
if [ "${has_responses_to_process:-false}" = "true" ]; then
|
||||||
|
log "Processing ACCEPT/REJECT responses on existing PRs"
|
||||||
|
|
||||||
|
# Check if any PRs have responses that need agent handling
|
||||||
|
needs_agent=false
|
||||||
|
pr_numbers=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq -r '.[] | select(.title | startswith("architect:")) | .number') || pr_numbers=""
|
||||||
|
|
||||||
|
for pr_num in $pr_numbers; do
|
||||||
|
# Check for ACCEPT/REJECT in comments
|
||||||
|
comments=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || continue
|
||||||
|
|
||||||
|
# Check for review decisions (higher precedence)
|
||||||
|
reviews=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}/reviews" 2>/dev/null) || reviews=""
|
||||||
|
|
||||||
|
# Check for ACCEPT (APPROVED review or ACCEPT comment)
|
||||||
|
if printf '%s' "$reviews" | jq -e '.[] | select(.state == "APPROVED")' >/dev/null 2>&1; then
|
||||||
|
log "PR #${pr_num} has APPROVED review — needs agent handling"
|
||||||
|
needs_agent=true
|
||||||
|
elif printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qiE '^[^:]+: *ACCEPT'; then
|
||||||
|
log "PR #${pr_num} has ACCEPT comment — needs agent handling"
|
||||||
|
needs_agent=true
|
||||||
|
elif printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qiE '^[^:]+: *REJECT:'; then
|
||||||
|
log "PR #${pr_num} has REJECT comment — needs agent handling"
|
||||||
|
needs_agent=true
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Run agent only if there are responses to process
|
||||||
|
if [ "$needs_agent" = "true" ]; then
|
||||||
|
# Determine session handling based on PR state
|
||||||
|
RESUME_ARGS=()
|
||||||
|
SESSION_MODE="fresh"
|
||||||
|
|
||||||
|
if detect_questions_phase; then
|
||||||
|
# PR is in questions-awaiting-answers phase — resume from that session
|
||||||
|
if [ -f "$SID_FILE" ]; then
|
||||||
|
RESUME_SESSION=$(cat "$SID_FILE")
|
||||||
|
RESUME_ARGS=(--resume "$RESUME_SESSION")
|
||||||
|
SESSION_MODE="questions_phase"
|
||||||
|
log "PR in questions-awaiting-answers phase — resuming session: ${RESUME_SESSION:0:12}..."
|
||||||
|
else
|
||||||
|
log "PR in questions phase but no session file — starting fresh session"
|
||||||
|
fi
|
||||||
|
elif detect_approved_pending_questions; then
|
||||||
|
# PR is approved but awaiting initial design questions — start fresh with special prompt
|
||||||
|
SESSION_MODE="start_questions"
|
||||||
|
log "PR approved and awaiting initial design questions — starting fresh session"
|
||||||
|
else
|
||||||
|
log "PR not in questions phase — starting fresh session"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Build prompt with appropriate mode
|
||||||
|
PROMPT_FOR_MODE=$(build_architect_prompt_for_mode "$SESSION_MODE")
|
||||||
|
|
||||||
|
agent_run "${RESUME_ARGS[@]}" --worktree "$WORKTREE" "$PROMPT_FOR_MODE"
|
||||||
|
log "agent_run complete"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Clean up scratch files (legacy single file + per-issue files) ──────────
|
||||||
rm -f "$SCRATCH_FILE"
|
rm -f "$SCRATCH_FILE"
|
||||||
|
rm -f "${SCRATCH_FILE_PREFIX}"-*.md
|
||||||
|
|
||||||
# Write journal entry post-session
|
# Write journal entry post-session
|
||||||
profile_write_journal "architect-run" "Architect run $(date -u +%Y-%m-%d)" "complete" "" || true
|
profile_write_journal "architect-run" "Architect run $(date -u +%Y-%m-%d)" "complete" "" || true
|
||||||
|
|
|
||||||
2277
bin/disinto
2277
bin/disinto
File diff suppressed because it is too large
Load diff
|
|
@ -1,22 +1,40 @@
|
||||||
<!-- last-reviewed: 33eb565d7e0c5b7e0159e1720ba7f79126a7e25e -->
|
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
|
||||||
# Dev Agent
|
# Dev Agent
|
||||||
|
|
||||||
**Role**: Implement issues autonomously — write code, push branches, address
|
**Role**: Implement issues autonomously — write code, push branches, address
|
||||||
CI failures and review feedback.
|
CI failures and review feedback.
|
||||||
|
|
||||||
**Trigger**: `dev-poll.sh` runs every 10 min via cron. Sources `lib/guard.sh` and
|
**Trigger**: `dev-poll.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh`
|
||||||
calls `check_active dev` first — skips if `$FACTORY_ROOT/state/.dev-active` is
|
every 5 minutes (iteration math at line 171-175). Sources `lib/guard.sh` and calls
|
||||||
absent. Then performs a direct-merge scan (approved + CI green PRs — including
|
`check_active dev` first — skips if `$FACTORY_ROOT/state/.dev-active` is absent. Then
|
||||||
chore/gardener PRs without issue numbers), then checks the agent lock and scans
|
performs a direct-merge scan (approved + CI green PRs — including chore/gardener PRs
|
||||||
for ready issues using a two-tier priority queue: (1) `priority`+`backlog` issues
|
without issue numbers), then checks the agent lock and scans for ready issues using a
|
||||||
first (FIFO within tier), then (2) plain `backlog` issues (FIFO). Orphaned
|
two-tier priority queue: (1) `priority`+`backlog` issues first (FIFO within tier), then
|
||||||
in-progress issues are also picked up. The direct-merge scan runs before the lock
|
(2) plain `backlog` issues (FIFO). Orphaned in-progress issues are also picked up. The
|
||||||
check so approved PRs get merged even while a dev-agent session is active.
|
direct-merge scan runs before the lock check so approved PRs get merged even while a
|
||||||
|
dev-agent session is active.
|
||||||
|
|
||||||
**Key files**:
|
**Key files**:
|
||||||
- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. Formula guard skips issues labeled `formula`, `prediction/dismissed`, or `prediction/unreviewed`. **Race prevention**: checks issue assignee before claiming — skips if assigned to a different bot user. **Stale branch abandonment**: closes PRs and deletes branches that are behind `$PRIMARY_BRANCH` (restarts poll cycle for a fresh start).
|
- `dev/dev-poll.sh` — Polling loop participant: finds next ready issue, handles merge/rebase
|
||||||
|
of approved PRs, tracks CI fix attempts. Invoked by `docker/agents/entrypoint.sh` every 5
|
||||||
|
minutes. `BOT_USER` is resolved once at startup via the Forge `/user` API and cached for
|
||||||
|
all assignee checks. Formula guard skips issues labeled `formula`, `prediction/dismissed`,
|
||||||
|
or `prediction/unreviewed`. **Race prevention**: checks issue assignee before claiming —
|
||||||
|
skips if assigned to a different bot user. **Stale branch abandonment**: closes PRs and
|
||||||
|
deletes branches that are behind `$PRIMARY_BRANCH` (restarts poll cycle for a fresh start).
|
||||||
|
**Stale in-progress recovery**: on each poll cycle, scans for issues labeled `in-progress`.
|
||||||
|
If the issue has a `vision` label, sets `BLOCKED_BY_INPROGRESS=true` and skips further
|
||||||
|
stale checks (vision issues are managed by the architect). If the issue is assigned to
|
||||||
|
`$BOT_USER` (this agent), checks for pending review feedback first — if an open PR has
|
||||||
|
`REQUEST_CHANGES`, spawns the dev-agent to address it before setting `BLOCKED_BY_INPROGRESS=true`;
|
||||||
|
otherwise just sets blocked. If assigned to another agent, logs and falls through (does not
|
||||||
|
block). If no assignee, no open PR, and no agent lock file — removes `in-progress`, adds
|
||||||
|
`blocked` with a human-triage comment. **Per-agent open-PR gate**: before starting new work,
|
||||||
|
filters open waiting PRs to only those assigned to this agent (`$BOT_USER`). Other agents'
|
||||||
|
PRs do not block this agent's pipeline (#358, #369). **Pre-lock merge scan own-PRs only**:
|
||||||
|
the direct-merge scan only merges PRs whose linked issue is assigned to this agent — skips
|
||||||
|
PRs owned by other bot users (#374).
|
||||||
- `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval
|
- `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval
|
||||||
- `dev/phase-handler.sh` — Phase callback functions: `post_refusal_comment()`, `_on_phase_change()`, `build_phase_protocol_prompt()`. `do_merge()` detects already-merged PRs on HTTP 405 (race with dev-poll's pre-lock scan) and returns success instead of escalating. Sources `lib/mirrors.sh` and calls `mirror_push()` after every successful merge.
|
|
||||||
- `dev/phase-test.sh` — Integration test for the phase protocol
|
- `dev/phase-test.sh` — Integration test for the phase protocol
|
||||||
|
|
||||||
**Environment variables consumed** (via `lib/env.sh` + project TOML):
|
**Environment variables consumed** (via `lib/env.sh` + project TOML):
|
||||||
|
|
@ -33,9 +51,9 @@ check so approved PRs get merged even while a dev-agent session is active.
|
||||||
|
|
||||||
**Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h.
|
**Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h.
|
||||||
|
|
||||||
**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux `dev-{project}-{issue}` → phase file
|
**Lifecycle**: dev-poll.sh (invoked by polling loop, `check_active dev`) → dev-agent.sh →
|
||||||
drives CI/review loop → merge + `mirror_push()` → close issue. On respawn after
|
tmux session → phase file drives CI/review loop → merge + `mirror_push()` → close issue.
|
||||||
`PHASE:escalate`, the stale phase file is cleared first so the session starts
|
On respawn after `PHASE:escalate`, the stale phase file is cleared first so the session
|
||||||
clean; the reinject prompt tells Claude not to re-escalate for the same reason.
|
starts clean; the reinject prompt tells Claude not to re-escalate for the same reason.
|
||||||
On respawn for any active PR, the prompt explicitly tells Claude the PR already
|
On respawn for any active PR, the prompt explicitly tells Claude the PR already exists
|
||||||
exists and not to create a new one via API.
|
and not to create a new one via API.
|
||||||
|
|
|
||||||
|
|
@ -268,8 +268,22 @@ log "forge remote: ${FORGE_REMOTE}"
|
||||||
# First attempt: fix/issue-N, subsequent: fix/issue-N-1, fix/issue-N-2, etc.
|
# First attempt: fix/issue-N, subsequent: fix/issue-N-1, fix/issue-N-2, etc.
|
||||||
if [ "$RECOVERY_MODE" = false ]; then
|
if [ "$RECOVERY_MODE" = false ]; then
|
||||||
# Count only branches matching fix/issue-N, fix/issue-N-1, fix/issue-N-2, etc. (exact prefix match)
|
# Count only branches matching fix/issue-N, fix/issue-N-1, fix/issue-N-2, etc. (exact prefix match)
|
||||||
ATTEMPT=$(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}" 2>/dev/null | grep -c "refs/heads/fix/issue-${ISSUE}$" || echo 0)
|
# Use explicit error handling to avoid silent failure from set -e + pipefail when git ls-remote fails.
|
||||||
ATTEMPT=$((ATTEMPT + $(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}-*" 2>/dev/null | wc -l)))
|
if _lr1=$(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}" 2>&1); then
|
||||||
|
ATTEMPT=$(printf '%s\n' "$_lr1" | grep -c "refs/heads/fix/issue-${ISSUE}$" || true)
|
||||||
|
else
|
||||||
|
log "WARNING: git ls-remote failed for attempt counting: $_lr1"
|
||||||
|
ATTEMPT=0
|
||||||
|
fi
|
||||||
|
ATTEMPT="${ATTEMPT:-0}"
|
||||||
|
|
||||||
|
if _lr2=$(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}-*" 2>&1); then
|
||||||
|
# Guard on empty to avoid off-by-one: command substitution strips trailing newlines,
|
||||||
|
# so wc -l undercounts by 1 when output exists. Re-add newline only if non-empty.
|
||||||
|
ATTEMPT=$((ATTEMPT + $( [ -z "$_lr2" ] && echo 0 || printf '%s\n' "$_lr2" | wc -l )))
|
||||||
|
else
|
||||||
|
log "WARNING: git ls-remote failed for suffix counting: $_lr2"
|
||||||
|
fi
|
||||||
if [ "$ATTEMPT" -gt 0 ]; then
|
if [ "$ATTEMPT" -gt 0 ]; then
|
||||||
BRANCH="fix/issue-${ISSUE}-${ATTEMPT}"
|
BRANCH="fix/issue-${ISSUE}-${ATTEMPT}"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
239
dev/dev-poll.sh
239
dev/dev-poll.sh
|
|
@ -14,7 +14,7 @@
|
||||||
# 3. Ready "backlog" issues without "priority" (FIFO within tier)
|
# 3. Ready "backlog" issues without "priority" (FIFO within tier)
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# cron every 10min
|
# Called by: entrypoint.sh polling loop (every 10 min)
|
||||||
# dev-poll.sh [projects/harb.toml] # optional project config
|
# dev-poll.sh [projects/harb.toml] # optional project config
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
@ -42,6 +42,11 @@ log() {
|
||||||
printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Resolve current agent identity once at startup — cache for all assignee checks
|
||||||
|
BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${API%%/repos*}/user" | jq -r '.login') || BOT_USER=""
|
||||||
|
log "running as agent: ${BOT_USER}"
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3)
|
# CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -98,20 +103,43 @@ is_blocked() {
|
||||||
# STALENESS DETECTION FOR IN-PROGRESS ISSUES
|
# STALENESS DETECTION FOR IN-PROGRESS ISSUES
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
# Check if a tmux session for a specific issue is alive
|
# Check if in-progress label was added recently (within grace period).
|
||||||
# Args: project_name issue_number
|
# Prevents race where a poller marks an issue as stale before the claiming
|
||||||
# Returns: 0 if session is alive, 1 if not
|
# agent's assign + label sequence has fully propagated. See issue #471.
|
||||||
session_is_alive() {
|
# Args: issue_number [grace_seconds]
|
||||||
local project="$1" issue="$2"
|
# Returns: 0 if recently added (within grace period), 1 if not
|
||||||
local session="dev-${project}-${issue}"
|
in_progress_recently_added() {
|
||||||
tmux has-session -t "$session" 2>/dev/null
|
local issue="$1" grace="${2:-60}"
|
||||||
|
local now label_ts delta
|
||||||
|
|
||||||
|
now=$(date +%s)
|
||||||
|
|
||||||
|
# Query issue timeline for the most recent in-progress label event.
|
||||||
|
# Forgejo 11.x API returns type as string "label", not integer 7.
|
||||||
|
label_ts=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${API}/issues/${issue}/timeline" | \
|
||||||
|
jq -r '[.[] | select(.type == "label") | select(.label.name == "in-progress")] | last | .created_at // empty') || true
|
||||||
|
|
||||||
|
if [ -z "$label_ts" ]; then
|
||||||
|
return 1 # no label event found — not recently added
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Convert ISO timestamp to epoch and compare
|
||||||
|
local label_epoch
|
||||||
|
label_epoch=$(date -d "$label_ts" +%s 2>/dev/null || echo 0)
|
||||||
|
delta=$(( now - label_epoch ))
|
||||||
|
|
||||||
|
if [ "$delta" -lt "$grace" ]; then
|
||||||
|
return 0 # within grace period
|
||||||
|
fi
|
||||||
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check if there's an open PR for a specific issue
|
# Check if there's an open PR for a specific issue
|
||||||
# Args: project_name issue_number
|
# Args: issue_number
|
||||||
# Returns: 0 if open PR exists, 1 if not
|
# Returns: 0 if open PR exists, 1 if not
|
||||||
open_pr_exists() {
|
open_pr_exists() {
|
||||||
local project="$1" issue="$2"
|
local issue="$1"
|
||||||
local branch="fix/issue-${issue}"
|
local branch="fix/issue-${issue}"
|
||||||
local pr_num
|
local pr_num
|
||||||
|
|
||||||
|
|
@ -152,12 +180,13 @@ relabel_stale_issue() {
|
||||||
# Post diagnostic comment using shared helper
|
# Post diagnostic comment using shared helper
|
||||||
local comment_body
|
local comment_body
|
||||||
comment_body=$(
|
comment_body=$(
|
||||||
printf '### Stale in-progress issue detected\n\n'
|
printf '%s\n\n' '### Stale in-progress issue detected'
|
||||||
printf '| Field | Value |\n|---|---|\n'
|
printf '%s\n' '| Field | Value |'
|
||||||
|
printf '%s\n' '|---|---|'
|
||||||
printf '| Detection reason | `%s` |\n' "$reason"
|
printf '| Detection reason | `%s` |\n' "$reason"
|
||||||
printf '| Timestamp | `%s` |\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
printf '| Timestamp | `%s` |\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||||
printf '\n**Status:** This issue was labeled `in-progress` but no active tmux session exists.\n'
|
printf '%s\n' '**Status:** This issue was labeled `in-progress` but has no assignee, no open PR, and no agent lock file.'
|
||||||
printf '**Action required:** A maintainer should triage this issue.\n'
|
printf '%s\n' '**Action required:** A maintainer should triage this issue.'
|
||||||
)
|
)
|
||||||
_ilc_post_comment "$issue" "$comment_body"
|
_ilc_post_comment "$issue" "$comment_body"
|
||||||
|
|
||||||
|
|
@ -348,6 +377,16 @@ for i in $(seq 0 $(($(echo "$PL_PRS" | jq 'length') - 1))); do
|
||||||
jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true
|
jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true
|
||||||
|
|
||||||
if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then
|
if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then
|
||||||
|
# Check if issue is assigned to this agent — only merge own PRs
|
||||||
|
if [ "$PL_ISSUE" -gt 0 ]; then
|
||||||
|
PR_ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${API}/issues/${PL_ISSUE}") || true
|
||||||
|
PR_ISSUE_ASSIGNEE=$(echo "$PR_ISSUE_JSON" | jq -r '.assignee.login // ""') || true
|
||||||
|
if [ -n "$PR_ISSUE_ASSIGNEE" ] && [ "$PR_ISSUE_ASSIGNEE" != "$BOT_USER" ]; then
|
||||||
|
log "PR #${PL_PR_NUM} (issue #${PL_ISSUE}) assigned to ${PR_ISSUE_ASSIGNEE} — skipping merge (not mine)"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
fi
|
||||||
if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then
|
if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then
|
||||||
PL_MERGED_ANY=true
|
PL_MERGED_ANY=true
|
||||||
fi
|
fi
|
||||||
|
|
@ -371,6 +410,9 @@ if [ -f "$LOCKFILE" ]; then
|
||||||
rm -f "$LOCKFILE"
|
rm -f "$LOCKFILE"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# --- Fetch origin refs before any stale branch checks ---
|
||||||
|
git fetch origin --prune 2>/dev/null || true
|
||||||
|
|
||||||
# --- Memory guard ---
|
# --- Memory guard ---
|
||||||
memory_guard 2000
|
memory_guard 2000
|
||||||
|
|
||||||
|
|
@ -379,23 +421,17 @@ memory_guard 2000
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
log "checking for in-progress issues"
|
log "checking for in-progress issues"
|
||||||
|
|
||||||
# Get current bot identity for assignee checks
|
|
||||||
BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API%%/repos*}/user" | jq -r '.login') || BOT_USER=""
|
|
||||||
|
|
||||||
ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
"${API}/issues?state=open&labels=in-progress&limit=10&type=issues")
|
"${API}/issues?state=open&labels=in-progress&limit=10&type=issues")
|
||||||
|
|
||||||
ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length')
|
ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length')
|
||||||
|
BLOCKED_BY_INPROGRESS=false
|
||||||
|
OTHER_AGENT_INPROGRESS=false
|
||||||
if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||||
ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number')
|
ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number')
|
||||||
|
|
||||||
# Staleness check: if no tmux session and no open PR, the issue is stale
|
# Staleness check: if no assignee, no open PR, and no agent lock, the issue is stale
|
||||||
SESSION_ALIVE=false
|
|
||||||
OPEN_PR=false
|
OPEN_PR=false
|
||||||
if tmux has-session -t "dev-${PROJECT_NAME}-${ISSUE_NUM}" 2>/dev/null; then
|
|
||||||
SESSION_ALIVE=true
|
|
||||||
fi
|
|
||||||
if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
"${API}/pulls?state=open&limit=20" | \
|
"${API}/pulls?state=open&limit=20" | \
|
||||||
jq -e --arg branch "fix/issue-${ISSUE_NUM}" \
|
jq -e --arg branch "fix/issue-${ISSUE_NUM}" \
|
||||||
|
|
@ -403,14 +439,75 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||||
OPEN_PR=true
|
OPEN_PR=true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$SESSION_ALIVE" = false ] && [ "$OPEN_PR" = false ]; then
|
# Skip vision-labeled issues — they are managed by architect agent, not dev-poll
|
||||||
log "issue #${ISSUE_NUM} is stale (no active tmux session, no open PR) — relabeling to blocked"
|
issue_labels=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
relabel_stale_issue "$ISSUE_NUM" "no_active_session_no_open_pr"
|
"${API}/issues/${ISSUE_NUM}" | jq -r '[.labels[].name] | join(",")')
|
||||||
exit 0
|
if echo "$issue_labels" | grep -q "vision"; then
|
||||||
|
log "issue #${ISSUE_NUM} has 'vision' label — skipping stale detection (managed by architect)"
|
||||||
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if issue has an assignee — only block on issues assigned to this agent
|
||||||
|
assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" "${API}/issues/${ISSUE_NUM}" | jq -r '.assignee.login // ""')
|
||||||
|
if [ -n "$assignee" ]; then
|
||||||
|
if [ "$assignee" = "$BOT_USER" ]; then
|
||||||
|
# Check if my PR has review feedback to address before exiting
|
||||||
|
HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${API}/pulls?state=open&limit=20" | \
|
||||||
|
jq -r --arg branch "fix/issue-${ISSUE_NUM}" \
|
||||||
|
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
|
||||||
|
|
||||||
|
if [ -n "$HAS_PR" ]; then
|
||||||
|
# Check for REQUEST_CHANGES review feedback
|
||||||
|
REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${API}/pulls/${HAS_PR}/reviews") || true
|
||||||
|
HAS_CHANGES=$(echo "$REVIEWS_JSON" | \
|
||||||
|
jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true
|
||||||
|
|
||||||
|
if [ "${HAS_CHANGES:-0}" -gt 0 ]; then
|
||||||
|
log "issue #${ISSUE_NUM} has review feedback — spawning agent"
|
||||||
|
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||||
|
log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
|
||||||
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
else
|
||||||
|
log "issue #${ISSUE_NUM} assigned to me — my thread is busy"
|
||||||
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log "issue #${ISSUE_NUM} assigned to me — my thread is busy"
|
||||||
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log "issue #${ISSUE_NUM} assigned to ${assignee} — their thread, not blocking"
|
||||||
|
OTHER_AGENT_INPROGRESS=true
|
||||||
|
# Issue assigned to another agent — skip stale checks but fall through to backlog
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Only proceed with in-progress checks if not blocked by this agent's own work
|
||||||
|
if [ "$BLOCKED_BY_INPROGRESS" = false ] && [ "$OTHER_AGENT_INPROGRESS" = false ]; then
|
||||||
|
# Check for dev-agent lock file (agent may be running in another container)
|
||||||
|
LOCK_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE_NUM}.txt"
|
||||||
|
if [ -f "$LOCK_FILE" ]; then
|
||||||
|
log "issue #${ISSUE_NUM} has agent lock file — trusting active work"
|
||||||
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$OPEN_PR" = false ] && [ "$BLOCKED_BY_INPROGRESS" = false ]; then
|
||||||
|
# Grace period: skip if in-progress label was added <60s ago (issue #471)
|
||||||
|
if in_progress_recently_added "$ISSUE_NUM" 60; then
|
||||||
|
log "issue #${ISSUE_NUM} in-progress label added <60s ago — skipping stale detection (grace period)"
|
||||||
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
else
|
||||||
|
log "issue #${ISSUE_NUM} is stale (no assignee, no open PR, no agent lock) — relabeling to blocked"
|
||||||
|
relabel_stale_issue "$ISSUE_NUM" "no_assignee_no_open_pr_no_lock"
|
||||||
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Formula guard: formula-labeled issues should not be worked on by dev-agent.
|
# Formula guard: formula-labeled issues should not be worked on by dev-agent.
|
||||||
# Remove in-progress label and skip to prevent infinite respawn cycle (#115).
|
# Remove in-progress label and skip to prevent infinite respawn cycle (#115).
|
||||||
|
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
|
||||||
ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true
|
ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true
|
||||||
SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true
|
SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true
|
||||||
if [ -n "$SKIP_LABEL" ]; then
|
if [ -n "$SKIP_LABEL" ]; then
|
||||||
|
|
@ -418,10 +515,12 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||||
IP_ID=$(_ilc_in_progress_id)
|
IP_ID=$(_ilc_in_progress_id)
|
||||||
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
|
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
|
||||||
exit 0
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if there's already an open PR for this issue
|
# Check if there's already an open PR for this issue
|
||||||
|
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
|
||||||
HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
"${API}/pulls?state=open&limit=20" | \
|
"${API}/pulls?state=open&limit=20" | \
|
||||||
jq -r --arg branch "fix/issue-${ISSUE_NUM}" \
|
jq -r --arg branch "fix/issue-${ISSUE_NUM}" \
|
||||||
|
|
@ -430,7 +529,7 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||||
if [ -n "$HAS_PR" ]; then
|
if [ -n "$HAS_PR" ]; then
|
||||||
# Check if branch is stale (behind primary branch)
|
# Check if branch is stale (behind primary branch)
|
||||||
BRANCH="fix/issue-${ISSUE_NUM}"
|
BRANCH="fix/issue-${ISSUE_NUM}"
|
||||||
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "999")
|
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0")
|
||||||
if [ "$AHEAD" -gt 0 ]; then
|
if [ "$AHEAD" -gt 0 ]; then
|
||||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR"
|
log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR"
|
||||||
# Close the PR via API
|
# Close the PR via API
|
||||||
|
|
@ -444,10 +543,11 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||||
# Reset to fresh start on primary branch
|
# Reset to fresh start on primary branch
|
||||||
git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true
|
git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true
|
||||||
git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true
|
git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true
|
||||||
# Exit to restart poll cycle (issue will be picked up fresh)
|
BLOCKED_BY_INPROGRESS=true
|
||||||
exit 0
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Only process PR if not abandoned (stale branch check above)
|
||||||
|
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
|
||||||
PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
"${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true
|
"${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true
|
||||||
CI_STATE=$(ci_commit_status "$PR_SHA") || true
|
CI_STATE=$(ci_commit_status "$PR_SHA") || true
|
||||||
|
|
@ -468,21 +568,32 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||||
|
|
||||||
if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then
|
if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then
|
||||||
if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then
|
if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then
|
||||||
exit 0
|
BLOCKED_BY_INPROGRESS=true
|
||||||
fi
|
else
|
||||||
# Direct merge failed (conflicts?) — fall back to dev-agent
|
# Direct merge failed (conflicts?) — fall back to dev-agent
|
||||||
log "falling back to dev-agent for PR #${HAS_PR} merge"
|
log "falling back to dev-agent for PR #${HAS_PR} merge"
|
||||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)"
|
log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)"
|
||||||
exit 0
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
|
|
||||||
# Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is
|
# Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is
|
||||||
# pending/unknown. Definitive CI failure is handled by the elif below.
|
# pending/unknown. Definitive CI failure is handled by the elif below.
|
||||||
elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then
|
elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then
|
||||||
|
# Check if issue is assigned to this agent — skip if assigned to another bot
|
||||||
|
ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${API}/issues/${ISSUE_NUM}") || true
|
||||||
|
assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true
|
||||||
|
if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then
|
||||||
|
log "issue #${ISSUE_NUM} PR #${HAS_PR} REQUEST_CHANGES but assigned to ${assignee} — skipping"
|
||||||
|
# Don't block — fall through to backlog
|
||||||
|
BLOCKED_BY_INPROGRESS=false
|
||||||
|
else
|
||||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent"
|
log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent"
|
||||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
|
log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
|
||||||
exit 0
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
|
|
||||||
elif ci_failed "$CI_STATE"; then
|
elif ci_failed "$CI_STATE"; then
|
||||||
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then
|
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then
|
||||||
|
|
@ -491,17 +602,19 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||||
else
|
else
|
||||||
# Increment at actual launch time (not on guard-hit paths)
|
# Increment at actual launch time (not on guard-hit paths)
|
||||||
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then
|
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then
|
||||||
exit 0 # exhausted between check and launch
|
BLOCKED_BY_INPROGRESS=true # exhausted between check and launch
|
||||||
fi
|
else
|
||||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)"
|
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)"
|
||||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)"
|
log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)"
|
||||||
exit 0
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
else
|
else
|
||||||
log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)"
|
log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)"
|
||||||
exit 0
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
# Check assignee before adopting orphaned issue
|
# Check assignee before adopting orphaned issue
|
||||||
|
|
@ -515,12 +628,19 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||||
IP_ID=$(_ilc_in_progress_id)
|
IP_ID=$(_ilc_in_progress_id)
|
||||||
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
|
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
|
||||||
exit 0
|
# Don't block — fall through to backlog
|
||||||
fi
|
else
|
||||||
|
|
||||||
log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})"
|
log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})"
|
||||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)"
|
log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)"
|
||||||
|
BLOCKED_BY_INPROGRESS=true
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# If blocked by in-progress work, exit now
|
||||||
|
if [ "$BLOCKED_BY_INPROGRESS" = true ]; then
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
@ -590,6 +710,14 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do
|
||||||
|
|
||||||
# Stuck: REQUEST_CHANGES or CI failure -> spawn agent
|
# Stuck: REQUEST_CHANGES or CI failure -> spawn agent
|
||||||
if [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then
|
if [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then
|
||||||
|
# Check if issue is assigned to this agent — skip if assigned to another bot
|
||||||
|
ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${API}/issues/${STUCK_ISSUE}") || true
|
||||||
|
assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true
|
||||||
|
if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then
|
||||||
|
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) REQUEST_CHANGES but assigned to ${assignee} — skipping"
|
||||||
|
continue # skip this PR, check next stuck PR or fall through to backlog
|
||||||
|
fi
|
||||||
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) has REQUEST_CHANGES — fixing first"
|
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) has REQUEST_CHANGES — fixing first"
|
||||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 &
|
nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 &
|
||||||
log "started dev-agent PID $! for stuck PR #${PR_NUM}"
|
log "started dev-agent PID $! for stuck PR #${PR_NUM}"
|
||||||
|
|
@ -682,7 +810,7 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
|
||||||
if [ -n "$EXISTING_PR" ]; then
|
if [ -n "$EXISTING_PR" ]; then
|
||||||
# Check if branch is stale (behind primary branch)
|
# Check if branch is stale (behind primary branch)
|
||||||
BRANCH="fix/issue-${ISSUE_NUM}"
|
BRANCH="fix/issue-${ISSUE_NUM}"
|
||||||
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "999")
|
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0")
|
||||||
if [ "$AHEAD" -gt 0 ]; then
|
if [ "$AHEAD" -gt 0 ]; then
|
||||||
log "issue #${ISSUE_NUM} PR #${EXISTING_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR"
|
log "issue #${ISSUE_NUM} PR #${EXISTING_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR"
|
||||||
# Close the PR via API
|
# Close the PR via API
|
||||||
|
|
@ -757,9 +885,32 @@ done
|
||||||
|
|
||||||
# Single-threaded per project: if any issue has an open PR waiting for review/CI,
|
# Single-threaded per project: if any issue has an open PR waiting for review/CI,
|
||||||
# don't start new work — let the pipeline drain first
|
# don't start new work — let the pipeline drain first
|
||||||
|
# But only block on PRs assigned to this agent (per-agent logic from #358)
|
||||||
if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then
|
if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then
|
||||||
log "holding #${READY_ISSUE} — waiting for open PR(s) to land first: ${WAITING_PRS}"
|
# Filter to only this agent's waiting PRs
|
||||||
|
MY_WAITING_PRS=""
|
||||||
|
for pr_num in $(echo "$WAITING_PRS" | tr ',' ' '); do
|
||||||
|
pr_num="${pr_num#\#}" # Remove leading #
|
||||||
|
# Check if this PR's issue is assigned to this agent
|
||||||
|
pr_info=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${API}/pulls/${pr_num}" 2>/dev/null) || true
|
||||||
|
pr_branch=$(echo "$pr_info" | jq -r '.head.ref') || true
|
||||||
|
issue_num=$(echo "$pr_branch" | grep -oP '(?<=fix/issue-)\d+' || true)
|
||||||
|
if [ -z "$issue_num" ]; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
issue_assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${API}/issues/${issue_num}" 2>/dev/null | jq -r '.assignee.login // ""') || true
|
||||||
|
if [ -n "$issue_assignee" ] && [ "$issue_assignee" = "$BOT_USER" ]; then
|
||||||
|
MY_WAITING_PRS="${MY_WAITING_PRS:-}${MY_WAITING_PRS:+, }#${pr_num}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -n "$MY_WAITING_PRS" ]; then
|
||||||
|
log "holding #${READY_ISSUE} — waiting for my open PR(s) to land first: ${MY_WAITING_PRS}"
|
||||||
exit 0
|
exit 0
|
||||||
|
fi
|
||||||
|
log "other agents' PRs waiting: ${WAITING_PRS} — proceeding with #${READY_ISSUE}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$READY_ISSUE" ]; then
|
if [ -z "$READY_ISSUE" ]; then
|
||||||
|
|
|
||||||
|
|
@ -1,820 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
# dev/phase-handler.sh — Phase callback functions for dev-agent.sh
|
|
||||||
#
|
|
||||||
# Source this file from agent orchestrators after lib/agent-session.sh is loaded.
|
|
||||||
# Defines: post_refusal_comment(), _on_phase_change(), build_phase_protocol_prompt()
|
|
||||||
#
|
|
||||||
# Required globals (set by calling agent before or after sourcing):
|
|
||||||
# ISSUE, FORGE_TOKEN, API, FORGE_WEB, PROJECT_NAME, FACTORY_ROOT
|
|
||||||
# BRANCH, PHASE_FILE, WORKTREE, IMPL_SUMMARY_FILE
|
|
||||||
# PRIMARY_BRANCH, SESSION_NAME, LOGFILE, ISSUE_TITLE
|
|
||||||
# WOODPECKER_REPO_ID, WOODPECKER_TOKEN, WOODPECKER_SERVER
|
|
||||||
#
|
|
||||||
# Globals with defaults (agents can override after sourcing):
|
|
||||||
# PR_NUMBER, CI_POLL_TIMEOUT, MAX_CI_FIXES, MAX_REVIEW_ROUNDS,
|
|
||||||
# REVIEW_POLL_TIMEOUT, CI_RETRY_COUNT, CI_FIX_COUNT, REVIEW_ROUND,
|
|
||||||
# CLAIMED, PHASE_POLL_INTERVAL
|
|
||||||
#
|
|
||||||
# Calls back to agent-defined helpers:
|
|
||||||
# cleanup_worktree(), cleanup_labels(), status(), log()
|
|
||||||
#
|
|
||||||
# shellcheck shell=bash
|
|
||||||
# shellcheck disable=SC2154 # globals are set in dev-agent.sh before calling
|
|
||||||
# shellcheck disable=SC2034 # CLAIMED is read by cleanup() in dev-agent.sh
|
|
||||||
|
|
||||||
# Load secret scanner for redacting tmux output before posting to issues
|
|
||||||
# shellcheck source=../lib/secret-scan.sh
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh"
|
|
||||||
|
|
||||||
# Load shared CI helpers (is_infra_step, classify_pipeline_failure, etc.)
|
|
||||||
# shellcheck source=../lib/ci-helpers.sh
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh"
|
|
||||||
|
|
||||||
# Load mirror push helper
|
|
||||||
# shellcheck source=../lib/mirrors.sh
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh"
|
|
||||||
|
|
||||||
# --- Default callback stubs (agents can override after sourcing) ---
|
|
||||||
# cleanup_worktree and cleanup_labels are called during phase transitions.
|
|
||||||
# Provide no-op defaults so phase-handler.sh is self-contained; sourcing
|
|
||||||
# agents override these with real implementations.
|
|
||||||
if ! declare -f cleanup_worktree >/dev/null 2>&1; then
|
|
||||||
cleanup_worktree() { :; }
|
|
||||||
fi
|
|
||||||
if ! declare -f cleanup_labels >/dev/null 2>&1; then
|
|
||||||
cleanup_labels() { :; }
|
|
||||||
fi
|
|
||||||
|
|
||||||
# --- Default globals (agents can override after sourcing) ---
|
|
||||||
: "${CI_POLL_TIMEOUT:=1800}"
|
|
||||||
: "${REVIEW_POLL_TIMEOUT:=10800}"
|
|
||||||
: "${MAX_CI_FIXES:=3}"
|
|
||||||
: "${MAX_REVIEW_ROUNDS:=5}"
|
|
||||||
: "${CI_RETRY_COUNT:=0}"
|
|
||||||
: "${CI_FIX_COUNT:=0}"
|
|
||||||
: "${REVIEW_ROUND:=0}"
|
|
||||||
: "${PR_NUMBER:=}"
|
|
||||||
: "${CLAIMED:=false}"
|
|
||||||
: "${PHASE_POLL_INTERVAL:=30}"
|
|
||||||
|
|
||||||
# --- Post diagnostic comment + label issue as blocked ---
|
|
||||||
# Captures tmux pane output, posts a structured comment on the issue, removes
|
|
||||||
# in-progress label, and adds the "blocked" label.
|
|
||||||
#
|
|
||||||
# Args: reason [session_name]
|
|
||||||
# Uses globals: ISSUE, SESSION_NAME, PR_NUMBER, FORGE_TOKEN, API
|
|
||||||
post_blocked_diagnostic() {
|
|
||||||
local reason="$1"
|
|
||||||
local session="${2:-${SESSION_NAME:-}}"
|
|
||||||
|
|
||||||
# Capture last 50 lines from tmux pane (before kill)
|
|
||||||
local tmux_output=""
|
|
||||||
if [ -n "$session" ] && tmux has-session -t "$session" 2>/dev/null; then
|
|
||||||
tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true)
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Redact any secrets from tmux output before posting to issue
|
|
||||||
if [ -n "$tmux_output" ]; then
|
|
||||||
tmux_output=$(redact_secrets "$tmux_output")
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Build diagnostic comment body
|
|
||||||
local comment
|
|
||||||
comment="### Session failure diagnostic
|
|
||||||
|
|
||||||
| Field | Value |
|
|
||||||
|---|---|
|
|
||||||
| Exit reason | \`${reason}\` |
|
|
||||||
| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` |"
|
|
||||||
[ -n "${PR_NUMBER:-}" ] && [ "${PR_NUMBER:-0}" != "0" ] && \
|
|
||||||
comment="${comment}
|
|
||||||
| PR | #${PR_NUMBER} |"
|
|
||||||
|
|
||||||
if [ -n "$tmux_output" ]; then
|
|
||||||
comment="${comment}
|
|
||||||
|
|
||||||
<details><summary>Last 50 lines from tmux pane</summary>
|
|
||||||
|
|
||||||
\`\`\`
|
|
||||||
${tmux_output}
|
|
||||||
\`\`\`
|
|
||||||
</details>"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Post comment to issue
|
|
||||||
curl -sf -X POST \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
"${API}/issues/${ISSUE}/comments" \
|
|
||||||
-d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true
|
|
||||||
|
|
||||||
# Remove in-progress, add blocked
|
|
||||||
cleanup_labels
|
|
||||||
local blocked_id
|
|
||||||
blocked_id=$(ensure_blocked_label_id)
|
|
||||||
if [ -n "$blocked_id" ]; then
|
|
||||||
curl -sf -X POST \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
"${API}/issues/${ISSUE}/labels" \
|
|
||||||
-d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true
|
|
||||||
fi
|
|
||||||
CLAIMED=false
|
|
||||||
_BLOCKED_POSTED=true
|
|
||||||
}
|
|
||||||
|
|
||||||
# --- Build phase protocol prompt (shared across agents) ---
|
|
||||||
# Generates the phase-signaling instructions for Claude prompts.
|
|
||||||
# Args: phase_file summary_file branch [remote]
|
|
||||||
# Output: The protocol text (stdout)
|
|
||||||
build_phase_protocol_prompt() {
|
|
||||||
local _pf="$1" _sf="$2" _br="$3" _remote="${4:-${FORGE_REMOTE:-origin}}"
|
|
||||||
cat <<_PHASE_PROTOCOL_EOF_
|
|
||||||
## Phase-Signaling Protocol (REQUIRED)
|
|
||||||
|
|
||||||
You are running in a persistent tmux session managed by an orchestrator.
|
|
||||||
Communicate progress by writing to the phase file. The orchestrator watches
|
|
||||||
this file and injects events (CI results, review feedback) back into this session.
|
|
||||||
|
|
||||||
### Key files
|
|
||||||
\`\`\`
|
|
||||||
PHASE_FILE="${_pf}"
|
|
||||||
SUMMARY_FILE="${_sf}"
|
|
||||||
\`\`\`
|
|
||||||
|
|
||||||
### Phase transitions — write these exactly:
|
|
||||||
|
|
||||||
**After committing and pushing your branch:**
|
|
||||||
\`\`\`bash
|
|
||||||
# Rebase on target branch before push to avoid merge conflicts
|
|
||||||
git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH}
|
|
||||||
git push ${_remote} ${_br}
|
|
||||||
# Write a short summary of what you implemented:
|
|
||||||
printf '%s' "<your summary>" > "\${SUMMARY_FILE}"
|
|
||||||
# Signal the orchestrator to create the PR and watch for CI:
|
|
||||||
echo "PHASE:awaiting_ci" > "${_pf}"
|
|
||||||
\`\`\`
|
|
||||||
Then STOP and wait. The orchestrator will inject CI results.
|
|
||||||
|
|
||||||
**When you receive a "CI passed" injection:**
|
|
||||||
\`\`\`bash
|
|
||||||
echo "PHASE:awaiting_review" > "${_pf}"
|
|
||||||
\`\`\`
|
|
||||||
Then STOP and wait. The orchestrator will inject review feedback.
|
|
||||||
|
|
||||||
**When you receive a "CI failed:" injection:**
|
|
||||||
Fix the CI issue, then rebase on target branch and push:
|
|
||||||
\`\`\`bash
|
|
||||||
git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH}
|
|
||||||
git push --force-with-lease ${_remote} ${_br}
|
|
||||||
echo "PHASE:awaiting_ci" > "${_pf}"
|
|
||||||
\`\`\`
|
|
||||||
Then STOP and wait.
|
|
||||||
|
|
||||||
**When you receive a "Review: REQUEST_CHANGES" injection:**
|
|
||||||
Address ALL review feedback, then rebase on target branch and push:
|
|
||||||
\`\`\`bash
|
|
||||||
git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH}
|
|
||||||
git push --force-with-lease ${_remote} ${_br}
|
|
||||||
echo "PHASE:awaiting_ci" > "${_pf}"
|
|
||||||
\`\`\`
|
|
||||||
(CI runs again after each push — always write awaiting_ci, not awaiting_review)
|
|
||||||
|
|
||||||
**When you need human help (CI exhausted, merge blocked, stuck on a decision):**
|
|
||||||
\`\`\`bash
|
|
||||||
printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}"
|
|
||||||
\`\`\`
|
|
||||||
Then STOP and wait. A human will review and respond via the forge.
|
|
||||||
|
|
||||||
**On unrecoverable failure:**
|
|
||||||
\`\`\`bash
|
|
||||||
printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}"
|
|
||||||
\`\`\`
|
|
||||||
_PHASE_PROTOCOL_EOF_
|
|
||||||
}
|
|
||||||
|
|
||||||
# --- Merge helper ---
|
|
||||||
# do_merge — attempt to merge PR via forge API.
|
|
||||||
# Args: pr_num
|
|
||||||
# Returns:
|
|
||||||
# 0 = merged successfully
|
|
||||||
# 1 = other failure (conflict, network error, etc.)
|
|
||||||
# 2 = not enough approvals (HTTP 405) — PHASE:escalate already written
|
|
||||||
do_merge() {
|
|
||||||
local pr_num="$1"
|
|
||||||
local merge_response merge_http_code merge_body
|
|
||||||
merge_response=$(curl -s -w "\n%{http_code}" -X POST \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
"${API}/pulls/${pr_num}/merge" \
|
|
||||||
-d '{"Do":"merge","delete_branch_after_merge":true}') || true
|
|
||||||
merge_http_code=$(echo "$merge_response" | tail -1)
|
|
||||||
merge_body=$(echo "$merge_response" | sed '$d')
|
|
||||||
|
|
||||||
if [ "$merge_http_code" = "200" ] || [ "$merge_http_code" = "204" ]; then
|
|
||||||
log "do_merge: PR #${pr_num} merged (HTTP ${merge_http_code})"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# HTTP 405 — could be "merge requirements not met" OR "already merged" (race with dev-poll).
|
|
||||||
# Before escalating, check whether the PR was already merged by another agent.
|
|
||||||
if [ "$merge_http_code" = "405" ]; then
|
|
||||||
local pr_state
|
|
||||||
pr_state=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API}/pulls/${pr_num}" | jq -r '.merged // false') || pr_state="false"
|
|
||||||
if [ "$pr_state" = "true" ]; then
|
|
||||||
log "do_merge: PR #${pr_num} already merged (detected after HTTP 405) — treating as success"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}"
|
|
||||||
printf 'PHASE:escalate\nReason: %s\n' \
|
|
||||||
"PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \
|
|
||||||
> "$PHASE_FILE"
|
|
||||||
return 2
|
|
||||||
fi
|
|
||||||
|
|
||||||
log "do_merge: PR #${pr_num} merge failed (HTTP ${merge_http_code}): ${merge_body:0:200}"
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
# --- Refusal comment helper ---
|
|
||||||
post_refusal_comment() {
|
|
||||||
local emoji="$1" title="$2" body="$3"
|
|
||||||
local last_has_title
|
|
||||||
last_has_title=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API}/issues/${ISSUE}/comments?limit=5" | \
|
|
||||||
jq -r --arg t "Dev-agent: ${title}" '[.[] | .body // ""] | any(contains($t)) | tostring') || true
|
|
||||||
if [ "$last_has_title" = "true" ]; then
|
|
||||||
log "skipping duplicate refusal comment: ${title}"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
local comment
|
|
||||||
comment="${emoji} **Dev-agent: ${title}**
|
|
||||||
|
|
||||||
${body}
|
|
||||||
|
|
||||||
---
|
|
||||||
*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*"
|
|
||||||
printf '%s' "$comment" > "/tmp/refusal-comment.txt"
|
|
||||||
jq -Rs '{body: .}' < "/tmp/refusal-comment.txt" > "/tmp/refusal-comment.json"
|
|
||||||
curl -sf -o /dev/null -X POST \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
"${API}/issues/${ISSUE}/comments" \
|
|
||||||
--data-binary @"/tmp/refusal-comment.json" 2>/dev/null || \
|
|
||||||
log "WARNING: failed to post refusal comment"
|
|
||||||
rm -f "/tmp/refusal-comment.txt" "/tmp/refusal-comment.json"
|
|
||||||
}
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# PHASE DISPATCH CALLBACK
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
# _on_phase_change — Phase dispatch callback for monitor_phase_loop
|
|
||||||
# Receives the current phase as $1.
|
|
||||||
# Returns 0 to continue the loop, 1 to break (terminal phase reached).
|
|
||||||
_on_phase_change() {
|
|
||||||
local phase="$1"
|
|
||||||
|
|
||||||
# ── PHASE: awaiting_ci ──────────────────────────────────────────────────────
|
|
||||||
if [ "$phase" = "PHASE:awaiting_ci" ]; then
|
|
||||||
# Release session lock — Claude is idle during CI polling (#724)
|
|
||||||
session_lock_release
|
|
||||||
|
|
||||||
# Create PR if not yet created
|
|
||||||
if [ -z "${PR_NUMBER:-}" ]; then
|
|
||||||
status "creating PR for issue #${ISSUE}"
|
|
||||||
IMPL_SUMMARY=""
|
|
||||||
if [ -f "$IMPL_SUMMARY_FILE" ]; then
|
|
||||||
# Don't treat refusal JSON as a PR summary
|
|
||||||
if ! jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then
|
|
||||||
IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE")
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt"
|
|
||||||
jq -n \
|
|
||||||
--arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \
|
|
||||||
--rawfile body "/tmp/pr-body-${ISSUE}.txt" \
|
|
||||||
--arg head "$BRANCH" \
|
|
||||||
--arg base "${PRIMARY_BRANCH}" \
|
|
||||||
'{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json"
|
|
||||||
|
|
||||||
PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
"${API}/pulls" \
|
|
||||||
--data-binary @"/tmp/pr-request-${ISSUE}.json")
|
|
||||||
|
|
||||||
PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1)
|
|
||||||
PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d')
|
|
||||||
rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json"
|
|
||||||
|
|
||||||
if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then
|
|
||||||
PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number')
|
|
||||||
log "created PR #${PR_NUMBER}"
|
|
||||||
elif [ "$PR_HTTP_CODE" = "409" ]; then
|
|
||||||
# PR already exists (race condition) — find it
|
|
||||||
FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API}/pulls?state=open&limit=20" | \
|
|
||||||
jq -r --arg branch "$BRANCH" \
|
|
||||||
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
|
|
||||||
if [ -n "$FOUND_PR" ]; then
|
|
||||||
PR_NUMBER="$FOUND_PR"
|
|
||||||
log "PR already exists: #${PR_NUMBER}"
|
|
||||||
else
|
|
||||||
log "ERROR: PR creation got 409 but no existing PR found"
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the forge API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed."
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})"
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push ${FORGE_REMOTE:-origin} ${BRANCH}. Then write PHASE:awaiting_ci again."
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# No CI configured? Treat as success immediately
|
|
||||||
if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then
|
|
||||||
log "no CI configured — treating as passed"
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER} (no CI configured for this project).
|
|
||||||
Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback."
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Poll CI until done or timeout
|
|
||||||
status "waiting for CI on PR #${PR_NUMBER}"
|
|
||||||
CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \
|
|
||||||
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha')
|
|
||||||
|
|
||||||
CI_DONE=false
|
|
||||||
CI_STATE="unknown"
|
|
||||||
CI_POLL_ELAPSED=0
|
|
||||||
while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do
|
|
||||||
sleep 30
|
|
||||||
CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 ))
|
|
||||||
|
|
||||||
# Check session still alive during CI wait (exit_marker + tmux fallback)
|
|
||||||
if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then
|
|
||||||
log "session died during CI wait"
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Re-fetch HEAD — Claude may have pushed new commits since loop started
|
|
||||||
CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || echo "$CI_CURRENT_SHA")
|
|
||||||
|
|
||||||
CI_STATE=$(ci_commit_status "$CI_CURRENT_SHA")
|
|
||||||
if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then
|
|
||||||
CI_DONE=true
|
|
||||||
[ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if ! $CI_DONE; then
|
|
||||||
log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s"
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed."
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
log "CI: ${CI_STATE}"
|
|
||||||
|
|
||||||
if [ "$CI_STATE" = "success" ]; then
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER}.
|
|
||||||
Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback:
|
|
||||||
echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\""
|
|
||||||
else
|
|
||||||
# Fetch CI error details
|
|
||||||
PIPELINE_NUM=$(ci_pipeline_number "$CI_CURRENT_SHA")
|
|
||||||
|
|
||||||
FAILED_STEP=""
|
|
||||||
FAILED_EXIT=""
|
|
||||||
IS_INFRA=false
|
|
||||||
if [ -n "$PIPELINE_NUM" ]; then
|
|
||||||
FAILED_INFO=$(curl -sf \
|
|
||||||
-H "Authorization: Bearer ${WOODPECKER_TOKEN}" \
|
|
||||||
"${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \
|
|
||||||
jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true)
|
|
||||||
FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1)
|
|
||||||
FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2)
|
|
||||||
fi
|
|
||||||
|
|
||||||
log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}"
|
|
||||||
|
|
||||||
if [ -n "$FAILED_STEP" ] && is_infra_step "$FAILED_STEP" "${FAILED_EXIT:-0}" >/dev/null 2>&1; then
|
|
||||||
IS_INFRA=true
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then
|
|
||||||
CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 ))
|
|
||||||
log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})"
|
|
||||||
(cd "$WORKTREE" && git commit --allow-empty \
|
|
||||||
-m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1)
|
|
||||||
# Rebase on target branch before push to avoid merge conflicts
|
|
||||||
if ! (cd "$WORKTREE" && \
|
|
||||||
git fetch "${FORGE_REMOTE:-origin}" "${PRIMARY_BRANCH}" 2>/dev/null && \
|
|
||||||
git rebase "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}" 2>&1 | tail -5); then
|
|
||||||
log "rebase conflict detected — aborting, agent must resolve"
|
|
||||||
(cd "$WORKTREE" && git rebase --abort 2>/dev/null || git reset --hard HEAD 2>/dev/null) || true
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "REBASE CONFLICT: Cannot rebase onto ${PRIMARY_BRANCH} automatically.
|
|
||||||
|
|
||||||
Please resolve merge conflicts manually:
|
|
||||||
1. Check conflict status: git status
|
|
||||||
2. Resolve conflicts in the conflicted files
|
|
||||||
3. Stage resolved files: git add <files>
|
|
||||||
4. Continue rebase: git rebase --continue
|
|
||||||
|
|
||||||
If you cannot resolve conflicts, abort: git rebase --abort
|
|
||||||
Then write PHASE:escalate with a reason."
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
# Rebase succeeded — push the result
|
|
||||||
(cd "$WORKTREE" && git push --force-with-lease "${FORGE_REMOTE:-origin}" "$BRANCH" 2>&1 | tail -3)
|
|
||||||
# Touch phase file so we recheck CI on the new SHA
|
|
||||||
# Do NOT update LAST_PHASE_MTIME here — let the main loop detect the fresh mtime
|
|
||||||
touch "$PHASE_FILE"
|
|
||||||
CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true)
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 ))
|
|
||||||
_ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}"
|
|
||||||
if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then
|
|
||||||
log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating"
|
|
||||||
printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE"
|
|
||||||
# Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
CI_ERROR_LOG=""
|
|
||||||
if [ -n "$PIPELINE_NUM" ]; then
|
|
||||||
CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "")
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Save CI result for crash recovery
|
|
||||||
printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \
|
|
||||||
"$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \
|
|
||||||
> "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true
|
|
||||||
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}).
|
|
||||||
|
|
||||||
Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?})
|
|
||||||
|
|
||||||
CI debug tool:
|
|
||||||
bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0}
|
|
||||||
bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} <step-name>
|
|
||||||
|
|
||||||
Error snippet:
|
|
||||||
${CI_ERROR_LOG:-No logs available. Use ci-debug.sh to query the pipeline.}
|
|
||||||
|
|
||||||
Instructions:
|
|
||||||
1. Run ci-debug.sh failures to get the full error output.
|
|
||||||
2. Read the failing test file(s) — understand what the tests EXPECT.
|
|
||||||
3. Fix the root cause — do NOT weaken tests.
|
|
||||||
4. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}
|
|
||||||
git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH}
|
|
||||||
5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
|
|
||||||
6. Stop and wait."
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ── PHASE: awaiting_review ──────────────────────────────────────────────────
|
|
||||||
elif [ "$phase" = "PHASE:awaiting_review" ]; then
|
|
||||||
# Release session lock — Claude is idle during review wait (#724)
|
|
||||||
session_lock_release
|
|
||||||
status "waiting for review on PR #${PR_NUMBER:-?}"
|
|
||||||
CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle
|
|
||||||
|
|
||||||
if [ -z "${PR_NUMBER:-}" ]; then
|
|
||||||
log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR"
|
|
||||||
FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API}/pulls?state=open&limit=20" | \
|
|
||||||
jq -r --arg branch "$BRANCH" \
|
|
||||||
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
|
|
||||||
if [ -n "$FOUND_PR" ]; then
|
|
||||||
PR_NUMBER="$FOUND_PR"
|
|
||||||
log "found PR #${PR_NUMBER}"
|
|
||||||
else
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push ${FORGE_REMOTE:-origin} ${BRANCH}, then write PHASE:awaiting_ci."
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
REVIEW_POLL_ELAPSED=0
|
|
||||||
REVIEW_FOUND=false
|
|
||||||
while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do
|
|
||||||
sleep 300 # 5 min between review checks
|
|
||||||
REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 ))
|
|
||||||
|
|
||||||
# Check session still alive (exit_marker + tmux fallback)
|
|
||||||
if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then
|
|
||||||
log "session died during review wait"
|
|
||||||
REVIEW_FOUND=false
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Check if phase was updated while we wait (e.g., Claude reacted to something)
|
|
||||||
NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0)
|
|
||||||
if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then
|
|
||||||
log "phase file updated during review wait — re-entering main loop"
|
|
||||||
# Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer
|
|
||||||
# loop detects the change on its next tick and dispatches the new phase.
|
|
||||||
REVIEW_FOUND=true # Prevent timeout injection
|
|
||||||
# Clean up review-poll sentinel if it exists (session already advanced)
|
|
||||||
rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
|
|
||||||
REVIEW_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true
|
|
||||||
REVIEW_COMMENT=$(forge_api_all "/issues/${PR_NUMBER}/comments" | \
|
|
||||||
jq -r --arg sha "$REVIEW_SHA" \
|
|
||||||
'[.[] | select(.body | contains("<!-- reviewed: " + $sha))] | last // empty') || true
|
|
||||||
|
|
||||||
if [ -n "$REVIEW_COMMENT" ] && [ "$REVIEW_COMMENT" != "null" ]; then
|
|
||||||
REVIEW_TEXT=$(echo "$REVIEW_COMMENT" | jq -r '.body')
|
|
||||||
|
|
||||||
# Skip error reviews — they have no verdict
|
|
||||||
if echo "$REVIEW_TEXT" | grep -q "review-error\|Review — Error"; then
|
|
||||||
log "review was an error, waiting for re-review"
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
|
|
||||||
VERDICT=$(echo "$REVIEW_TEXT" | grep -oP '\*\*(APPROVE|REQUEST_CHANGES|DISCUSS)\*\*' | head -1 | tr -d '*' || true)
|
|
||||||
log "review verdict: ${VERDICT:-unknown}"
|
|
||||||
|
|
||||||
# Also check formal forge reviews
|
|
||||||
if [ -z "$VERDICT" ]; then
|
|
||||||
VERDICT=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API}/pulls/${PR_NUMBER}/reviews" | \
|
|
||||||
jq -r '[.[] | select(.stale == false)] | last | .state // empty' || true)
|
|
||||||
if [ "$VERDICT" = "APPROVED" ]; then
|
|
||||||
VERDICT="APPROVE"
|
|
||||||
elif [ "$VERDICT" != "REQUEST_CHANGES" ]; then
|
|
||||||
VERDICT=""
|
|
||||||
fi
|
|
||||||
[ -n "$VERDICT" ] && log "verdict from formal review: $VERDICT"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Skip injection if review-poll.sh already injected (sentinel present).
|
|
||||||
# Exception: APPROVE always falls through so do_merge() runs even when
|
|
||||||
# review-poll injected first — prevents Claude writing PHASE:done on a
|
|
||||||
# failed merge without the orchestrator detecting the error.
|
|
||||||
REVIEW_SENTINEL="/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
|
||||||
if [ -n "$VERDICT" ] && [ -f "$REVIEW_SENTINEL" ] && [ "$VERDICT" != "APPROVE" ]; then
|
|
||||||
log "review already injected by review-poll (sentinel exists) — skipping"
|
|
||||||
rm -f "$REVIEW_SENTINEL"
|
|
||||||
REVIEW_FOUND=true
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
rm -f "$REVIEW_SENTINEL" # consume sentinel before APPROVE handling below
|
|
||||||
|
|
||||||
if [ "$VERDICT" = "APPROVE" ]; then
|
|
||||||
REVIEW_FOUND=true
|
|
||||||
_merge_rc=0; do_merge "$PR_NUMBER" || _merge_rc=$?
|
|
||||||
if [ "$_merge_rc" -eq 0 ]; then
|
|
||||||
# Merge succeeded — close issue and signal done
|
|
||||||
curl -sf -X PATCH \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
"${API}/issues/${ISSUE}" \
|
|
||||||
-d '{"state":"closed"}' >/dev/null 2>&1 || true
|
|
||||||
# Pull merged primary branch and push to mirrors
|
|
||||||
git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE:-origin}" "$PRIMARY_BRANCH" 2>/dev/null || true
|
|
||||||
git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
|
|
||||||
git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE:-origin}" "$PRIMARY_BRANCH" 2>/dev/null || true
|
|
||||||
mirror_push
|
|
||||||
printf 'PHASE:done\n' > "$PHASE_FILE"
|
|
||||||
elif [ "$_merge_rc" -ne 2 ]; then
|
|
||||||
# Other merge failure (conflict, etc.) — delegate to Claude for rebase + retry
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "Approved! PR #${PR_NUMBER} has been approved, but the merge failed (likely conflicts).
|
|
||||||
|
|
||||||
Rebase onto ${PRIMARY_BRANCH} and push:
|
|
||||||
git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}
|
|
||||||
git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH}
|
|
||||||
echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
|
|
||||||
|
|
||||||
Do NOT merge or close the issue — the orchestrator handles that after CI passes.
|
|
||||||
If rebase repeatedly fails, write PHASE:escalate with a reason."
|
|
||||||
fi
|
|
||||||
# _merge_rc=2: PHASE:escalate already written by do_merge()
|
|
||||||
break
|
|
||||||
|
|
||||||
elif [ "$VERDICT" = "REQUEST_CHANGES" ] || [ "$VERDICT" = "DISCUSS" ]; then
|
|
||||||
REVIEW_ROUND=$(( REVIEW_ROUND + 1 ))
|
|
||||||
if [ "$REVIEW_ROUND" -ge "$MAX_REVIEW_ROUNDS" ]; then
|
|
||||||
log "hit max review rounds (${MAX_REVIEW_ROUNDS})"
|
|
||||||
log "PR #${PR_NUMBER}: hit ${MAX_REVIEW_ROUNDS} review rounds, needs human attention"
|
|
||||||
fi
|
|
||||||
REVIEW_FOUND=true
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "Review feedback (round ${REVIEW_ROUND}) on PR #${PR_NUMBER}:
|
|
||||||
|
|
||||||
${REVIEW_TEXT}
|
|
||||||
|
|
||||||
Instructions:
|
|
||||||
1. Address each piece of feedback carefully.
|
|
||||||
2. Run lint and tests when done.
|
|
||||||
3. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}
|
|
||||||
git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH}
|
|
||||||
4. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
|
|
||||||
5. Stop and wait for the next CI result."
|
|
||||||
log "review REQUEST_CHANGES received (round ${REVIEW_ROUND})"
|
|
||||||
break
|
|
||||||
|
|
||||||
else
|
|
||||||
# No verdict found in comment or formal review — keep waiting
|
|
||||||
log "review comment found but no verdict, continuing to wait"
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Check if PR was merged or closed externally
|
|
||||||
PR_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API}/pulls/${PR_NUMBER}") || true
|
|
||||||
PR_STATE=$(echo "$PR_JSON" | jq -r '.state // "unknown"')
|
|
||||||
PR_MERGED=$(echo "$PR_JSON" | jq -r '.merged // false')
|
|
||||||
if [ "$PR_STATE" != "open" ]; then
|
|
||||||
if [ "$PR_MERGED" = "true" ]; then
|
|
||||||
log "PR #${PR_NUMBER} was merged externally"
|
|
||||||
curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
"${API}/issues/${ISSUE}" -d '{"state":"closed"}' >/dev/null 2>&1 || true
|
|
||||||
cleanup_labels
|
|
||||||
agent_kill_session "$SESSION_NAME"
|
|
||||||
cleanup_worktree
|
|
||||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}"
|
|
||||||
exit 0
|
|
||||||
else
|
|
||||||
log "PR #${PR_NUMBER} was closed WITHOUT merge — NOT closing issue"
|
|
||||||
cleanup_labels
|
|
||||||
agent_kill_session "$SESSION_NAME"
|
|
||||||
cleanup_worktree
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
log "waiting for review on PR #${PR_NUMBER} (${REVIEW_POLL_ELAPSED}s elapsed)"
|
|
||||||
done
|
|
||||||
|
|
||||||
if ! $REVIEW_FOUND && [ "$REVIEW_POLL_ELAPSED" -ge "$REVIEW_POLL_TIMEOUT" ]; then
|
|
||||||
log "TIMEOUT: no review after 3h"
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "TIMEOUT: No review received after 3 hours for PR #${PR_NUMBER}. Write PHASE:escalate to escalate to a human reviewer."
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ── PHASE: escalate ──────────────────────────────────────────────────────
|
|
||||||
elif [ "$phase" = "PHASE:escalate" ]; then
|
|
||||||
status "escalated — waiting for human input on issue #${ISSUE}"
|
|
||||||
ESCALATE_REASON=$(sed -n '2p' "$PHASE_FILE" 2>/dev/null | sed 's/^Reason: //' || echo "")
|
|
||||||
log "phase: escalate — reason: ${ESCALATE_REASON:-none}"
|
|
||||||
# Session stays alive — human input arrives via vault/forge
|
|
||||||
|
|
||||||
# ── PHASE: done ─────────────────────────────────────────────────────────────
|
|
||||||
# PR merged and issue closed (by orchestrator or Claude). Just clean up local state.
|
|
||||||
elif [ "$phase" = "PHASE:done" ]; then
|
|
||||||
if [ -n "${PR_NUMBER:-}" ]; then
|
|
||||||
status "phase done — PR #${PR_NUMBER} merged, cleaning up"
|
|
||||||
else
|
|
||||||
status "phase done — issue #${ISSUE} complete, cleaning up"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Belt-and-suspenders: ensure in-progress label removed (idempotent)
|
|
||||||
cleanup_labels
|
|
||||||
|
|
||||||
# Local cleanup
|
|
||||||
agent_kill_session "$SESSION_NAME"
|
|
||||||
cleanup_worktree
|
|
||||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
|
|
||||||
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
|
|
||||||
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
|
||||||
CLAIMED=false # Don't unclaim again in cleanup()
|
|
||||||
|
|
||||||
# ── PHASE: failed ───────────────────────────────────────────────────────────
|
|
||||||
elif [ "$phase" = "PHASE:failed" ]; then
|
|
||||||
if [[ -f "$PHASE_FILE" ]]; then
|
|
||||||
FAILURE_REASON=$(sed -n '2p' "$PHASE_FILE" | sed 's/^Reason: //')
|
|
||||||
fi
|
|
||||||
FAILURE_REASON="${FAILURE_REASON:-unspecified}"
|
|
||||||
log "phase: failed — reason: ${FAILURE_REASON}"
|
|
||||||
# Gitea labels API requires []int64 — look up the "backlog" label ID once
|
|
||||||
BACKLOG_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
|
|
||||||
| jq -r '.[] | select(.name == "backlog") | .id' 2>/dev/null || true)
|
|
||||||
BACKLOG_LABEL_ID="${BACKLOG_LABEL_ID:-1300815}"
|
|
||||||
UNDERSPECIFIED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
|
|
||||||
| jq -r '.[] | select(.name == "underspecified") | .id' 2>/dev/null || true)
|
|
||||||
UNDERSPECIFIED_LABEL_ID="${UNDERSPECIFIED_LABEL_ID:-1300816}"
|
|
||||||
|
|
||||||
# Check if this is a refusal (Claude wrote refusal JSON to IMPL_SUMMARY_FILE)
|
|
||||||
REFUSAL_JSON=""
|
|
||||||
if [ -f "$IMPL_SUMMARY_FILE" ] && jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then
|
|
||||||
REFUSAL_JSON=$(cat "$IMPL_SUMMARY_FILE")
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -n "$REFUSAL_JSON" ] && [ "$FAILURE_REASON" = "refused" ]; then
|
|
||||||
REFUSAL_STATUS=$(printf '%s' "$REFUSAL_JSON" | jq -r '.status')
|
|
||||||
log "claude refused: ${REFUSAL_STATUS}"
|
|
||||||
|
|
||||||
# Write preflight result for dev-poll.sh
|
|
||||||
printf '%s' "$REFUSAL_JSON" > "$PREFLIGHT_RESULT"
|
|
||||||
|
|
||||||
# Unclaim issue (restore backlog label, remove in-progress)
|
|
||||||
cleanup_labels
|
|
||||||
curl -sf -X POST \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
"${API}/issues/${ISSUE}/labels" \
|
|
||||||
-d "{\"labels\":[${BACKLOG_LABEL_ID}]}" >/dev/null 2>&1 || true
|
|
||||||
|
|
||||||
case "$REFUSAL_STATUS" in
|
|
||||||
unmet_dependency)
|
|
||||||
BLOCKED_BY_MSG=$(printf '%s' "$REFUSAL_JSON" | jq -r '.blocked_by // "unknown"')
|
|
||||||
SUGGESTION=$(printf '%s' "$REFUSAL_JSON" | jq -r '.suggestion // empty')
|
|
||||||
COMMENT_BODY="### Blocked by unmet dependency
|
|
||||||
|
|
||||||
${BLOCKED_BY_MSG}"
|
|
||||||
if [ -n "$SUGGESTION" ] && [ "$SUGGESTION" != "null" ]; then
|
|
||||||
COMMENT_BODY="${COMMENT_BODY}
|
|
||||||
|
|
||||||
**Suggestion:** Work on #${SUGGESTION} first."
|
|
||||||
fi
|
|
||||||
post_refusal_comment "🚧" "Unmet dependency" "$COMMENT_BODY"
|
|
||||||
;;
|
|
||||||
too_large)
|
|
||||||
REASON=$(printf '%s' "$REFUSAL_JSON" | jq -r '.reason // "unspecified"')
|
|
||||||
post_refusal_comment "📏" "Too large for single session" "### Why this can't be implemented as-is
|
|
||||||
|
|
||||||
${REASON}
|
|
||||||
|
|
||||||
### Next steps
|
|
||||||
A maintainer should split this issue or add more detail to the spec."
|
|
||||||
curl -sf -X POST \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
"${API}/issues/${ISSUE}/labels" \
|
|
||||||
-d "{\"labels\":[${UNDERSPECIFIED_LABEL_ID}]}" >/dev/null 2>&1 || true
|
|
||||||
curl -sf -X DELETE \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${API}/issues/${ISSUE}/labels/${BACKLOG_LABEL_ID}" >/dev/null 2>&1 || true
|
|
||||||
;;
|
|
||||||
already_done)
|
|
||||||
REASON=$(printf '%s' "$REFUSAL_JSON" | jq -r '.reason // "unspecified"')
|
|
||||||
post_refusal_comment "✅" "Already implemented" "### Existing implementation
|
|
||||||
|
|
||||||
${REASON}
|
|
||||||
|
|
||||||
Closing as already implemented."
|
|
||||||
curl -sf -X PATCH \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
"${API}/issues/${ISSUE}" \
|
|
||||||
-d '{"state":"closed"}' >/dev/null 2>&1 || true
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
post_refusal_comment "❓" "Unable to proceed" "The dev-agent could not process this issue.
|
|
||||||
|
|
||||||
Raw response:
|
|
||||||
\`\`\`json
|
|
||||||
$(printf '%s' "$REFUSAL_JSON" | head -c 2000)
|
|
||||||
\`\`\`"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
CLAIMED=false # Don't unclaim again in cleanup()
|
|
||||||
agent_kill_session "$SESSION_NAME"
|
|
||||||
cleanup_worktree
|
|
||||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
|
|
||||||
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
|
|
||||||
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
|
||||||
return 1
|
|
||||||
|
|
||||||
else
|
|
||||||
# Genuine unrecoverable failure — label blocked with diagnostic
|
|
||||||
log "session failed: ${FAILURE_REASON}"
|
|
||||||
post_blocked_diagnostic "$FAILURE_REASON"
|
|
||||||
|
|
||||||
agent_kill_session "$SESSION_NAME"
|
|
||||||
if [ -n "${PR_NUMBER:-}" ]; then
|
|
||||||
log "keeping worktree (PR #${PR_NUMBER} still open)"
|
|
||||||
else
|
|
||||||
cleanup_worktree
|
|
||||||
fi
|
|
||||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
|
|
||||||
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
|
|
||||||
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ── PHASE: crashed ──────────────────────────────────────────────────────────
|
|
||||||
# Session died unexpectedly (OOM kill, tmux crash, etc.). Label blocked with
|
|
||||||
# diagnostic comment so humans can triage directly on the issue.
|
|
||||||
elif [ "$phase" = "PHASE:crashed" ]; then
|
|
||||||
log "session crashed for issue #${ISSUE}"
|
|
||||||
post_blocked_diagnostic "crashed"
|
|
||||||
log "PRESERVED crashed worktree for debugging: $WORKTREE"
|
|
||||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
|
|
||||||
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
|
|
||||||
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
|
||||||
|
|
||||||
else
|
|
||||||
log "WARNING: unknown phase value: ${phase}"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
@ -8,8 +8,13 @@
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
# Source canonical read_phase() from shared library
|
# Inline read_phase() function (previously from lib/agent-session.sh)
|
||||||
source "$(dirname "$0")/../lib/agent-session.sh"
|
# Read the current phase from a phase file, stripped of whitespace.
|
||||||
|
# Usage: read_phase [file] — defaults to $PHASE_FILE
|
||||||
|
read_phase() {
|
||||||
|
local file="${1:-${PHASE_FILE:-}}"
|
||||||
|
{ cat "$file" 2>/dev/null || true; } | head -1 | tr -d '[:space:]'
|
||||||
|
}
|
||||||
|
|
||||||
PROJECT="testproject"
|
PROJECT="testproject"
|
||||||
ISSUE="999"
|
ISSUE="999"
|
||||||
|
|
@ -84,7 +89,7 @@ else
|
||||||
fail "PHASE:failed format: first='$first_line' second='$second_line'"
|
fail "PHASE:failed format: first='$first_line' second='$second_line'"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── Test 5: orchestrator read function (canonical read_phase from lib/agent-session.sh)
|
# ── Test 5: orchestrator read function (inline read_phase)
|
||||||
echo "PHASE:awaiting_ci" > "$PHASE_FILE"
|
echo "PHASE:awaiting_ci" > "$PHASE_FILE"
|
||||||
phase=$(read_phase "$PHASE_FILE")
|
phase=$(read_phase "$PHASE_FILE")
|
||||||
if [ "$phase" = "PHASE:awaiting_ci" ]; then
|
if [ "$phase" = "PHASE:awaiting_ci" ]; then
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ You are helping the user set up and operate a **disinto autonomous code factory*
|
||||||
- The factory uses a single internal Forgejo as its forge, regardless of where mirrors go
|
- The factory uses a single internal Forgejo as its forge, regardless of where mirrors go
|
||||||
- Dev-agent uses `claude -p` for one-shot implementation sessions
|
- Dev-agent uses `claude -p` for one-shot implementation sessions
|
||||||
- Mirror pushes happen automatically after every merge
|
- Mirror pushes happen automatically after every merge
|
||||||
- Cron schedule: dev-poll every 5min, review-poll every 5min, gardener 4x/day
|
- Polling loop in `docker/agents/entrypoint.sh`: dev-poll/review-poll every 5m, gardener/architect every 6h, planner every 12h, predictor every 24h
|
||||||
|
|
||||||
## References
|
## References
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,54 +1,63 @@
|
||||||
# Working with the factory — lessons learned
|
# Lessons learned
|
||||||
|
|
||||||
## Writing issues for the dev agent
|
## Debugging & Diagnostics
|
||||||
|
|
||||||
**Put everything in the issue body, not comments.** The dev agent reads the issue body when it starts work. It does not reliably read comments. If an issue fails and you need to add guidance for a retry, update the issue body.
|
**Map the environment before changing code.** Silent failures often stem from runtime assumptions—missing paths, wrong user context, or unmet prerequisites. Verify the actual environment first.
|
||||||
|
|
||||||
**One approach per issue, no choices.** The dev agent cannot make design decisions. If there are multiple ways to solve a problem, decide before filing. Issues with "Option A or Option B" will confuse the agent.
|
**Silent termination is a logging failure.** When a script exits non-zero with no output, the bug is in error handling, not the command. Log at operation entry points, not just on success.
|
||||||
|
|
||||||
**Issues must fit the templates.** Every backlog issue needs: affected files (max 3), acceptance criteria (max 5 checkboxes), and a clear proposed solution. If you cannot fill these fields, the issue is too big — label it `vision` and break it down first.
|
**Pipefail is not a silver bullet.** It propagates exit codes but doesn't guarantee visibility. Pair with explicit error logging for external commands (git, curl, etc.).
|
||||||
|
|
||||||
**Explicit dependencies prevent ordering bugs.** Add `Depends-on: #N` in the issue body. dev-poll checks these before pickup. Without explicit deps, the agent may attempt work on a stale codebase.
|
**Debug the pattern, not the symptom.** If one HTTP call fails with 403, audit all similar calls. If one script has the same bug, find where it's duplicated.
|
||||||
|
|
||||||
## Debugging CI failures
|
## Shell Scripting Patterns
|
||||||
|
|
||||||
**Check CI logs via Woodpecker SQLite when the API fails.** The Woodpecker v3 log API may return HTML instead of JSON. Reliable fallback:
|
**Exit codes don't indicate output.** Commands like `grep -c` exit 1 when count is 0 but still output a number. Test both output and exit status independently.
|
||||||
```bash
|
|
||||||
sqlite3 /var/lib/docker/volumes/disinto_woodpecker-data/_data/woodpecker.sqlite \
|
|
||||||
"SELECT le.data FROM log_entries le \
|
|
||||||
JOIN steps s ON le.step_id = s.id \
|
|
||||||
JOIN workflows w ON s.pipeline_id = w.id \
|
|
||||||
JOIN pipelines p ON w.pipeline_id = p.id \
|
|
||||||
WHERE p.number = <N> AND s.name = '<step>' ORDER BY le.id"
|
|
||||||
```
|
|
||||||
|
|
||||||
**When the agent fails repeatedly on CI, diagnose externally.** The dev agent cannot see CI log output (only pass/fail status). If the same step fails 3+ times, read the logs yourself and put the exact error and fix in the issue body.
|
**The `||` pattern is fragile.** It appends on failure, doesn't replace output. Use command grouping or conditionals when output clarity matters.
|
||||||
|
|
||||||
## Retrying failed issues
|
**Arithmetic contexts are unforgiving.** `(( ))` fails on anything non-numeric. A stray newline or extra digit breaks everything.
|
||||||
|
|
||||||
**Clean up stale branches before retrying.** Old branches cause recovery mode which inherits stale code. Close the PR, delete the branch on Forgejo, then relabel to backlog.
|
**Source file boundaries matter.** Variables defined in sourced files are local unless exported. Trace the lifecycle: definition → export → usage.
|
||||||
|
|
||||||
**After a dependency lands, stale branches miss the fix.** If issue B depends on A, and B's PR was created before A merged, B's branch is stale. Close the PR and delete the branch so the agent starts fresh from current main.
|
## Environment & Deployment
|
||||||
|
|
||||||
## Environment gotchas
|
**User context matters at every layer.** When using `gosu`/`su-exec`, ensure all file operations occur under the target user. Create resources with explicit `chown` before dropping privileges.
|
||||||
|
|
||||||
**Alpine/BusyBox differs from Debian.** CI and edge containers use Alpine:
|
**Test under final runtime conditions.** Reproduce the exact user context the application will run under, not just "container runs."
|
||||||
- `grep -P` (Perl regex) does not work — use `grep -E`
|
|
||||||
- `USER` variable is unset — set it explicitly: `USER=$(whoami); export USER`
|
|
||||||
- Network calls fail during `docker build` in LXD — download binaries on the host, COPY into images
|
|
||||||
|
|
||||||
**The host repo drifts from Forgejo main.** If factory code is bind-mounted, the host checkout goes stale. Pull regularly or use versioned releases.
|
**Fail fast with actionable diagnostics.** Entrypoints should exit immediately on dependency failures with clear messages explaining *why* and *what to do*.
|
||||||
|
|
||||||
## Vault operations
|
**Throttle retry loops.** Infinite retries without backoff mask underlying problems and look identical to healthy startups.
|
||||||
|
|
||||||
**The human merging a vault PR must be a Forgejo site admin.** The dispatcher verifies `is_admin` on the merger. Promote your user via the Forgejo CLI or database if needed.
|
## API & Integration
|
||||||
|
|
||||||
**Result files cache failures.** If a vault action fails, the dispatcher writes `.result.json` and skips it. To retry: delete the result file inside the edge container.
|
**Validate semantic types, not just names.** Don't infer resource type from naming conventions. Explicitly resolve whether an identifier is a user, org, or team before constructing URLs.
|
||||||
|
|
||||||
## Breaking down large features
|
**403 errors can signal semantic mismatches.** When debugging auth failures, consider whether the request is going to the wrong resource type.
|
||||||
|
|
||||||
**Vision issues need structured decomposition.** When a feature touches multiple subsystems or has design forks, label it `vision`. Break it down by identifying what exists, what can be reused, where the design forks are, and resolve them before filing backlog issues.
|
**Auth failures are rarely isolated.** If one endpoint requires credentials, scan for other unauthenticated calls. Environment assumptions about public access commonly break.
|
||||||
|
|
||||||
**Prefer gluecode over greenfield.** Check if Forgejo API, Woodpecker, Docker, or existing lib/ functions can do the job before building new components.
|
**Test against the most restrictive environment first.** If it works on a locked-down instance, it'll work everywhere.
|
||||||
|
|
||||||
**Max 7 sub-issues per sprint.** If a breakdown produces more, split into two sprints.
|
## State & Configuration
|
||||||
|
|
||||||
|
**Idempotency requires state awareness.** Distinguish "needs setup" from "already configured." A naive always-rotate approach breaks reproducibility.
|
||||||
|
|
||||||
|
**Audit the full dependency chain.** When modifying shared resources, trace all consumers. Embedded tokens create hidden coupling.
|
||||||
|
|
||||||
|
**Check validity, not just existence.** Never assume a credential is invalid just because it exists. Verify expiry, permissions, or other validity criteria.
|
||||||
|
|
||||||
|
**Conservative defaults become problematic defaults.** Timeouts and limits should reflect real-world expectations, not worst-case scenarios. When in doubt, start aggressive and fail fast.
|
||||||
|
|
||||||
|
**Documentation and defaults must stay in sync.** When a default changes, docs should immediately reflect why.
|
||||||
|
|
||||||
|
## Validation & Testing
|
||||||
|
|
||||||
|
**Add validation after critical operations.** If a migration commits N commits, verify N commits exist afterward. The extra lines are cheaper than debugging incomplete work.
|
||||||
|
|
||||||
|
**Integration tests should cover both paths.** Test org and user scenarios, empty inputs, and edge cases explicitly.
|
||||||
|
|
||||||
|
**Reproduce with minimal examples.** Running the exact pipeline with test cases that trigger edge conditions catches bugs early.
|
||||||
|
|
||||||
|
**Treat "works locally but not in production" as environmental, not code.** The bug is in assumptions about the runtime, not the logic itself.
|
||||||
|
|
|
||||||
|
|
@ -57,8 +57,8 @@ docker ps --format "table {{.Names}}\t{{.Status}}"
|
||||||
# Token generated?
|
# Token generated?
|
||||||
grep WOODPECKER_TOKEN .env | grep -v "^$" && echo "OK" || echo "MISSING — see references/troubleshooting.md"
|
grep WOODPECKER_TOKEN .env | grep -v "^$" && echo "OK" || echo "MISSING — see references/troubleshooting.md"
|
||||||
|
|
||||||
# Agent cron active?
|
# Agent entrypoint loop running?
|
||||||
docker exec -u agent disinto-agents-1 crontab -l -u agent
|
docker exec disinto-agents-1 tail -5 /home/agent/data/agent-entrypoint.log
|
||||||
|
|
||||||
# Agent can reach Forgejo?
|
# Agent can reach Forgejo?
|
||||||
docker exec disinto-agents-1 bash -c "source /home/agent/disinto/.env && curl -sf http://forgejo:3000/api/v1/version | jq .version"
|
docker exec disinto-agents-1 bash -c "source /home/agent/disinto/.env && curl -sf http://forgejo:3000/api/v1/version | jq .version"
|
||||||
|
|
@ -174,7 +174,7 @@ Use labels:
|
||||||
|
|
||||||
### 7. Watch it work
|
### 7. Watch it work
|
||||||
|
|
||||||
The dev-agent polls every 5 minutes. Trigger manually to see it immediately:
|
The dev-agent runs every 5 minutes via the entrypoint polling loop. Trigger manually to see it immediately:
|
||||||
```bash
|
```bash
|
||||||
source .env
|
source .env
|
||||||
export PROJECT_TOML=projects/<name>.toml
|
export PROJECT_TOML=projects/<name>.toml
|
||||||
|
|
|
||||||
|
|
@ -1,54 +0,0 @@
|
||||||
version: "3.8"
|
|
||||||
|
|
||||||
services:
|
|
||||||
agents:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: docker/agents/Dockerfile
|
|
||||||
image: disinto/agents:latest
|
|
||||||
container_name: disinto-agents
|
|
||||||
volumes:
|
|
||||||
- ./data/agents:/home/agent/data
|
|
||||||
- ./disinto:/home/agent/disinto:ro
|
|
||||||
- /usr/local/bin/claude:/usr/local/bin/claude:ro
|
|
||||||
environment:
|
|
||||||
- DISINTO_AGENTS=review,gardener
|
|
||||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
|
||||||
- FORGE_TOKEN=${FORGE_TOKEN:-}
|
|
||||||
- FORGE_URL=http://forgejo:3000
|
|
||||||
depends_on:
|
|
||||||
- forgejo
|
|
||||||
|
|
||||||
agents-llama:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: docker/agents/Dockerfile
|
|
||||||
image: disinto/agents-llama:latest
|
|
||||||
container_name: disinto-agents-llama
|
|
||||||
volumes:
|
|
||||||
- ./data/llama:/home/agent/data
|
|
||||||
- ./disinto:/home/agent/disinto:ro
|
|
||||||
- /usr/local/bin/claude:/usr/local/bin/claude:ro
|
|
||||||
environment:
|
|
||||||
- DISINTO_AGENTS=dev
|
|
||||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
|
||||||
- FORGE_TOKEN=${FORGE_TOKEN:-}
|
|
||||||
- FORGE_URL=http://forgejo:3000
|
|
||||||
- PROJECT_TOML=projects/disinto.toml
|
|
||||||
- FORGE_REPO=johba/disinto
|
|
||||||
depends_on:
|
|
||||||
- forgejo
|
|
||||||
|
|
||||||
forgejo:
|
|
||||||
image: codeberg.org/forgejo/forgejo:1
|
|
||||||
container_name: disinto-forgejo
|
|
||||||
volumes:
|
|
||||||
- ./data/forgejo:/var/lib/forgejo
|
|
||||||
environment:
|
|
||||||
- FORGEJO__database__DB_TYPE=sqlite3
|
|
||||||
- FORGEJO__service__REGISTER_EMAIL_CONFIRMATION=false
|
|
||||||
- FORGEJO__service__ENABLE_NOTIFY_MAIL=false
|
|
||||||
- FORGEJO__service__DISABLE_REGISTRATION=true
|
|
||||||
- FORGEJO__service__REQUIRE_SIGNIN_VIEW=true
|
|
||||||
ports:
|
|
||||||
- "3000:3000"
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
FROM debian:bookworm-slim
|
FROM debian:bookworm-slim
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
bash curl git jq tmux cron python3 python3-pip openssh-client ca-certificates age shellcheck procps \
|
bash curl git jq tmux python3 python3-pip openssh-client ca-certificates age shellcheck procps gosu \
|
||||||
&& pip3 install --break-system-packages networkx \
|
&& pip3 install --break-system-packages networkx \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
@ -26,8 +26,8 @@ COPY . /home/agent/disinto
|
||||||
COPY docker/agents/entrypoint.sh /entrypoint.sh
|
COPY docker/agents/entrypoint.sh /entrypoint.sh
|
||||||
RUN chmod +x /entrypoint.sh
|
RUN chmod +x /entrypoint.sh
|
||||||
|
|
||||||
# Entrypoint runs as root to start the cron daemon;
|
# Entrypoint runs polling loop directly, dropping to agent user via gosu.
|
||||||
# cron jobs execute as the agent user (crontab -u agent).
|
# All scripts execute as the agent user (UID 1000) while preserving env vars.
|
||||||
WORKDIR /home/agent/disinto
|
WORKDIR /home/agent/disinto
|
||||||
|
|
||||||
ENTRYPOINT ["/entrypoint.sh"]
|
ENTRYPOINT ["/entrypoint.sh"]
|
||||||
|
|
|
||||||
|
|
@ -1,105 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
LOG_DIR="/home/agent/data/logs/dev"
|
|
||||||
mkdir -p "$LOG_DIR" /home/agent/data
|
|
||||||
chown -R agent:agent /home/agent/data 2>/dev/null || true
|
|
||||||
|
|
||||||
log() {
|
|
||||||
printf "[%s] llama-loop: %s\n" "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" | tee -a "$LOG_DIR/llama-loop.log"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Apply token override for named agent identity
|
|
||||||
if [ -n "${FORGE_TOKEN_OVERRIDE:-}" ]; then
|
|
||||||
export FORGE_TOKEN="$FORGE_TOKEN_OVERRIDE"
|
|
||||||
fi
|
|
||||||
|
|
||||||
log "Starting llama dev-agent loop"
|
|
||||||
log "Backend: ${ANTHROPIC_BASE_URL:-not set}"
|
|
||||||
log "Claude CLI: $(claude --version 2>&1 || echo not found)"
|
|
||||||
log "Agent identity: $(curl -sf -H "Authorization: token ${FORGE_TOKEN}" "${FORGE_URL:-http://forgejo:3000}/api/v1/user" 2>/dev/null | jq -r '.login // "unknown"')"
|
|
||||||
|
|
||||||
# Clone repo if not present
|
|
||||||
if [ ! -d "${PROJECT_REPO_ROOT}/.git" ]; then
|
|
||||||
log "Cloning repo..."
|
|
||||||
mkdir -p "$(dirname "$PROJECT_REPO_ROOT")"
|
|
||||||
chown -R agent:agent /home/agent/repos 2>/dev/null || true
|
|
||||||
su -s /bin/bash agent -c "git clone http://dev-bot:${FORGE_TOKEN}@forgejo:3000/${FORGE_REPO:-johba/disinto}.git ${PROJECT_REPO_ROOT}"
|
|
||||||
log "Repo cloned"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Install crontab entries for agent user from project TOMLs
|
|
||||||
install_project_crons() {
|
|
||||||
local cron_lines="DISINTO_CONTAINER=1
|
|
||||||
USER=agent
|
|
||||||
FORGE_URL=http://forgejo:3000"
|
|
||||||
|
|
||||||
# Parse DISINTO_AGENTS env var (default: all agents)
|
|
||||||
# Expected format: comma-separated list like "review,gardener" or "dev"
|
|
||||||
local agents_to_run="review,dev,gardener"
|
|
||||||
if [ -n "${DISINTO_AGENTS:-}" ]; then
|
|
||||||
agents_to_run="$DISINTO_AGENTS"
|
|
||||||
fi
|
|
||||||
|
|
||||||
for toml in "${DISINTO_DIR}"/projects/*.toml; do
|
|
||||||
[ -f "$toml" ] || continue
|
|
||||||
local pname
|
|
||||||
pname=$(python3 -c "
|
|
||||||
import sys, tomllib
|
|
||||||
with open(sys.argv[1], 'rb') as f:
|
|
||||||
print(tomllib.load(f)['name'])
|
|
||||||
" "$toml" 2>/dev/null) || continue
|
|
||||||
|
|
||||||
cron_lines="${cron_lines}
|
|
||||||
PROJECT_REPO_ROOT=/home/agent/repos/${pname}
|
|
||||||
# disinto: ${pname}"
|
|
||||||
|
|
||||||
# Add review-poll only if review agent is configured
|
|
||||||
if echo "$agents_to_run" | grep -qw "review"; then
|
|
||||||
cron_lines="${cron_lines}
|
|
||||||
2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${DISINTO_DIR}/review/review-poll.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Add dev-poll only if dev agent is configured
|
|
||||||
if echo "$agents_to_run" | grep -qw "dev"; then
|
|
||||||
cron_lines="${cron_lines}
|
|
||||||
4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${DISINTO_DIR}/dev/dev-poll.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Add gardener-run only if gardener agent is configured
|
|
||||||
if echo "$agents_to_run" | grep -qw "gardener"; then
|
|
||||||
cron_lines="${cron_lines}
|
|
||||||
0 0,6,12,18 * * * cd ${DISINTO_DIR} && bash gardener/gardener-run.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ -n "$cron_lines" ]; then
|
|
||||||
printf '%s\n' "$cron_lines" | crontab -u agent -
|
|
||||||
log "Installed crontab for agent user (agents: ${agents_to_run})"
|
|
||||||
else
|
|
||||||
log "No project TOMLs found — crontab empty"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
log "Entering poll loop (interval: ${POLL_INTERVAL:-300}s)"
|
|
||||||
|
|
||||||
# Install and start cron daemon
|
|
||||||
DISINTO_DIR="/home/agent/disinto"
|
|
||||||
install_project_crons
|
|
||||||
log "Starting cron daemon"
|
|
||||||
cron
|
|
||||||
log "cron daemon started"
|
|
||||||
|
|
||||||
while true; do
|
|
||||||
# Clear stale session IDs before each poll.
|
|
||||||
# Local llama does not support --resume (no server-side session storage).
|
|
||||||
# Stale .sid files cause agent_run to exit instantly on every retry.
|
|
||||||
rm -f /tmp/dev-session-*.sid 2>/dev/null || true
|
|
||||||
|
|
||||||
su -s /bin/bash agent -c "
|
|
||||||
export FORGE_TOKEN='${FORGE_TOKEN}'
|
|
||||||
cd /home/agent/disinto && \
|
|
||||||
bash dev/dev-poll.sh ${PROJECT_TOML:-projects/disinto.toml}
|
|
||||||
" >> "$LOG_DIR/llama-loop.log" 2>&1 || true
|
|
||||||
sleep "${POLL_INTERVAL:-300}"
|
|
||||||
done
|
|
||||||
|
|
@ -1,73 +1,80 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
# entrypoint.sh — Start agent container with cron in foreground
|
# entrypoint.sh — Start agent container with polling loop
|
||||||
#
|
#
|
||||||
# Runs as root inside the container. Installs crontab entries for the
|
# Runs as root inside the container. Drops to agent user via gosu for all
|
||||||
# agent user from project TOMLs, then starts cron in the foreground.
|
# poll scripts. All Docker Compose env vars are inherited (PATH, FORGE_TOKEN,
|
||||||
# All cron jobs execute as the agent user (UID 1000).
|
# ANTHROPIC_API_KEY, etc.).
|
||||||
|
#
|
||||||
|
# AGENT_ROLES env var controls which scripts run: "review,dev,gardener,architect,planner,predictor"
|
||||||
|
# (default: all six). Uses while-true loop with staggered intervals:
|
||||||
|
# - review-poll: every 5 minutes (offset by 0s)
|
||||||
|
# - dev-poll: every 5 minutes (offset by 2 minutes)
|
||||||
|
# - gardener: every 6 hours (72 iterations * 5 min)
|
||||||
|
# - architect: every 6 hours (same as gardener)
|
||||||
|
# - planner: every 12 hours (144 iterations * 5 min)
|
||||||
|
# - predictor: every 24 hours (288 iterations * 5 min)
|
||||||
|
|
||||||
DISINTO_DIR="/home/agent/disinto"
|
DISINTO_BAKED="/home/agent/disinto"
|
||||||
|
DISINTO_LIVE="/home/agent/repos/_factory"
|
||||||
|
DISINTO_DIR="$DISINTO_BAKED" # start with baked copy; switched to live checkout after bootstrap
|
||||||
LOGFILE="/home/agent/data/agent-entrypoint.log"
|
LOGFILE="/home/agent/data/agent-entrypoint.log"
|
||||||
mkdir -p /home/agent/data
|
|
||||||
chown agent:agent /home/agent/data
|
# Create all expected log subdirectories and set ownership as root before dropping to agent.
|
||||||
|
# This handles both fresh volumes and stale root-owned dirs from prior container runs.
|
||||||
|
mkdir -p /home/agent/data/logs/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher}
|
||||||
|
chown -R agent:agent /home/agent/data
|
||||||
|
|
||||||
log() {
|
log() {
|
||||||
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" | tee -a "$LOGFILE"
|
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" | tee -a "$LOGFILE"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Build crontab from project TOMLs and install for the agent user.
|
# Initialize state directory and files if they don't exist
|
||||||
install_project_crons() {
|
init_state_dir() {
|
||||||
local cron_lines="DISINTO_CONTAINER=1
|
local state_dir="${DISINTO_DIR}/state"
|
||||||
USER=agent
|
mkdir -p "$state_dir"
|
||||||
FORGE_URL=http://forgejo:3000"
|
# Create empty state files so check_active guards work
|
||||||
|
for agent in dev reviewer gardener architect planner predictor; do
|
||||||
# Parse DISINTO_AGENTS env var (default: all agents)
|
touch "$state_dir/.${agent}-active" 2>/dev/null || true
|
||||||
# Expected format: comma-separated list like "review,gardener" or "dev"
|
|
||||||
# Note: supervisor is NOT installed here — it runs on the host, not in container.
|
|
||||||
# Supervisor requires host-level Docker access and pgrep, which the container lacks.
|
|
||||||
local agents_to_run="review,dev,gardener"
|
|
||||||
if [ -n "${DISINTO_AGENTS:-}" ]; then
|
|
||||||
agents_to_run="$DISINTO_AGENTS"
|
|
||||||
fi
|
|
||||||
|
|
||||||
for toml in "${DISINTO_DIR}"/projects/*.toml; do
|
|
||||||
[ -f "$toml" ] || continue
|
|
||||||
local pname
|
|
||||||
pname=$(python3 -c "
|
|
||||||
import sys, tomllib
|
|
||||||
with open(sys.argv[1], 'rb') as f:
|
|
||||||
print(tomllib.load(f)['name'])
|
|
||||||
" "$toml" 2>/dev/null) || continue
|
|
||||||
|
|
||||||
cron_lines="${cron_lines}
|
|
||||||
PROJECT_REPO_ROOT=/home/agent/repos/${pname}
|
|
||||||
# disinto: ${pname}"
|
|
||||||
|
|
||||||
# Add review-poll only if review agent is configured
|
|
||||||
if echo "$agents_to_run" | grep -qw "review"; then
|
|
||||||
cron_lines="${cron_lines}
|
|
||||||
2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${DISINTO_DIR}/review/review-poll.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Add dev-poll only if dev agent is configured
|
|
||||||
if echo "$agents_to_run" | grep -qw "dev"; then
|
|
||||||
cron_lines="${cron_lines}
|
|
||||||
4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${DISINTO_DIR}/dev/dev-poll.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Add gardener-run only if gardener agent is configured
|
|
||||||
if echo "$agents_to_run" | grep -qw "gardener"; then
|
|
||||||
cron_lines="${cron_lines}
|
|
||||||
0 0,6,12,18 * * * cd ${DISINTO_DIR} && bash gardener/gardener-run.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
|
|
||||||
fi
|
|
||||||
done
|
done
|
||||||
|
chown -R agent:agent "$state_dir"
|
||||||
|
log "Initialized state directory"
|
||||||
|
}
|
||||||
|
|
||||||
if [ -n "$cron_lines" ]; then
|
# Source shared git credential helper library (#604).
|
||||||
printf '%s\n' "$cron_lines" | crontab -u agent -
|
# shellcheck source=lib/git-creds.sh
|
||||||
log "Installed crontab for agent user (agents: ${agents_to_run})"
|
source "${DISINTO_BAKED}/lib/git-creds.sh"
|
||||||
|
|
||||||
|
# Wrapper that calls the shared configure_git_creds with agent-specific paths,
|
||||||
|
# then repairs any legacy baked-credential URLs in existing clones.
|
||||||
|
_setup_git_creds() {
|
||||||
|
configure_git_creds "/home/agent" "gosu agent"
|
||||||
|
if [ -n "${FORGE_PASS:-}" ] && [ -n "${FORGE_URL:-}" ]; then
|
||||||
|
log "Git credential helper configured (password auth)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Repair legacy clones with baked-in stale credentials (#604).
|
||||||
|
_GIT_CREDS_LOG_FN=log repair_baked_cred_urls /home/agent/repos
|
||||||
|
}
|
||||||
|
|
||||||
|
# Configure tea CLI login for forge operations (runs as agent user).
|
||||||
|
# tea stores config in ~/.config/tea/ — persistent across container restarts
|
||||||
|
# only if that directory is on a mounted volume.
|
||||||
|
configure_tea_login() {
|
||||||
|
if command -v tea &>/dev/null && [ -n "${FORGE_TOKEN:-}" ] && [ -n "${FORGE_URL:-}" ]; then
|
||||||
|
local_tea_login="forgejo"
|
||||||
|
case "$FORGE_URL" in
|
||||||
|
*codeberg.org*) local_tea_login="codeberg" ;;
|
||||||
|
esac
|
||||||
|
gosu agent bash -c "tea login add \
|
||||||
|
--name '${local_tea_login}' \
|
||||||
|
--url '${FORGE_URL}' \
|
||||||
|
--token '${FORGE_TOKEN}' \
|
||||||
|
--no-version-check 2>/dev/null || true"
|
||||||
|
log "tea login configured: ${local_tea_login} → ${FORGE_URL}"
|
||||||
else
|
else
|
||||||
log "No project TOMLs found — crontab empty"
|
log "tea login: skipped (tea not found or FORGE_TOKEN/FORGE_URL not set)"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -98,26 +105,272 @@ else
|
||||||
log "Run 'claude auth login' on the host, or set ANTHROPIC_API_KEY in .env"
|
log "Run 'claude auth login' on the host, or set ANTHROPIC_API_KEY in .env"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
install_project_crons
|
# Bootstrap ops repos for each project TOML (#586).
|
||||||
|
# In compose mode the ops repo lives on a Docker named volume at
|
||||||
|
# /home/agent/repos/<project>-ops. If init ran migrate_ops_repo on the host
|
||||||
|
# the container never saw those changes. This function clones from forgejo
|
||||||
|
# when the repo is missing, or configures the remote and pulls when it exists
|
||||||
|
# but has no remote (orphaned local-only checkout).
|
||||||
|
bootstrap_ops_repos() {
|
||||||
|
local repos_dir="/home/agent/repos"
|
||||||
|
mkdir -p "$repos_dir"
|
||||||
|
chown agent:agent "$repos_dir"
|
||||||
|
|
||||||
# Configure tea CLI login for forge operations (runs as agent user).
|
for toml in "${DISINTO_DIR}"/projects/*.toml; do
|
||||||
# tea stores config in ~/.config/tea/ — persistent across container restarts
|
[ -f "$toml" ] || continue
|
||||||
# only if that directory is on a mounted volume.
|
|
||||||
if command -v tea &>/dev/null && [ -n "${FORGE_TOKEN:-}" ] && [ -n "${FORGE_URL:-}" ]; then
|
|
||||||
local_tea_login="forgejo"
|
|
||||||
case "$FORGE_URL" in
|
|
||||||
*codeberg.org*) local_tea_login="codeberg" ;;
|
|
||||||
esac
|
|
||||||
su -s /bin/bash agent -c "tea login add \
|
|
||||||
--name '${local_tea_login}' \
|
|
||||||
--url '${FORGE_URL}' \
|
|
||||||
--token '${FORGE_TOKEN}' \
|
|
||||||
--no-version-check 2>/dev/null || true"
|
|
||||||
log "tea login configured: ${local_tea_login} → ${FORGE_URL}"
|
|
||||||
else
|
|
||||||
log "tea login: skipped (tea not found or FORGE_TOKEN/FORGE_URL not set)"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Run cron in the foreground. Cron jobs execute as the agent user.
|
# Extract project name, ops repo slug, repo slug, and primary branch from TOML
|
||||||
log "Starting cron daemon"
|
local project_name ops_slug primary_branch
|
||||||
exec cron -f
|
local _toml_vals
|
||||||
|
_toml_vals=$(python3 -c "
|
||||||
|
import tomllib, sys
|
||||||
|
with open(sys.argv[1], 'rb') as f:
|
||||||
|
cfg = tomllib.load(f)
|
||||||
|
print(cfg.get('name', ''))
|
||||||
|
print(cfg.get('ops_repo', ''))
|
||||||
|
print(cfg.get('repo', ''))
|
||||||
|
print(cfg.get('primary_branch', 'main'))
|
||||||
|
" "$toml" 2>/dev/null || true)
|
||||||
|
|
||||||
|
project_name=$(sed -n '1p' <<< "$_toml_vals")
|
||||||
|
[ -n "$project_name" ] || continue
|
||||||
|
ops_slug=$(sed -n '2p' <<< "$_toml_vals")
|
||||||
|
local repo_slug
|
||||||
|
repo_slug=$(sed -n '3p' <<< "$_toml_vals")
|
||||||
|
primary_branch=$(sed -n '4p' <<< "$_toml_vals")
|
||||||
|
primary_branch="${primary_branch:-main}"
|
||||||
|
|
||||||
|
# Fall back to convention if ops_repo not in TOML
|
||||||
|
if [ -z "$ops_slug" ]; then
|
||||||
|
if [ -n "$repo_slug" ]; then
|
||||||
|
ops_slug="${repo_slug}-ops"
|
||||||
|
else
|
||||||
|
ops_slug="disinto-admin/${project_name}-ops"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
local ops_root="${repos_dir}/${project_name}-ops"
|
||||||
|
local remote_url="${FORGE_URL}/${ops_slug}.git"
|
||||||
|
|
||||||
|
if [ ! -d "${ops_root}/.git" ]; then
|
||||||
|
# Clone ops repo from forgejo
|
||||||
|
log "Ops bootstrap: cloning ${ops_slug} -> ${ops_root}"
|
||||||
|
if gosu agent git clone --quiet "$remote_url" "$ops_root" 2>/dev/null; then
|
||||||
|
log "Ops bootstrap: ${ops_slug} cloned successfully"
|
||||||
|
else
|
||||||
|
# Remote may not exist yet (first run before init); create empty repo
|
||||||
|
log "Ops bootstrap: clone failed for ${ops_slug} — initializing empty repo"
|
||||||
|
gosu agent bash -c "
|
||||||
|
mkdir -p '${ops_root}' && \
|
||||||
|
git -C '${ops_root}' init --initial-branch='${primary_branch}' -q && \
|
||||||
|
git -C '${ops_root}' remote add origin '${remote_url}'
|
||||||
|
"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# Repo exists — ensure remote is configured and pull latest
|
||||||
|
local current_remote
|
||||||
|
current_remote=$(git -C "$ops_root" remote get-url origin 2>/dev/null || true)
|
||||||
|
if [ -z "$current_remote" ]; then
|
||||||
|
log "Ops bootstrap: adding missing remote to ${ops_root}"
|
||||||
|
gosu agent git -C "$ops_root" remote add origin "$remote_url"
|
||||||
|
elif [ "$current_remote" != "$remote_url" ]; then
|
||||||
|
log "Ops bootstrap: fixing remote URL in ${ops_root}"
|
||||||
|
gosu agent git -C "$ops_root" remote set-url origin "$remote_url"
|
||||||
|
fi
|
||||||
|
# Pull latest from forgejo to pick up any host-side migrations
|
||||||
|
log "Ops bootstrap: pulling latest for ${project_name}-ops"
|
||||||
|
gosu agent bash -c "
|
||||||
|
cd '${ops_root}' && \
|
||||||
|
git fetch origin '${primary_branch}' --quiet 2>/dev/null && \
|
||||||
|
git reset --hard 'origin/${primary_branch}' --quiet 2>/dev/null
|
||||||
|
" || log "Ops bootstrap: pull failed for ${ops_slug} (remote may not exist yet)"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Bootstrap the factory (disinto) repo from Forgejo into the project-repos
|
||||||
|
# volume so the entrypoint runs from a live git checkout that receives
|
||||||
|
# updates via `git pull`, not the stale baked copy from `COPY .` (#593).
|
||||||
|
bootstrap_factory_repo() {
|
||||||
|
local repo="${FACTORY_REPO:-}"
|
||||||
|
if [ -z "$repo" ]; then
|
||||||
|
log "Factory bootstrap: FACTORY_REPO not set — running from baked copy"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
local remote_url="${FORGE_URL}/${repo}.git"
|
||||||
|
local primary_branch="${PRIMARY_BRANCH:-main}"
|
||||||
|
|
||||||
|
if [ ! -d "${DISINTO_LIVE}/.git" ]; then
|
||||||
|
log "Factory bootstrap: cloning ${repo} -> ${DISINTO_LIVE}"
|
||||||
|
if gosu agent git clone --quiet --branch "$primary_branch" "$remote_url" "$DISINTO_LIVE" 2>&1; then
|
||||||
|
log "Factory bootstrap: cloned successfully"
|
||||||
|
else
|
||||||
|
log "Factory bootstrap: clone failed — running from baked copy"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log "Factory bootstrap: pulling latest ${repo}"
|
||||||
|
gosu agent bash -c "
|
||||||
|
cd '${DISINTO_LIVE}' && \
|
||||||
|
git fetch origin '${primary_branch}' --quiet 2>/dev/null && \
|
||||||
|
git reset --hard 'origin/${primary_branch}' --quiet 2>/dev/null
|
||||||
|
" || log "Factory bootstrap: pull failed — using existing checkout"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Copy project TOMLs from baked dir — they are gitignored AND docker-ignored,
|
||||||
|
# so neither the image nor the clone normally contains them. If the baked
|
||||||
|
# copy has any (e.g. operator manually placed them), propagate them.
|
||||||
|
if compgen -G "${DISINTO_BAKED}/projects/*.toml" >/dev/null 2>&1; then
|
||||||
|
mkdir -p "${DISINTO_LIVE}/projects"
|
||||||
|
cp "${DISINTO_BAKED}"/projects/*.toml "${DISINTO_LIVE}/projects/"
|
||||||
|
chown -R agent:agent "${DISINTO_LIVE}/projects"
|
||||||
|
log "Factory bootstrap: copied project TOMLs to live checkout"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Verify the live checkout has the expected structure
|
||||||
|
if [ -f "${DISINTO_LIVE}/lib/env.sh" ]; then
|
||||||
|
DISINTO_DIR="$DISINTO_LIVE"
|
||||||
|
log "Factory bootstrap: DISINTO_DIR switched to live checkout at ${DISINTO_LIVE}"
|
||||||
|
else
|
||||||
|
log "Factory bootstrap: live checkout missing expected files — falling back to baked copy"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Pull latest factory code at the start of each poll iteration (#593).
|
||||||
|
# Runs as the agent user; failures are non-fatal (stale code still works).
|
||||||
|
pull_factory_repo() {
|
||||||
|
[ "$DISINTO_DIR" = "$DISINTO_LIVE" ] || return 0
|
||||||
|
local primary_branch="${PRIMARY_BRANCH:-main}"
|
||||||
|
gosu agent bash -c "
|
||||||
|
cd '${DISINTO_LIVE}' && \
|
||||||
|
git fetch origin '${primary_branch}' --quiet 2>/dev/null && \
|
||||||
|
git reset --hard 'origin/${primary_branch}' --quiet 2>/dev/null
|
||||||
|
" || log "Factory pull failed — continuing with current checkout"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Configure git and tea once at startup (as root, then drop to agent)
|
||||||
|
_setup_git_creds
|
||||||
|
configure_tea_login
|
||||||
|
|
||||||
|
# Bootstrap ops repos from forgejo into container volumes (#586)
|
||||||
|
bootstrap_ops_repos
|
||||||
|
|
||||||
|
# Bootstrap factory repo — switch DISINTO_DIR to live checkout (#593)
|
||||||
|
bootstrap_factory_repo
|
||||||
|
|
||||||
|
# Initialize state directory for check_active guards
|
||||||
|
init_state_dir
|
||||||
|
|
||||||
|
# Parse AGENT_ROLES env var (default: all agents)
|
||||||
|
# Expected format: comma-separated list like "review,dev,gardener"
|
||||||
|
AGENT_ROLES="${AGENT_ROLES:-review,dev,gardener,architect,planner,predictor}"
|
||||||
|
log "Agent roles configured: ${AGENT_ROLES}"
|
||||||
|
|
||||||
|
# Poll interval in seconds (5 minutes default)
|
||||||
|
POLL_INTERVAL="${POLL_INTERVAL:-300}"
|
||||||
|
|
||||||
|
log "Entering polling loop (interval: ${POLL_INTERVAL}s, roles: ${AGENT_ROLES})"
|
||||||
|
|
||||||
|
# Main polling loop using iteration counter for gardener scheduling
|
||||||
|
iteration=0
|
||||||
|
while true; do
|
||||||
|
iteration=$((iteration + 1))
|
||||||
|
now=$(date +%s)
|
||||||
|
|
||||||
|
# Pull latest factory code so poll scripts stay current (#593)
|
||||||
|
pull_factory_repo
|
||||||
|
|
||||||
|
# Stale .sid cleanup — needed for agents that don't support --resume
|
||||||
|
# Run this as the agent user
|
||||||
|
gosu agent bash -c "rm -f /tmp/dev-session-*.sid /tmp/review-session-*.sid 2>/dev/null || true"
|
||||||
|
|
||||||
|
# Poll each project TOML
|
||||||
|
# Fast agents (review-poll, dev-poll) run in background so they don't block
|
||||||
|
# each other. Slow agents (gardener, architect, planner, predictor) also run
|
||||||
|
# in background but are guarded by pgrep so only one instance runs at a time.
|
||||||
|
# The flock on session.lock already serializes claude -p calls.
|
||||||
|
for toml in "${DISINTO_DIR}"/projects/*.toml; do
|
||||||
|
[ -f "$toml" ] || continue
|
||||||
|
log "Processing project TOML: ${toml}"
|
||||||
|
|
||||||
|
# --- Fast agents: run in background, wait before slow agents ---
|
||||||
|
|
||||||
|
# Review poll (every iteration)
|
||||||
|
if [[ ",${AGENT_ROLES}," == *",review,"* ]]; then
|
||||||
|
log "Running review-poll (iteration ${iteration}) for ${toml}"
|
||||||
|
gosu agent bash -c "cd ${DISINTO_DIR} && bash review/review-poll.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/review-poll.log" 2>&1 &
|
||||||
|
fi
|
||||||
|
|
||||||
|
sleep 2 # stagger fast polls
|
||||||
|
|
||||||
|
# Dev poll (every iteration)
|
||||||
|
if [[ ",${AGENT_ROLES}," == *",dev,"* ]]; then
|
||||||
|
log "Running dev-poll (iteration ${iteration}) for ${toml}"
|
||||||
|
gosu agent bash -c "cd ${DISINTO_DIR} && bash dev/dev-poll.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/dev-poll.log" 2>&1 &
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Wait for fast polls to finish before launching slow agents
|
||||||
|
wait
|
||||||
|
|
||||||
|
# --- Slow agents: run in background with pgrep guard ---
|
||||||
|
|
||||||
|
# Gardener (every 6 hours = 72 iterations * 5 min = 21600 seconds)
|
||||||
|
if [[ ",${AGENT_ROLES}," == *",gardener,"* ]]; then
|
||||||
|
gardener_iteration=$((iteration * POLL_INTERVAL))
|
||||||
|
gardener_interval=$((6 * 60 * 60)) # 6 hours in seconds
|
||||||
|
if [ $((gardener_iteration % gardener_interval)) -eq 0 ] && [ "$now" -ge "$gardener_iteration" ]; then
|
||||||
|
if ! pgrep -f "gardener-run.sh" >/dev/null; then
|
||||||
|
log "Running gardener (iteration ${iteration}, 6-hour interval) for ${toml}"
|
||||||
|
gosu agent bash -c "cd ${DISINTO_DIR} && bash gardener/gardener-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/gardener.log" 2>&1 &
|
||||||
|
else
|
||||||
|
log "Skipping gardener — already running"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Architect (every 6 hours, same schedule as gardener)
|
||||||
|
if [[ ",${AGENT_ROLES}," == *",architect,"* ]]; then
|
||||||
|
architect_iteration=$((iteration * POLL_INTERVAL))
|
||||||
|
architect_interval=$((6 * 60 * 60)) # 6 hours in seconds
|
||||||
|
if [ $((architect_iteration % architect_interval)) -eq 0 ] && [ "$now" -ge "$architect_iteration" ]; then
|
||||||
|
if ! pgrep -f "architect-run.sh" >/dev/null; then
|
||||||
|
log "Running architect (iteration ${iteration}, 6-hour interval) for ${toml}"
|
||||||
|
gosu agent bash -c "cd ${DISINTO_DIR} && bash architect/architect-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/architect.log" 2>&1 &
|
||||||
|
else
|
||||||
|
log "Skipping architect — already running"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Planner (every 12 hours = 144 iterations * 5 min = 43200 seconds)
|
||||||
|
if [[ ",${AGENT_ROLES}," == *",planner,"* ]]; then
|
||||||
|
planner_iteration=$((iteration * POLL_INTERVAL))
|
||||||
|
planner_interval=$((12 * 60 * 60)) # 12 hours in seconds
|
||||||
|
if [ $((planner_iteration % planner_interval)) -eq 0 ] && [ "$now" -ge "$planner_iteration" ]; then
|
||||||
|
if ! pgrep -f "planner-run.sh" >/dev/null; then
|
||||||
|
log "Running planner (iteration ${iteration}, 12-hour interval) for ${toml}"
|
||||||
|
gosu agent bash -c "cd ${DISINTO_DIR} && bash planner/planner-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/planner.log" 2>&1 &
|
||||||
|
else
|
||||||
|
log "Skipping planner — already running"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Predictor (every 24 hours = 288 iterations * 5 min = 86400 seconds)
|
||||||
|
if [[ ",${AGENT_ROLES}," == *",predictor,"* ]]; then
|
||||||
|
predictor_iteration=$((iteration * POLL_INTERVAL))
|
||||||
|
predictor_interval=$((24 * 60 * 60)) # 24 hours in seconds
|
||||||
|
if [ $((predictor_iteration % predictor_interval)) -eq 0 ] && [ "$now" -ge "$predictor_iteration" ]; then
|
||||||
|
if ! pgrep -f "predictor-run.sh" >/dev/null; then
|
||||||
|
log "Running predictor (iteration ${iteration}, 24-hour interval) for ${toml}"
|
||||||
|
gosu agent bash -c "cd ${DISINTO_DIR} && bash predictor/predictor-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/predictor.log" 2>&1 &
|
||||||
|
else
|
||||||
|
log "Skipping predictor — already running"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
sleep "${POLL_INTERVAL}"
|
||||||
|
done
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
FROM caddy:alpine
|
FROM caddy:latest
|
||||||
RUN apk add --no-cache bash jq curl git docker-cli
|
RUN apk add --no-cache bash jq curl git docker-cli python3
|
||||||
COPY entrypoint-edge.sh /usr/local/bin/entrypoint-edge.sh
|
COPY entrypoint-edge.sh /usr/local/bin/entrypoint-edge.sh
|
||||||
ENTRYPOINT ["bash", "/usr/local/bin/entrypoint-edge.sh"]
|
ENTRYPOINT ["bash", "/usr/local/bin/entrypoint-edge.sh"]
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@
|
||||||
# 3. Verify TOML arrived via merged PR with admin merger (Forgejo API)
|
# 3. Verify TOML arrived via merged PR with admin merger (Forgejo API)
|
||||||
# 4. Validate TOML using vault-env.sh validator
|
# 4. Validate TOML using vault-env.sh validator
|
||||||
# 5. Decrypt .env.vault.enc and extract only declared secrets
|
# 5. Decrypt .env.vault.enc and extract only declared secrets
|
||||||
# 6. Launch: docker run --rm disinto-agents:latest <formula> <action-id>
|
# 6. Launch: docker run --rm disinto/agents:latest <action-id>
|
||||||
# 7. Write <action-id>.result.json with exit code, timestamp, logs summary
|
# 7. Write <action-id>.result.json with exit code, timestamp, logs summary
|
||||||
#
|
#
|
||||||
# Part of #76.
|
# Part of #76.
|
||||||
|
|
@ -22,6 +22,11 @@ SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||||
# Source shared environment
|
# Source shared environment
|
||||||
source "${SCRIPT_ROOT}/../lib/env.sh"
|
source "${SCRIPT_ROOT}/../lib/env.sh"
|
||||||
|
|
||||||
|
# Project TOML location: prefer mounted path, fall back to cloned path
|
||||||
|
# Edge container mounts ./projects to /opt/disinto-projects;
|
||||||
|
# the shallow clone only has .toml.example files.
|
||||||
|
PROJECTS_DIR="${PROJECTS_DIR:-${FACTORY_ROOT:-/opt/disinto}-projects}"
|
||||||
|
|
||||||
# Load vault secrets after env.sh (env.sh unsets them for agent security)
|
# Load vault secrets after env.sh (env.sh unsets them for agent security)
|
||||||
# Vault secrets must be available to the dispatcher
|
# Vault secrets must be available to the dispatcher
|
||||||
if [ -f "$FACTORY_ROOT/.env.vault.enc" ] && command -v sops &>/dev/null; then
|
if [ -f "$FACTORY_ROOT/.env.vault.enc" ] && command -v sops &>/dev/null; then
|
||||||
|
|
@ -47,9 +52,14 @@ VAULT_ENV="${SCRIPT_ROOT}/../vault/vault-env.sh"
|
||||||
# Comma-separated list of Forgejo usernames with admin role
|
# Comma-separated list of Forgejo usernames with admin role
|
||||||
ADMIN_USERS="${FORGE_ADMIN_USERS:-vault-bot,admin}"
|
ADMIN_USERS="${FORGE_ADMIN_USERS:-vault-bot,admin}"
|
||||||
|
|
||||||
# Persistent log file for dispatcher (falls back to /tmp when
# DISINTO_LOG_DIR is unset); parent directory is created eagerly.
DISPATCHER_LOG_FILE="${DISINTO_LOG_DIR:-/tmp}/dispatcher/dispatcher.log"
mkdir -p "$(dirname "$DISPATCHER_LOG_FILE")"

# Log function with standardized format: "[<ISO-8601 UTC>] <agent>: <msg>".
# LOG_AGENT overrides the default "dispatcher" tag so sourced helpers can
# label their own lines. Output is appended to $DISPATCHER_LOG_FILE.
log() {
    local who="${LOG_AGENT:-dispatcher}"
    local stamp
    stamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
    printf '[%s] %s: %s\n' "$stamp" "$who" "$*" >> "$DISPATCHER_LOG_FILE"
}
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
@ -298,7 +308,7 @@ is_action_completed() {
|
||||||
|
|
||||||
# Validate a vault action TOML file
|
# Validate a vault action TOML file
|
||||||
# Usage: validate_action <toml_file>
|
# Usage: validate_action <toml_file>
|
||||||
# Sets: VAULT_ACTION_ID, VAULT_ACTION_FORMULA, VAULT_ACTION_CONTEXT, VAULT_ACTION_SECRETS
|
# Sets: VAULT_ACTION_ID, VAULT_ACTION_FORMULA, VAULT_ACTION_CONTEXT, VAULT_ACTION_SECRETS, VAULT_DISPATCH_MODE
|
||||||
validate_action() {
|
validate_action() {
|
||||||
local toml_file="$1"
|
local toml_file="$1"
|
||||||
|
|
||||||
|
|
@ -320,6 +330,26 @@ validate_action() {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Extract dispatch_mode from TOML file
# Usage: get_dispatch_mode <toml_file>
# Prints: "direct" for direct-commit, "pr" for PR-merged, or any other
# value found in the file; defaults to "pr" when the key is absent.
get_dispatch_mode() {
    local toml_file="$1"
    local dispatch_mode

    # [[:space:]] instead of \s: \s in ERE is a GNU extension that busybox
    # grep (Alpine edge image) does not understand. Read the file directly
    # instead of cat-ing it into a variable first. The trailing "|| true"
    # keeps a no-match grep from aborting callers that run under errexit.
    dispatch_mode=$(grep -E '^dispatch_mode[[:space:]]*=' "$toml_file" \
        | sed -E 's/^dispatch_mode[[:space:]]*=[[:space:]]*"(.*)"/\1/' \
        | tr -d '\r' || true)

    if [ -n "$dispatch_mode" ]; then
        echo "$dispatch_mode"
    else
        # Default to "pr" for backward compatibility (PR-based workflow)
        echo "pr"
    fi
}
|
||||||
|
|
||||||
# Write result file for an action
|
# Write result file for an action
|
||||||
# Usage: write_result <action_id> <exit_code> <logs>
|
# Usage: write_result <action_id> <exit_code> <logs>
|
||||||
write_result() {
|
write_result() {
|
||||||
|
|
@ -362,29 +392,64 @@ launch_runner() {
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Verify admin merge
|
# Check dispatch mode to determine if admin verification is needed
|
||||||
|
local dispatch_mode
|
||||||
|
dispatch_mode=$(get_dispatch_mode "$toml_file")
|
||||||
|
|
||||||
|
if [ "$dispatch_mode" = "direct" ]; then
|
||||||
|
log "Action ${action_id}: tier=${VAULT_TIER:-unknown}, dispatch_mode=${dispatch_mode} — skipping admin merge verification (direct commit)"
|
||||||
|
else
|
||||||
|
# Verify admin merge for PR-based actions
|
||||||
|
log "Action ${action_id}: tier=${VAULT_TIER:-unknown}, dispatch_mode=${dispatch_mode} — verifying admin merge"
|
||||||
if ! verify_admin_merged "$toml_file"; then
|
if ! verify_admin_merged "$toml_file"; then
|
||||||
log "ERROR: Admin merge verification failed for ${action_id}"
|
log "ERROR: Admin merge verification failed for ${action_id}"
|
||||||
write_result "$action_id" 1 "Admin merge verification failed: see logs above"
|
write_result "$action_id" 1 "Admin merge verification failed: see logs above"
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
log "Action ${action_id}: admin merge verified"
|
||||||
|
fi
|
||||||
|
|
||||||
# Extract secrets from validated action
|
# Extract secrets from validated action
|
||||||
local secrets_array
|
local secrets_array
|
||||||
secrets_array="${VAULT_ACTION_SECRETS:-}"
|
secrets_array="${VAULT_ACTION_SECRETS:-}"
|
||||||
|
|
||||||
# Build command array (safe from shell injection)
|
# Build docker run command (self-contained, no compose context needed).
|
||||||
|
# The edge container has the Docker socket but not the host's compose project,
|
||||||
|
# so docker compose run would fail with exit 125. docker run is self-contained:
|
||||||
|
# the dispatcher knows the image, network, env vars, and entrypoint.
|
||||||
local -a cmd=(docker run --rm
|
local -a cmd=(docker run --rm
|
||||||
--name "vault-runner-${action_id}"
|
--name "vault-runner-${action_id}"
|
||||||
--network disinto_disinto-net
|
--network host
|
||||||
|
--entrypoint bash
|
||||||
|
-e DISINTO_CONTAINER=1
|
||||||
-e "FORGE_URL=${FORGE_URL}"
|
-e "FORGE_URL=${FORGE_URL}"
|
||||||
-e "FORGE_TOKEN=${FORGE_TOKEN}"
|
-e "FORGE_TOKEN=${FORGE_TOKEN}"
|
||||||
-e "FORGE_REPO=${FORGE_REPO}"
|
-e "FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}"
|
||||||
-e "FORGE_OPS_REPO=${FORGE_OPS_REPO}"
|
-e "FORGE_OPS_REPO=${FORGE_OPS_REPO:-}"
|
||||||
-e "PRIMARY_BRANCH=${PRIMARY_BRANCH}"
|
-e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
|
||||||
-e DISINTO_CONTAINER=1
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Pass through optional env vars if set
|
||||||
|
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
|
||||||
|
cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
|
||||||
|
fi
|
||||||
|
if [ -n "${CLAUDE_MODEL:-}" ]; then
|
||||||
|
cmd+=(-e "CLAUDE_MODEL=${CLAUDE_MODEL}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Mount docker socket, claude binary, and claude config
|
||||||
|
cmd+=(-v /var/run/docker.sock:/var/run/docker.sock)
|
||||||
|
if [ -f /usr/local/bin/claude ]; then
|
||||||
|
cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
|
||||||
|
fi
|
||||||
|
local runtime_home="${HOME:-/home/debian}"
|
||||||
|
if [ -d "${runtime_home}/.claude" ]; then
|
||||||
|
cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
|
||||||
|
fi
|
||||||
|
if [ -f "${runtime_home}/.claude.json" ]; then
|
||||||
|
cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
|
||||||
|
fi
|
||||||
|
|
||||||
# Add environment variables for secrets (if any declared)
|
# Add environment variables for secrets (if any declared)
|
||||||
if [ -n "$secrets_array" ]; then
|
if [ -n "$secrets_array" ]; then
|
||||||
for secret in $secrets_array; do
|
for secret in $secrets_array; do
|
||||||
|
|
@ -403,31 +468,43 @@ launch_runner() {
|
||||||
log "Action ${action_id} has no secrets declared — runner will execute without extra env vars"
|
log "Action ${action_id} has no secrets declared — runner will execute without extra env vars"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Add formula and action id as arguments (safe from shell injection)
|
# Add volume mounts for file-based credentials (if any declared)
|
||||||
local formula="${VAULT_ACTION_FORMULA:-}"
|
local mounts_array
|
||||||
cmd+=(disinto-agents:latest bash -c
|
mounts_array="${VAULT_ACTION_MOUNTS:-}"
|
||||||
"cd /home/agent/disinto && bash formulas/${formula}.sh ${action_id}")
|
if [ -n "$mounts_array" ]; then
|
||||||
|
for mount_alias in $mounts_array; do
|
||||||
# Log command skeleton (hide all -e flags for security)
|
mount_alias=$(echo "$mount_alias" | xargs)
|
||||||
local -a log_cmd=()
|
[ -n "$mount_alias" ] || continue
|
||||||
local skip_next=0
|
case "$mount_alias" in
|
||||||
for arg in "${cmd[@]}"; do
|
ssh)
|
||||||
if [[ $skip_next -eq 1 ]]; then
|
cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
|
||||||
skip_next=0
|
;;
|
||||||
continue
|
gpg)
|
||||||
fi
|
cmd+=(-v "${runtime_home}/.gnupg:/home/agent/.gnupg:ro")
|
||||||
if [[ "$arg" == "-e" ]]; then
|
;;
|
||||||
log_cmd+=("$arg" "<redacted>")
|
sops)
|
||||||
skip_next=1
|
cmd+=(-v "${runtime_home}/.config/sops/age:/home/agent/.config/sops/age:ro")
|
||||||
else
|
;;
|
||||||
log_cmd+=("$arg")
|
*)
|
||||||
fi
|
log "ERROR: Unknown mount alias '${mount_alias}' for action ${action_id}"
|
||||||
|
write_result "$action_id" 1 "Unknown mount alias: ${mount_alias}"
|
||||||
|
return 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
done
|
done
|
||||||
log "Running: ${log_cmd[*]}"
|
fi
|
||||||
|
|
||||||
|
# Mount the ops repo so the runner entrypoint can read the action TOML
|
||||||
|
cmd+=(-v "${OPS_REPO_ROOT}:/home/agent/ops:ro")
|
||||||
|
|
||||||
|
# Image and entrypoint arguments: runner entrypoint + action-id
|
||||||
|
cmd+=(disinto/agents:latest /home/agent/disinto/docker/runner/entrypoint-runner.sh "$action_id")
|
||||||
|
|
||||||
|
log "Running: docker run --rm vault-runner-${action_id} (secrets: ${secrets_array:-none}, mounts: ${mounts_array:-none})"
|
||||||
|
|
||||||
# Create temp file for logs
|
# Create temp file for logs
|
||||||
local log_file
|
local log_file
|
||||||
log_file=$(mktemp /tmp/dispatcher-logs-XXXXXX.txt)
|
log_file=$(mktemp /tmp/dispatcher-logs-XXXXXX)
|
||||||
trap 'rm -f "$log_file"' RETURN
|
trap 'rm -f "$log_file"' RETURN
|
||||||
|
|
||||||
# Execute with array expansion (safe from shell injection)
|
# Execute with array expansion (safe from shell injection)
|
||||||
|
|
@ -451,6 +528,459 @@ launch_runner() {
|
||||||
return $exit_code
|
return $exit_code
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Reproduce dispatch — launch sidecar for bug-report issues
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Pid-file path guarding one reproduce run per issue. Kept in /tmp so the
# dispatcher does not double-launch within a poll cycle.
_reproduce_lockfile() {
    printf '/tmp/reproduce-inflight-%s.pid\n' "$1"
}
|
||||||
|
|
||||||
|
# Succeed (exit 0) iff a reproduce container is still alive for this issue:
# the pid-file exists and its pid answers kill -0.
is_reproduce_running() {
    local pidfile pid
    pidfile=$(_reproduce_lockfile "$1")
    [ -f "$pidfile" ] || return 1
    pid=$(cat "$pidfile" 2>/dev/null || echo "")
    [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null
}
|
||||||
|
|
||||||
|
# Fetch open issues labelled bug-report that have no outcome label yet.
# Prints a newline-separated list of issue numbers (one per line).
# Silently prints nothing when the Forgejo env vars are missing or the
# API call fails — the caller treats that as "no candidates this cycle".
fetch_reproduce_candidates() {
    # Require FORGE_TOKEN, FORGE_URL, FORGE_REPO
    [ -n "${FORGE_TOKEN:-}" ] || return 0
    [ -n "${FORGE_URL:-}" ] || return 0
    [ -n "${FORGE_REPO:-}" ] || return 0

    local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"

    local issues_json
    issues_json=$(curl -sf \
        -H "Authorization: token ${FORGE_TOKEN}" \
        "${api}/issues?type=issues&state=open&labels=bug-report&limit=20" 2>/dev/null) || return 0

    # Filter out issues that already carry an outcome label.
    # The JSON is handed to python3 as an argument rather than via a temp
    # file: under `set -e` a python3 failure would skip the `rm -f` cleanup
    # and leak the temp file. argv avoids the file entirely (and the old
    # heredoc-vs-pipe stdin conflict, SC2259).
    python3 -c '
import sys, json
data = json.loads(sys.argv[1])
skip = {"in-progress", "in-triage", "rejected", "blocked"}
for issue in data:
    labels = {l["name"] for l in (issue.get("labels") or [])}
    if labels & skip:
        continue
    print(issue["number"])
' "$issues_json"
}
|
||||||
|
|
||||||
|
# Launch one reproduce container for the given candidate issue.
# The project TOML is the first *.toml under $PROJECTS_DIR (dev-poll
# convention). A pid-file prevents double-launching per dispatcher cycle.
dispatch_reproduce() {
    local issue="$1"

    if is_reproduce_running "$issue"; then
        log "Reproduce already running for issue #${issue}, skipping"
        return 0
    fi

    # Resolve the project TOML: first match wins.
    local project_toml=""
    for toml in "$PROJECTS_DIR"/*.toml; do
        if [ -f "$toml" ]; then
            project_toml="$toml"
            break
        fi
    done

    if [ -z "$project_toml" ]; then
        log "WARNING: no project TOML found under ${PROJECTS_DIR}/ — skipping reproduce for #${issue}"
        return 0
    fi

    log "Dispatching reproduce-agent for issue #${issue} (project: ${project_toml})"

    # Assemble the docker invocation as an array — immune to word-splitting
    # and shell injection from issue numbers / paths.
    local -a cmd=(docker run --rm
        --name "disinto-reproduce-${issue}"
        --network host
        --security-opt apparmor=unconfined
        -v /var/run/docker.sock:/var/run/docker.sock
        -v agent-data:/home/agent/data
        -v project-repos:/home/agent/repos
        -e "FORGE_URL=${FORGE_URL}"
        -e "FORGE_TOKEN=${FORGE_TOKEN}"
        -e "FORGE_REPO=${FORGE_REPO}"
        -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
        -e DISINTO_CONTAINER=1
    )

    # Optional API key pass-through.
    if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
        cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
    fi

    # Mount ~/.claude, ~/.claude.json and ~/.ssh from the runtime user's
    # home when present.
    local home="${HOME:-/home/debian}"
    if [ -d "${home}/.claude" ]; then
        cmd+=(-v "${home}/.claude:/home/agent/.claude")
    fi
    if [ -f "${home}/.claude.json" ]; then
        cmd+=(-v "${home}/.claude.json:/home/agent/.claude.json:ro")
    fi
    if [ -d "${home}/.ssh" ]; then
        cmd+=(-v "${home}/.ssh:/home/agent/.ssh:ro")
    fi
    # Host claude CLI binary, if installed.
    if [ -f /usr/local/bin/claude ]; then
        cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
    fi

    # Project TOML at a stable in-container path.
    local container_toml="/home/agent/project.toml"
    cmd+=(-v "${project_toml}:${container_toml}:ro")

    cmd+=(disinto-reproduce:latest "$container_toml" "$issue")

    # Background launch; record the pid so the next cycle can skip us.
    "${cmd[@]}" &
    local child=$!
    echo "$child" > "$(_reproduce_lockfile "$issue")"
    log "Reproduce container launched (pid ${child}) for issue #${issue}"
}
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Triage dispatch — launch sidecar for bug-report + in-triage issues
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Pid-file path guarding one triage run per issue.
_triage_lockfile() {
    printf '/tmp/triage-inflight-%s.pid\n' "$1"
}
|
||||||
|
|
||||||
|
# Succeed (exit 0) iff a triage container is still alive for this issue:
# the pid-file exists and its pid answers kill -0.
is_triage_running() {
    local pidfile pid
    pidfile=$(_triage_lockfile "$1")
    [ -f "$pidfile" ] || return 1
    pid=$(cat "$pidfile" 2>/dev/null || echo "")
    [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null
}
|
||||||
|
|
||||||
|
# Fetch open issues labelled both bug-report and in-triage.
# Prints a newline-separated list of issue numbers (one per line).
# Silently prints nothing when the Forgejo env vars are missing or the
# API call fails — the caller treats that as "no candidates this cycle".
fetch_triage_candidates() {
    # Require FORGE_TOKEN, FORGE_URL, FORGE_REPO
    [ -n "${FORGE_TOKEN:-}" ] || return 0
    [ -n "${FORGE_URL:-}" ] || return 0
    [ -n "${FORGE_REPO:-}" ] || return 0

    local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"

    local issues_json
    issues_json=$(curl -sf \
        -H "Authorization: token ${FORGE_TOKEN}" \
        "${api}/issues?type=issues&state=open&labels=bug-report&limit=20" 2>/dev/null) || return 0

    # Filter to issues that carry BOTH bug-report AND in-triage labels.
    # The JSON is handed to python3 as an argument rather than via a temp
    # file: under `set -e` a python3 failure would skip the `rm -f` cleanup
    # and leak the temp file. argv avoids the file entirely (and the old
    # heredoc-vs-pipe stdin conflict, SC2259).
    python3 -c '
import sys, json
data = json.loads(sys.argv[1])
for issue in data:
    labels = {l["name"] for l in (issue.get("labels") or [])}
    if "bug-report" in labels and "in-triage" in labels:
        print(issue["number"])
' "$issues_json"
}
|
||||||
|
|
||||||
|
# Launch one triage container for the given candidate issue.
# Reuses the disinto-reproduce:latest image; the triage formula is selected
# with DISINTO_FORMULA=triage. The stack lock is held for the full run (no
# timeout). A pid-file prevents double-launching per dispatcher cycle.
dispatch_triage() {
    local issue="$1"

    if is_triage_running "$issue"; then
        log "Triage already running for issue #${issue}, skipping"
        return 0
    fi

    # Resolve the project TOML: first match wins (dev-poll convention).
    local project_toml=""
    for toml in "$PROJECTS_DIR"/*.toml; do
        if [ -f "$toml" ]; then
            project_toml="$toml"
            break
        fi
    done

    if [ -z "$project_toml" ]; then
        log "WARNING: no project TOML found under ${PROJECTS_DIR}/ — skipping triage for #${issue}"
        return 0
    fi

    log "Dispatching triage-agent for issue #${issue} (project: ${project_toml})"

    # Assemble the docker invocation as an array — immune to word-splitting
    # and shell injection from issue numbers / paths.
    local -a cmd=(docker run --rm
        --name "disinto-triage-${issue}"
        --network host
        --security-opt apparmor=unconfined
        -v /var/run/docker.sock:/var/run/docker.sock
        -v agent-data:/home/agent/data
        -v project-repos:/home/agent/repos
        -e "FORGE_URL=${FORGE_URL}"
        -e "FORGE_TOKEN=${FORGE_TOKEN}"
        -e "FORGE_REPO=${FORGE_REPO}"
        -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
        -e DISINTO_CONTAINER=1
        -e DISINTO_FORMULA=triage
    )

    # Optional API key pass-through.
    if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
        cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
    fi

    # Mount ~/.claude, ~/.claude.json and ~/.ssh from the runtime user's
    # home when present.
    local home="${HOME:-/home/debian}"
    if [ -d "${home}/.claude" ]; then
        cmd+=(-v "${home}/.claude:/home/agent/.claude")
    fi
    if [ -f "${home}/.claude.json" ]; then
        cmd+=(-v "${home}/.claude.json:/home/agent/.claude.json:ro")
    fi
    if [ -d "${home}/.ssh" ]; then
        cmd+=(-v "${home}/.ssh:/home/agent/.ssh:ro")
    fi
    # Host claude CLI binary, if installed.
    if [ -f /usr/local/bin/claude ]; then
        cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
    fi

    # Project TOML at a stable in-container path.
    local container_toml="/home/agent/project.toml"
    cmd+=(-v "${project_toml}:${container_toml}:ro")

    cmd+=(disinto-reproduce:latest "$container_toml" "$issue")

    # Background launch; record the pid so the next cycle can skip us.
    "${cmd[@]}" &
    local child=$!
    echo "$child" > "$(_triage_lockfile "$issue")"
    log "Triage container launched (pid ${child}) for issue #${issue}"
}
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Verification dispatch — launch sidecar for bug-report parents with all deps closed
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Pid-file path guarding one verification run per issue.
_verify_lockfile() {
    printf '/tmp/verify-inflight-%s.pid\n' "$1"
}
|
||||||
|
|
||||||
|
# Succeed (exit 0) iff a verification container is still alive for this
# issue: the pid-file exists and its pid answers kill -0.
is_verify_running() {
    local pidfile pid
    pidfile=$(_verify_lockfile "$1")
    [ -f "$pidfile" ] || return 1
    pid=$(cat "$pidfile" 2>/dev/null || echo "")
    [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null
}
|
||||||
|
|
||||||
|
# Check if an issue is a parent with sub-issues. Sub-issues are recognised
# by a body containing "Decomposed from #N" where N is the parent's number.
# Returns: 0 if at least one sub-issue exists, 1 otherwise (or API failure).
_is_parent_issue() {
    local parent_num="$1"

    # Fetch all issues (open and closed) so closed sub-issues still count.
    local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
    local all_issues_json
    all_issues_json=$(curl -sf \
        -H "Authorization: token ${FORGE_TOKEN}" \
        "${api}/issues?type=issues&state=all&limit=50" 2>/dev/null) || return 1

    # Feed the JSON on stdin via a here-string; the parent number travels
    # as an argument so the python snippet stays heredoc-free.
    local sub_issues
    sub_issues=$(python3 -c '
import sys, json
parent_num = sys.argv[1]
data = json.load(open("/dev/stdin"))
sub_issues = []
for issue in data:
    body = issue.get("body") or ""
    if f"Decomposed from #{parent_num}" in body:
        sub_issues.append(str(issue["number"]))
print(" ".join(sub_issues))
' "$parent_num" <<< "$all_issues_json") || return 1

    # Non-empty list ⇒ at least one sub-issue references this parent.
    [ -n "$sub_issues" ]
}
|
||||||
|
|
||||||
|
# Check if all sub-issues of a parent are closed. Sub-issues are recognised
# by a body containing "Decomposed from #<parent>".
# Returns: 0 if the parent has sub-issues and every one is closed;
#          1 if any is open, none exist, or an API call fails.
_are_all_sub_issues_closed() {
    local parent_num="$1"

    # Fetch all issues (open and closed) so closed sub-issues still count.
    local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
    local all_issues_json
    all_issues_json=$(curl -sf \
        -H "Authorization: token ${FORGE_TOKEN}" \
        "${api}/issues?type=issues&state=all&limit=50" 2>/dev/null) || return 1

    # Collect sub-issue numbers; JSON arrives on stdin via a here-string.
    local sub_issues
    sub_issues=$(python3 -c '
import sys, json
parent_num = sys.argv[1]
data = json.load(open("/dev/stdin"))
sub_issues = []
for issue in data:
    body = issue.get("body") or ""
    if f"Decomposed from #{parent_num}" in body:
        sub_issues.append(str(issue["number"]))
print(" ".join(sub_issues))
' "$parent_num" <<< "$all_issues_json") || return 1

    # A parent with zero sub-issues cannot be "all closed".
    [ -z "$sub_issues" ] && return 1

    # Every sub-issue must report state == "closed".
    local sub state
    for sub in $sub_issues; do
        state=$(curl -sf \
            -H "Authorization: token ${FORGE_TOKEN}" \
            "${api}/issues/${sub}" 2>/dev/null | jq -r '.state // "unknown"') || return 1
        if [ "$state" != "closed" ]; then
            return 1
        fi
    done
    return 0
}
|
||||||
|
|
||||||
|
# Fetch open issues labelled bug-report AND in-progress — parents whose
# fixes may have merged and need verification. Prints a newline-separated
# list of issue numbers (one per line).
# NOTE: this function only filters on labels; the main loop re-checks
# _are_all_sub_issues_closed before dispatching.
fetch_verification_candidates() {
    # Require FORGE_TOKEN, FORGE_URL, FORGE_REPO
    [ -n "${FORGE_TOKEN:-}" ] || return 0
    [ -n "${FORGE_URL:-}" ] || return 0
    [ -n "${FORGE_REPO:-}" ] || return 0

    local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"

    # Fetch open bug-report issues
    local issues_json
    issues_json=$(curl -sf \
        -H "Authorization: token ${FORGE_TOKEN}" \
        "${api}/issues?type=issues&state=open&labels=bug-report&limit=20" 2>/dev/null) || return 0

    # Filter to issues that also carry the in-progress label.
    # JSON is passed as argv: no temp file to leak when python3 fails under
    # `set -e`. The previous version also put FORGE_TOKEN (and the API URL)
    # on the python3 command line even though the filter never used them —
    # argv is world-readable via /proc, so the unused token arg is dropped.
    python3 -c '
import sys, json
data = json.loads(sys.argv[1])
for issue in data:
    labels = {l["name"] for l in (issue.get("labels") or [])}
    # Must have BOTH bug-report AND in-progress labels
    if "bug-report" not in labels or "in-progress" not in labels:
        continue
    print(issue["number"])
' "$issues_json"
}
|
||||||
|
|
||||||
|
# Launch one verification container for the given candidate issue.
# Reuses the disinto-reproduce:latest image; the verify formula is selected
# with DISINTO_FORMULA=verify. A pid-file prevents double-launching per
# dispatcher cycle.
dispatch_verify() {
    local issue="$1"

    if is_verify_running "$issue"; then
        log "Verification already running for issue #${issue}, skipping"
        return 0
    fi

    # Resolve the project TOML: first match wins (dev-poll convention).
    local project_toml=""
    for toml in "$PROJECTS_DIR"/*.toml; do
        if [ -f "$toml" ]; then
            project_toml="$toml"
            break
        fi
    done

    if [ -z "$project_toml" ]; then
        log "WARNING: no project TOML found under ${PROJECTS_DIR}/ — skipping verification for #${issue}"
        return 0
    fi

    log "Dispatching verification-agent for issue #${issue} (project: ${project_toml})"

    # Assemble the docker invocation as an array — immune to word-splitting
    # and shell injection from issue numbers / paths.
    local -a cmd=(docker run --rm
        --name "disinto-verify-${issue}"
        --network host
        --security-opt apparmor=unconfined
        -v /var/run/docker.sock:/var/run/docker.sock
        -v agent-data:/home/agent/data
        -v project-repos:/home/agent/repos
        -e "FORGE_URL=${FORGE_URL}"
        -e "FORGE_TOKEN=${FORGE_TOKEN}"
        -e "FORGE_REPO=${FORGE_REPO}"
        -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
        -e DISINTO_CONTAINER=1
        -e DISINTO_FORMULA=verify
    )

    # Optional API key pass-through.
    if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
        cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
    fi

    # Mount ~/.claude, ~/.claude.json and ~/.ssh from the runtime user's
    # home when present.
    local home="${HOME:-/home/debian}"
    if [ -d "${home}/.claude" ]; then
        cmd+=(-v "${home}/.claude:/home/agent/.claude")
    fi
    if [ -f "${home}/.claude.json" ]; then
        cmd+=(-v "${home}/.claude.json:/home/agent/.claude.json:ro")
    fi
    if [ -d "${home}/.ssh" ]; then
        cmd+=(-v "${home}/.ssh:/home/agent/.ssh:ro")
    fi
    # Host claude CLI binary, if installed.
    if [ -f /usr/local/bin/claude ]; then
        cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
    fi

    # Project TOML at a stable in-container path.
    local container_toml="/home/agent/project.toml"
    cmd+=(-v "${project_toml}:${container_toml}:ro")

    cmd+=(disinto-reproduce:latest "$container_toml" "$issue")

    # Background launch; record the pid so the next cycle can skip us.
    "${cmd[@]}" &
    local child=$!
    echo "$child" > "$(_verify_lockfile "$issue")"
    log "Verification container launched (pid ${child}) for issue #${issue}"
}
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# Main dispatcher loop
|
# Main dispatcher loop
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
@ -501,6 +1031,42 @@ main() {
|
||||||
launch_runner "$toml_file" || true
|
launch_runner "$toml_file" || true
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Reproduce dispatch: check for bug-report issues needing reproduction
|
||||||
|
local candidate_issues
|
||||||
|
candidate_issues=$(fetch_reproduce_candidates) || true
|
||||||
|
if [ -n "$candidate_issues" ]; then
|
||||||
|
while IFS= read -r issue_num; do
|
||||||
|
[ -n "$issue_num" ] || continue
|
||||||
|
dispatch_reproduce "$issue_num" || true
|
||||||
|
done <<< "$candidate_issues"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Triage dispatch: check for bug-report + in-triage issues needing deep analysis
|
||||||
|
local triage_issues
|
||||||
|
triage_issues=$(fetch_triage_candidates) || true
|
||||||
|
if [ -n "$triage_issues" ]; then
|
||||||
|
while IFS= read -r issue_num; do
|
||||||
|
[ -n "$issue_num" ] || continue
|
||||||
|
dispatch_triage "$issue_num" || true
|
||||||
|
done <<< "$triage_issues"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Verification dispatch: check for bug-report + in-progress issues whose sub-issues are all closed
|
||||||
|
# These are parents whose fixes have merged and need verification
|
||||||
|
local verify_issues
|
||||||
|
verify_issues=$(fetch_verification_candidates) || true
|
||||||
|
if [ -n "$verify_issues" ]; then
|
||||||
|
while IFS= read -r issue_num; do
|
||||||
|
[ -n "$issue_num" ] || continue
|
||||||
|
# Double-check: this issue must have all sub-issues closed before dispatching
|
||||||
|
if _are_all_sub_issues_closed "$issue_num"; then
|
||||||
|
dispatch_verify "$issue_num" || true
|
||||||
|
else
|
||||||
|
log "Issue #${issue_num} has open sub-issues — skipping verification"
|
||||||
|
fi
|
||||||
|
done <<< "$verify_issues"
|
||||||
|
fi
|
||||||
|
|
||||||
# Wait before next poll
|
# Wait before next poll
|
||||||
sleep 60
|
sleep 60
|
||||||
done
|
done
|
||||||
|
|
|
||||||
|
|
@ -4,16 +4,122 @@ set -euo pipefail
|
||||||
# Set USER before sourcing env.sh (Alpine doesn't set USER)
|
# Set USER before sourcing env.sh (Alpine doesn't set USER)
|
||||||
export USER="${USER:-root}"
|
export USER="${USER:-root}"
|
||||||
|
|
||||||
DISINTO_VERSION="${DISINTO_VERSION:-main}"
|
FORGE_URL="${FORGE_URL:-http://forgejo:3000}"
|
||||||
DISINTO_REPO="${FORGE_URL:-http://forgejo:3000}/johba/disinto.git"
|
|
||||||
|
|
||||||
# Shallow clone at the pinned version
|
# Derive FORGE_REPO from PROJECT_TOML if available, otherwise require explicit env var
|
||||||
if [ ! -d /opt/disinto/.git ]; then
|
if [ -z "${FORGE_REPO:-}" ]; then
|
||||||
git clone --depth 1 --branch "$DISINTO_VERSION" "$DISINTO_REPO" /opt/disinto
|
# Try to find a project TOML to derive FORGE_REPO from
|
||||||
|
_project_toml="${PROJECT_TOML:-}"
|
||||||
|
if [ -z "$_project_toml" ] && [ -d "${FACTORY_ROOT:-/opt/disinto}/projects" ]; then
|
||||||
|
for toml in "${FACTORY_ROOT:-/opt/disinto}"/projects/*.toml; do
|
||||||
|
if [ -f "$toml" ]; then
|
||||||
|
_project_toml="$toml"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "$_project_toml" ] && [ -f "$_project_toml" ]; then
|
||||||
|
# Parse FORGE_REPO from project TOML using load-project.sh
|
||||||
|
if source "${FACTORY_ROOT:-/opt/disinto}/lib/load-project.sh" "$_project_toml" 2>/dev/null; then
|
||||||
|
if [ -n "${FORGE_REPO:-}" ]; then
|
||||||
|
echo "Derived FORGE_REPO from PROJECT_TOML: $_project_toml" >&2
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# If still not set, fail fast with a clear error message
|
||||||
|
if [ -z "${FORGE_REPO:-}" ]; then
|
||||||
|
echo "FATAL: FORGE_REPO environment variable not set" >&2
|
||||||
|
echo "Set FORGE_REPO=<owner>/<repo> in .env (e.g. FORGE_REPO=disinto-admin/disinto)" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Detect bind-mount of a non-git directory before attempting clone
|
||||||
|
if [ -d /opt/disinto ] && [ ! -d /opt/disinto/.git ] && [ -n "$(ls -A /opt/disinto 2>/dev/null)" ]; then
|
||||||
|
echo "FATAL: /opt/disinto contains files but no .git directory." >&2
|
||||||
|
echo "If you bind-mounted a directory at /opt/disinto, ensure it is a git working tree." >&2
|
||||||
|
echo "Sleeping 60s before exit to throttle the restart loop..." >&2
|
||||||
|
sleep 60
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set HOME early so credential helper and git config land in the right place.
|
||||||
|
export HOME=/home/agent
|
||||||
|
mkdir -p "$HOME"
|
||||||
|
|
||||||
|
# Configure git credential helper before cloning (#604).
|
||||||
|
# /opt/disinto does not exist yet so we cannot source lib/git-creds.sh;
|
||||||
|
# inline a minimal credential-helper setup here.
|
||||||
|
if [ -n "${FORGE_PASS:-}" ] && [ -n "${FORGE_URL:-}" ]; then
|
||||||
|
_forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||')
|
||||||
|
_forge_proto=$(printf '%s' "$FORGE_URL" | sed 's|://.*||')
|
||||||
|
_bot_user=""
|
||||||
|
if [ -n "${FORGE_TOKEN:-}" ]; then
|
||||||
|
_bot_user=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_URL}/api/v1/user" 2>/dev/null | jq -r '.login // empty') || _bot_user=""
|
||||||
|
fi
|
||||||
|
_bot_user="${_bot_user:-dev-bot}"
|
||||||
|
|
||||||
|
cat > "${HOME}/.git-credentials-helper" <<CREDEOF
|
||||||
|
#!/bin/sh
|
||||||
|
[ "\$1" = "get" ] || exit 0
|
||||||
|
cat >/dev/null
|
||||||
|
echo "protocol=${_forge_proto}"
|
||||||
|
echo "host=${_forge_host}"
|
||||||
|
echo "username=${_bot_user}"
|
||||||
|
echo "password=${FORGE_PASS}"
|
||||||
|
CREDEOF
|
||||||
|
chmod 755 "${HOME}/.git-credentials-helper"
|
||||||
|
git config --global credential.helper "${HOME}/.git-credentials-helper"
|
||||||
|
git config --global --add safe.directory '*'
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Shallow clone at the pinned version — use clean URL, credential helper
|
||||||
|
# supplies auth (#604).
|
||||||
|
if [ ! -d /opt/disinto/.git ]; then
|
||||||
|
echo "edge: cloning ${FORGE_URL}/${FORGE_REPO} (branch ${DISINTO_VERSION:-main})..." >&2
|
||||||
|
if ! git clone --depth 1 --branch "${DISINTO_VERSION:-main}" "${FORGE_URL}/${FORGE_REPO}.git" /opt/disinto; then
|
||||||
|
echo >&2
|
||||||
|
echo "FATAL: failed to clone ${FORGE_URL}/${FORGE_REPO}.git (branch ${DISINTO_VERSION:-main})" >&2
|
||||||
|
echo "Likely causes:" >&2
|
||||||
|
echo " - Forgejo at ${FORGE_URL} is unreachable from the edge container" >&2
|
||||||
|
echo " - Repository '${FORGE_REPO}' does not exist on this forge" >&2
|
||||||
|
echo " - FORGE_TOKEN/FORGE_PASS is invalid or has no read access to '${FORGE_REPO}'" >&2
|
||||||
|
echo " - Branch '${DISINTO_VERSION:-main}' does not exist in '${FORGE_REPO}'" >&2
|
||||||
|
echo "Workaround: bind-mount a local git checkout into /opt/disinto." >&2
|
||||||
|
echo "Sleeping 60s before exit to throttle the restart loop..." >&2
|
||||||
|
sleep 60
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Repair any legacy baked-credential URLs in /opt/disinto (#604).
|
||||||
|
# Now that /opt/disinto exists, source the shared lib.
|
||||||
|
if [ -f /opt/disinto/lib/git-creds.sh ]; then
|
||||||
|
# shellcheck source=/opt/disinto/lib/git-creds.sh
|
||||||
|
source /opt/disinto/lib/git-creds.sh
|
||||||
|
_GIT_CREDS_LOG_FN="echo" repair_baked_cred_urls /opt/disinto
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Ensure log directory exists
|
||||||
|
mkdir -p /opt/disinto-logs
|
||||||
|
|
||||||
# Start dispatcher in background
|
# Start dispatcher in background
|
||||||
bash /opt/disinto/docker/edge/dispatcher.sh &
|
bash /opt/disinto/docker/edge/dispatcher.sh &
|
||||||
|
|
||||||
# Caddy as main process
|
# Start supervisor loop in background
|
||||||
exec caddy run --config /etc/caddy/Caddyfile --adapter caddyfile
|
PROJECT_TOML="${PROJECT_TOML:-projects/disinto.toml}"
|
||||||
|
(while true; do
|
||||||
|
bash /opt/disinto/supervisor/supervisor-run.sh "/opt/disinto/${PROJECT_TOML}" 2>&1 | tee -a /opt/disinto-logs/supervisor.log || true
|
||||||
|
sleep 1200 # 20 minutes
|
||||||
|
done) &
|
||||||
|
|
||||||
|
# Caddy as main process — run in foreground via wait so background jobs survive
|
||||||
|
# (exec replaces the shell, which can orphan backgrounded subshells)
|
||||||
|
caddy run --config /etc/caddy/Caddyfile --adapter caddyfile &
|
||||||
|
|
||||||
|
# Exit when any child dies (caddy crash → container restart via docker compose)
|
||||||
|
wait -n
|
||||||
|
exit 1
|
||||||
|
|
|
||||||
11
docker/reproduce/Dockerfile
Normal file
11
docker/reproduce/Dockerfile
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
FROM debian:bookworm-slim
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
bash curl git jq docker.io docker-compose-plugin \
|
||||||
|
nodejs npm chromium \
|
||||||
|
&& npm install -g @anthropic-ai/mcp-playwright \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
RUN useradd -m -u 1000 -s /bin/bash agent
|
||||||
|
COPY docker/reproduce/entrypoint-reproduce.sh /entrypoint-reproduce.sh
|
||||||
|
RUN chmod +x /entrypoint-reproduce.sh
|
||||||
|
WORKDIR /home/agent
|
||||||
|
ENTRYPOINT ["/entrypoint-reproduce.sh"]
|
||||||
1046
docker/reproduce/entrypoint-reproduce.sh
Normal file
1046
docker/reproduce/entrypoint-reproduce.sh
Normal file
File diff suppressed because it is too large
Load diff
115
docker/runner/entrypoint-runner.sh
Normal file
115
docker/runner/entrypoint-runner.sh
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# entrypoint-runner.sh — Vault runner entrypoint
|
||||||
|
#
|
||||||
|
# Receives an action-id, reads the vault action TOML to get the formula name,
|
||||||
|
# then dispatches to the appropriate executor:
|
||||||
|
# - formulas/<name>.sh → bash (mechanical operations like release)
|
||||||
|
# - formulas/<name>.toml → claude -p (reasoning tasks like triage, architect)
|
||||||
|
#
|
||||||
|
# Usage: entrypoint-runner.sh <action-id>
|
||||||
|
#
|
||||||
|
# Expects:
|
||||||
|
# OPS_REPO_ROOT — path to the ops repo (mounted by compose)
|
||||||
|
# FACTORY_ROOT — path to disinto code (default: /home/agent/disinto)
|
||||||
|
#
|
||||||
|
# Part of #516.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
FACTORY_ROOT="${FACTORY_ROOT:-/home/agent/disinto}"
|
||||||
|
OPS_REPO_ROOT="${OPS_REPO_ROOT:-/home/agent/ops}"
|
||||||
|
|
||||||
|
log() {
|
||||||
|
printf '[%s] runner: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$*"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Configure git credential helper so formulas can clone/push without
|
||||||
|
# needing tokens embedded in remote URLs (#604).
|
||||||
|
if [ -f "${FACTORY_ROOT}/lib/git-creds.sh" ]; then
|
||||||
|
# shellcheck source=lib/git-creds.sh
|
||||||
|
source "${FACTORY_ROOT}/lib/git-creds.sh"
|
||||||
|
# shellcheck disable=SC2119 # no args intended — uses defaults
|
||||||
|
configure_git_creds
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Argument parsing ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
action_id="${1:-}"
|
||||||
|
if [ -z "$action_id" ]; then
|
||||||
|
log "ERROR: action-id argument required"
|
||||||
|
echo "Usage: entrypoint-runner.sh <action-id>" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Read vault action TOML ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
action_toml="${OPS_REPO_ROOT}/vault/actions/${action_id}.toml"
|
||||||
|
if [ ! -f "$action_toml" ]; then
|
||||||
|
log "ERROR: vault action TOML not found: ${action_toml}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract formula name from TOML
|
||||||
|
formula=$(grep -E '^formula\s*=' "$action_toml" \
|
||||||
|
| sed -E 's/^formula\s*=\s*"(.*)"/\1/' | tr -d '\r')
|
||||||
|
|
||||||
|
if [ -z "$formula" ]; then
|
||||||
|
log "ERROR: no 'formula' field found in ${action_toml}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract context for logging
|
||||||
|
context=$(grep -E '^context\s*=' "$action_toml" \
|
||||||
|
| sed -E 's/^context\s*=\s*"(.*)"/\1/' | tr -d '\r')
|
||||||
|
|
||||||
|
log "Action: ${action_id}, formula: ${formula}, context: ${context:-<none>}"
|
||||||
|
|
||||||
|
# Export action TOML path so formula scripts can use it directly
|
||||||
|
export VAULT_ACTION_TOML="$action_toml"
|
||||||
|
|
||||||
|
# ── Dispatch: .sh (mechanical) vs .toml (Claude reasoning) ──────────────
|
||||||
|
|
||||||
|
formula_sh="${FACTORY_ROOT}/formulas/${formula}.sh"
|
||||||
|
formula_toml="${FACTORY_ROOT}/formulas/${formula}.toml"
|
||||||
|
|
||||||
|
if [ -f "$formula_sh" ]; then
|
||||||
|
# Mechanical operation — run directly
|
||||||
|
log "Dispatching to shell script: ${formula_sh}"
|
||||||
|
exec bash "$formula_sh" "$action_id"
|
||||||
|
|
||||||
|
elif [ -f "$formula_toml" ]; then
|
||||||
|
# Reasoning task — launch Claude with the formula as prompt
|
||||||
|
log "Dispatching to Claude with formula: ${formula_toml}"
|
||||||
|
|
||||||
|
formula_content=$(cat "$formula_toml")
|
||||||
|
action_context=$(cat "$action_toml")
|
||||||
|
|
||||||
|
prompt="You are a vault runner executing a formula-based operational task.
|
||||||
|
|
||||||
|
## Vault action
|
||||||
|
\`\`\`toml
|
||||||
|
${action_context}
|
||||||
|
\`\`\`
|
||||||
|
|
||||||
|
## Formula
|
||||||
|
\`\`\`toml
|
||||||
|
${formula_content}
|
||||||
|
\`\`\`
|
||||||
|
|
||||||
|
## Instructions
|
||||||
|
Execute the steps defined in the formula above. The vault action context provides
|
||||||
|
the specific parameters for this run. Execute each step in order, verifying
|
||||||
|
success before proceeding to the next.
|
||||||
|
|
||||||
|
FACTORY_ROOT=${FACTORY_ROOT}
|
||||||
|
OPS_REPO_ROOT=${OPS_REPO_ROOT}
|
||||||
|
"
|
||||||
|
|
||||||
|
exec claude -p "$prompt" \
|
||||||
|
--dangerously-skip-permissions \
|
||||||
|
${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"}
|
||||||
|
|
||||||
|
else
|
||||||
|
log "ERROR: no formula found for '${formula}' — checked ${formula_sh} and ${formula_toml}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
25
docs/BLAST-RADIUS.md
Normal file
25
docs/BLAST-RADIUS.md
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
# Vault blast-radius tiers
|
||||||
|
|
||||||
|
## Tiers
|
||||||
|
|
||||||
|
| Tier | Meaning | Dispatch path |
|
||||||
|
|------|---------|---------------|
|
||||||
|
| low | Revertable, no external side effects | Direct commit to ops main; no human gate |
|
||||||
|
| medium | Significant but reversible | PR on ops repo; blocks calling agent until merged |
|
||||||
|
| high | Irreversible or high-blast-radius | PR on ops repo; hard blocks |
|
||||||
|
|
||||||
|
## Which agents are affected
|
||||||
|
|
||||||
|
Vault-blocking applies to: predictor, planner, architect, deploy pipelines, releases, shipping.
|
||||||
|
It does NOT apply to dev-agent — dev-agent work is always committed to a feature branch and
|
||||||
|
revertable via git revert. Dev-agent never needs a vault gate.
|
||||||
|
|
||||||
|
## Default tier
|
||||||
|
|
||||||
|
Unknown formulas default to `high`. When adding a new formula, add it to
|
||||||
|
`vault/policy.toml` (in ops repo, seeded during disinto init from disinto repo template).
|
||||||
|
|
||||||
|
## Per-action override
|
||||||
|
|
||||||
|
A vault action TOML may include `blast_radius = "low"` to override the policy tier
|
||||||
|
for that specific invocation. Use sparingly — policy.toml is the authoritative source.
|
||||||
|
|
@ -39,9 +39,11 @@ programmatically instead of parsing SKILL.md instructions.
|
||||||
(`mcp` package). This adds a build step, runtime dependency, and
|
(`mcp` package). This adds a build step, runtime dependency, and
|
||||||
language that no current contributor or agent maintains.
|
language that no current contributor or agent maintains.
|
||||||
|
|
||||||
2. **Persistent process.** The factory is cron-driven — no long-running
|
2. **Persistent process.** The factory already runs a long-lived polling loop
|
||||||
daemons. An MCP server must stay up, be monitored, and be restarted on
|
(`docker/agents/entrypoint.sh`), so an MCP server is not architecturally
|
||||||
failure. This contradicts the factory's event-driven architecture (AD-004).
|
alien — the loop could keep an MCP client alive across iterations. However,
|
||||||
|
adding a second long-running process increases the monitoring surface and
|
||||||
|
restart complexity.
|
||||||
|
|
||||||
3. **Thin wrapper over existing APIs.** Every proposed MCP tool maps directly
|
3. **Thin wrapper over existing APIs.** Every proposed MCP tool maps directly
|
||||||
to a forge API call or a skill script invocation. The MCP server would be
|
to a forge API call or a skill script invocation. The MCP server would be
|
||||||
|
|
|
||||||
|
|
@ -92,10 +92,9 @@ PHASE:failed → label issue blocked, post diagnostic comment
|
||||||
|
|
||||||
### `idle_prompt` exit reason
|
### `idle_prompt` exit reason
|
||||||
|
|
||||||
`monitor_phase_loop` (in `lib/agent-session.sh`) can exit with
|
The phase monitor can exit with `_MONITOR_LOOP_EXIT=idle_prompt`. This happens
|
||||||
`_MONITOR_LOOP_EXIT=idle_prompt`. This happens when Claude returns to the
|
when Claude returns to the interactive prompt (`❯`) for **3 consecutive polls**
|
||||||
interactive prompt (`❯`) for **3 consecutive polls** without writing any phase
|
without writing any phase signal to the phase file.
|
||||||
signal to the phase file.
|
|
||||||
|
|
||||||
**Trigger conditions:**
|
**Trigger conditions:**
|
||||||
- The phase file is empty (no phase has ever been written), **and**
|
- The phase file is empty (no phase has ever been written), **and**
|
||||||
|
|
@ -111,14 +110,13 @@ signal to the phase file.
|
||||||
callback without the phase file actually containing that value.
|
callback without the phase file actually containing that value.
|
||||||
|
|
||||||
**Agent requirements:**
|
**Agent requirements:**
|
||||||
- **Callback (`_on_phase_change` / `formula_phase_callback`):** Must handle
|
- **Callback:** Must handle `PHASE:failed` defensively — the session is already
|
||||||
`PHASE:failed` defensively — the session is already dead, so any tmux
|
dead, so any tmux send-keys or session-dependent logic must be skipped or
|
||||||
send-keys or session-dependent logic must be skipped or guarded.
|
guarded.
|
||||||
- **Post-loop exit handler (`case $_MONITOR_LOOP_EXIT`):** Must include an
|
- **Post-loop exit handler (`case $_MONITOR_LOOP_EXIT`):** Must include an
|
||||||
`idle_prompt)` branch. Typical actions: log the event, clean up temp files,
|
`idle_prompt)` branch. Typical actions: log the event, clean up temp files,
|
||||||
and (for agents that use escalation) write an escalation entry or notify via
|
and (for agents that use escalation) write an escalation entry or notify via
|
||||||
vault/forge. See `dev/dev-agent.sh` and
|
vault/forge. See `dev/dev-agent.sh` for reference implementations.
|
||||||
`gardener/gardener-agent.sh` for reference implementations.
|
|
||||||
|
|
||||||
## Crash Recovery
|
## Crash Recovery
|
||||||
|
|
||||||
|
|
|
||||||
175
docs/updating-factory.md
Normal file
175
docs/updating-factory.md
Normal file
|
|
@ -0,0 +1,175 @@
|
||||||
|
# Updating the Disinto Factory
|
||||||
|
|
||||||
|
How to update the disinto factory code on a deployment box (e.g. harb-dev-box)
|
||||||
|
after a new version lands on the upstream Forgejo.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- SSH access to the deployment box
|
||||||
|
- The upstream remote (`devbox`) pointing to the disinto-dev-box Forgejo
|
||||||
|
|
||||||
|
## Step 1: Pull the latest code
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/disinto
|
||||||
|
git fetch devbox main
|
||||||
|
git log --oneline devbox/main -5 # review what changed
|
||||||
|
git stash # save any local fixes
|
||||||
|
git merge devbox/main
|
||||||
|
```
|
||||||
|
|
||||||
|
## Note: docker-compose.yml is generator-only
|
||||||
|
|
||||||
|
The `docker-compose.yml` file is now generated exclusively by `bin/disinto init`.
|
||||||
|
The tracked file has been removed. If you have a local `docker-compose.yml` from
|
||||||
|
before this change, it is now "yours" and won't be touched by future updates.
|
||||||
|
To pick up generator improvements, delete the existing file and run `bin/disinto init`.
|
||||||
|
|
||||||
|
## Step 2: Preserve local config
|
||||||
|
|
||||||
|
These files are not in git but are needed at runtime. Back them up before
|
||||||
|
any compose regeneration:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env .env.backup
|
||||||
|
cp projects/harb.toml projects/harb.toml.backup
|
||||||
|
cp docker-compose.override.yml docker-compose.override.yml.backup 2>/dev/null
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 3: Regenerate docker-compose.yml
|
||||||
|
|
||||||
|
If `generate_compose()` changed or you need a fresh compose file:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
rm docker-compose.yml
|
||||||
|
source .env
|
||||||
|
bin/disinto init https://codeberg.org/johba/harb --branch master --yes
|
||||||
|
```
|
||||||
|
|
||||||
|
This will regenerate the compose but may fail partway through (token collisions,
|
||||||
|
existing users). The compose file is written early — check it exists even if
|
||||||
|
init errors out.
|
||||||
|
|
||||||
|
### Known post-regeneration fixes (until #429 lands)
|
||||||
|
|
||||||
|
Most generator issues have been fixed. The following items no longer apply:
|
||||||
|
|
||||||
|
- **AppArmor (#492)** — Fixed: all services now have `apparmor=unconfined`
|
||||||
|
- **Forgejo image tag (#493)** — Fixed: generator uses `forgejo:11.0`
|
||||||
|
- **Agent credential mounts (#495)** — Fixed: `.claude`, `.claude.json`, `.ssh`, and `project-repos` volumes are auto-generated
|
||||||
|
- **Repo path (#494)** — Not applicable: `projects/*.toml` files are gitignored and preserved
|
||||||
|
|
||||||
|
If you need to add custom volumes, edit the generated `docker-compose.yml` directly.
|
||||||
|
It will not be overwritten by future `init` runs (the generator skips existing files).
|
||||||
|
|
||||||
|
## Step 4: Rebuild and restart
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Rebuild agents image (code is baked in via COPY)
|
||||||
|
docker compose build agents
|
||||||
|
|
||||||
|
# Restart all disinto services
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# If edge fails to build (caddy:alpine has no apt-get), skip it:
|
||||||
|
docker compose up -d forgejo woodpecker woodpecker-agent agents staging
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 5: Verify
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# All containers running?
|
||||||
|
docker ps --format 'table {{.Names}}\t{{.Status}}' | grep disinto
|
||||||
|
|
||||||
|
# Forgejo responding?
|
||||||
|
curl -sf -o /dev/null -w 'HTTP %{http_code}' http://localhost:3000/
|
||||||
|
|
||||||
|
# Claude auth works?
|
||||||
|
docker exec -u agent disinto-agents bash -c 'claude -p "say ok" 2>&1'
|
||||||
|
|
||||||
|
# Agent polling loop running?
|
||||||
|
docker exec disinto-agents pgrep -f entrypoint.sh
|
||||||
|
# If no process: check that entrypoint.sh is the container CMD and projects TOML is mounted.
|
||||||
|
|
||||||
|
# Agent repo cloned?
|
||||||
|
docker exec disinto-agents ls /home/agent/repos/harb/.git && echo ok
|
||||||
|
# If missing:
|
||||||
|
docker exec disinto-agents chown -R agent:agent /home/agent/repos
|
||||||
|
source .env
|
||||||
|
docker exec -u agent disinto-agents bash -c \
|
||||||
|
"git clone http://dev-bot:${FORGE_TOKEN}@forgejo:3000/johba/harb.git /home/agent/repos/harb"
|
||||||
|
|
||||||
|
# Git safe.directory (needed after volume recreation)
|
||||||
|
docker exec -u agent disinto-agents git config --global --add safe.directory /home/agent/repos/harb
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 6: Verify harb stack coexistence
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Harb stack still running?
|
||||||
|
cd ~/harb && docker compose ps --format 'table {{.Name}}\t{{.Status}}'
|
||||||
|
|
||||||
|
# No port conflicts?
|
||||||
|
# Forgejo: 3000, Woodpecker: 8000, harb caddy: 8081, umami: 3001
|
||||||
|
ss -tlnp | grep -E '3000|3001|8000|8081'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 7: Docker disk hygiene
|
||||||
|
|
||||||
|
The reproduce image is ~1.3GB. Dangling images accumulate fast.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check disk
|
||||||
|
df -h /
|
||||||
|
|
||||||
|
# Prune dangling images (safe — only removes unused)
|
||||||
|
docker image prune -f
|
||||||
|
|
||||||
|
# Nuclear option (removes ALL unused images, volumes, networks):
|
||||||
|
docker system prune -af
|
||||||
|
# WARNING: this removes cached layers, requiring full rebuilds
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Forgejo at 170%+ CPU, not responding
|
||||||
|
AppArmor issue. Add `security_opt: [apparmor=unconfined]` and recreate:
|
||||||
|
```bash
|
||||||
|
docker compose up -d forgejo
|
||||||
|
```
|
||||||
|
|
||||||
|
### "Not logged in" / OAuth expired
|
||||||
|
Re-auth on the host:
|
||||||
|
```bash
|
||||||
|
claude auth login
|
||||||
|
```
|
||||||
|
Credentials are bind-mounted into containers automatically.
|
||||||
|
Multiple containers sharing OAuth can cause frequent expiry — consider
|
||||||
|
using `ANTHROPIC_API_KEY` in `.env` instead.
|
||||||
|
|
||||||
|
### Agent loop not running after restart
|
||||||
|
The entrypoint reads `projects/*.toml` to determine which agents to run.
|
||||||
|
If the TOML isn't mounted or the disinto directory is read-only,
|
||||||
|
the polling loop won't start agents. Check:
|
||||||
|
```bash
|
||||||
|
docker exec disinto-agents ls /home/agent/disinto/projects/harb.toml
|
||||||
|
docker logs disinto-agents --tail 20 # look for "Entering polling loop"
|
||||||
|
```
|
||||||
|
|
||||||
|
### "fatal: not a git repository"
|
||||||
|
After image rebuilds, the baked-in `/home/agent/disinto` has no `.git`.
|
||||||
|
This breaks review-pr.sh (#408). Workaround:
|
||||||
|
```bash
|
||||||
|
docker exec -u agent disinto-agents git config --global --add safe.directory '*'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Dev-agent stuck on closed issue
|
||||||
|
The dev-poll latches onto in-progress issues. If the issue was closed
|
||||||
|
externally, the agent skips it every cycle but never moves on. Check:
|
||||||
|
```bash
|
||||||
|
docker exec disinto-agents tail -5 /home/agent/data/logs/dev/dev-agent.log
|
||||||
|
```
|
||||||
|
Fix: clean the worktree and let it re-scan:
|
||||||
|
```bash
|
||||||
|
docker exec disinto-agents rm -rf /tmp/harb-worktree-*
|
||||||
|
```
|
||||||
187
formulas/release.sh
Normal file
187
formulas/release.sh
Normal file
|
|
@ -0,0 +1,187 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# formulas/release.sh — Mechanical release script
|
||||||
|
#
|
||||||
|
# Implements the release workflow without Claude:
|
||||||
|
# 1. Validate prerequisites
|
||||||
|
# 2. Tag Forgejo main via API
|
||||||
|
# 3. Push tag to mirrors (Codeberg, GitHub) via token auth
|
||||||
|
# 4. Build and tag the agents Docker image
|
||||||
|
# 5. Restart agent containers
|
||||||
|
#
|
||||||
|
# Usage: release.sh <action-id>
|
||||||
|
#
|
||||||
|
# Expects env vars:
|
||||||
|
# FORGE_URL, FORGE_TOKEN, FORGE_REPO, PRIMARY_BRANCH
|
||||||
|
# GITHUB_TOKEN — for pushing tags to GitHub mirror
|
||||||
|
# CODEBERG_TOKEN — for pushing tags to Codeberg mirror
|
||||||
|
#
|
||||||
|
# The action TOML context field must contain the version, e.g.:
|
||||||
|
# context = "Release v1.2.0"
|
||||||
|
#
|
||||||
|
# Part of #516.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
FACTORY_ROOT="${FACTORY_ROOT:-/home/agent/disinto}"
|
||||||
|
OPS_REPO_ROOT="${OPS_REPO_ROOT:-/home/agent/ops}"
|
||||||
|
|
||||||
|
log() {
|
||||||
|
printf '[%s] release: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$*"
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Argument parsing ─────────────────────────────────────────────────────
|
||||||
|
# VAULT_ACTION_TOML is exported by the runner entrypoint (entrypoint-runner.sh)
|
||||||
|
|
||||||
|
action_id="${1:-}"
|
||||||
|
if [ -z "$action_id" ]; then
|
||||||
|
log "ERROR: action-id argument required"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
action_toml="${VAULT_ACTION_TOML:-${OPS_REPO_ROOT}/vault/actions/${action_id}.toml}"
|
||||||
|
if [ ! -f "$action_toml" ]; then
|
||||||
|
log "ERROR: vault action TOML not found: ${action_toml}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract version from context field (e.g. "Release v1.2.0" → "v1.2.0")
|
||||||
|
context=$(grep -E '^context\s*=' "$action_toml" \
|
||||||
|
| sed -E 's/^context\s*=\s*"(.*)"/\1/' | tr -d '\r')
|
||||||
|
RELEASE_VERSION=$(echo "$context" | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+') || true
|
||||||
|
|
||||||
|
if [ -z "${RELEASE_VERSION:-}" ]; then
|
||||||
|
log "ERROR: could not extract version from context: '${context}'"
|
||||||
|
log "Context must contain a version like v1.2.0"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "Starting release ${RELEASE_VERSION} (action: ${action_id})"
|
||||||
|
|
||||||
|
# ── Step 1: Preflight ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
log "Step 1/6: Preflight checks"
|
||||||
|
|
||||||
|
# Validate version format
|
||||||
|
if ! echo "$RELEASE_VERSION" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then
|
||||||
|
log "ERROR: invalid version format: ${RELEASE_VERSION}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Required env vars
|
||||||
|
for var in FORGE_URL FORGE_TOKEN FORGE_REPO PRIMARY_BRANCH; do
|
||||||
|
if [ -z "${!var:-}" ]; then
|
||||||
|
log "ERROR: required env var not set: ${var}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Check Docker access
|
||||||
|
if ! docker info >/dev/null 2>&1; then
|
||||||
|
log "ERROR: Docker not accessible"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check tag doesn't already exist on Forgejo
|
||||||
|
if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_URL}/api/v1/repos/${FORGE_REPO}/tags/${RELEASE_VERSION}" >/dev/null 2>&1; then
|
||||||
|
log "ERROR: tag ${RELEASE_VERSION} already exists on Forgejo"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "Preflight passed"
|
||||||
|
|
||||||
|
# ── Step 2: Tag main via Forgejo API ─────────────────────────────────────
|
||||||
|
|
||||||
|
log "Step 2/6: Creating tag ${RELEASE_VERSION} on Forgejo"
|
||||||
|
|
||||||
|
# Get HEAD SHA of primary branch
|
||||||
|
head_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_URL}/api/v1/repos/${FORGE_REPO}/branches/${PRIMARY_BRANCH}" \
|
||||||
|
| jq -r '.commit.id // empty')
|
||||||
|
|
||||||
|
if [ -z "$head_sha" ]; then
|
||||||
|
log "ERROR: could not get HEAD SHA for ${PRIMARY_BRANCH}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create tag via API
|
||||||
|
curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${FORGE_URL}/api/v1/repos/${FORGE_REPO}/tags" \
|
||||||
|
-d "{\"tag_name\":\"${RELEASE_VERSION}\",\"target\":\"${head_sha}\",\"message\":\"Release ${RELEASE_VERSION}\"}" \
|
||||||
|
>/dev/null
|
||||||
|
|
||||||
|
log "Tag ${RELEASE_VERSION} created (SHA: ${head_sha})"
|
||||||
|
|
||||||
|
# ── Step 3: Push tag to mirrors ──────────────────────────────────────────
|
||||||
|
|
||||||
|
log "Step 3/6: Pushing tag to mirrors"
|
||||||
|
|
||||||
|
# Extract org/repo from FORGE_REPO (e.g. "disinto-admin/disinto" → "disinto")
|
||||||
|
project_name="${FORGE_REPO##*/}"
|
||||||
|
|
||||||
|
# Push to GitHub mirror (if GITHUB_TOKEN is available)
|
||||||
|
if [ -n "${GITHUB_TOKEN:-}" ]; then
|
||||||
|
log "Pushing tag to GitHub mirror"
|
||||||
|
# Create tag on GitHub via API
|
||||||
|
if curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${GITHUB_TOKEN}" \
|
||||||
|
-H "Accept: application/vnd.github+json" \
|
||||||
|
"https://api.github.com/repos/Disinto/${project_name}/git/refs" \
|
||||||
|
-d "{\"ref\":\"refs/tags/${RELEASE_VERSION}\",\"sha\":\"${head_sha}\"}" \
|
||||||
|
>/dev/null 2>&1; then
|
||||||
|
log "GitHub: tag pushed"
|
||||||
|
else
|
||||||
|
log "WARNING: GitHub tag push failed (may already exist)"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log "WARNING: GITHUB_TOKEN not set — skipping GitHub mirror"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Push to Codeberg mirror (if CODEBERG_TOKEN is available)
|
||||||
|
if [ -n "${CODEBERG_TOKEN:-}" ]; then
|
||||||
|
log "Pushing tag to Codeberg mirror"
|
||||||
|
# Codeberg uses Gitea-compatible API
|
||||||
|
# Extract owner from FORGE_REPO for Codeberg (use same owner)
|
||||||
|
codeberg_owner="${FORGE_REPO%%/*}"
|
||||||
|
if curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${CODEBERG_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"https://codeberg.org/api/v1/repos/${codeberg_owner}/${project_name}/tags" \
|
||||||
|
-d "{\"tag_name\":\"${RELEASE_VERSION}\",\"target\":\"${head_sha}\",\"message\":\"Release ${RELEASE_VERSION}\"}" \
|
||||||
|
>/dev/null 2>&1; then
|
||||||
|
log "Codeberg: tag pushed"
|
||||||
|
else
|
||||||
|
log "WARNING: Codeberg tag push failed (may already exist)"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log "WARNING: CODEBERG_TOKEN not set — skipping Codeberg mirror"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Step 4: Build agents Docker image ────────────────────────────────────
|
||||||
|
|
||||||
|
log "Step 4/6: Building agents Docker image"
|
||||||
|
|
||||||
|
cd "$FACTORY_ROOT" || exit 1
|
||||||
|
docker compose build --no-cache agents 2>&1 | tail -5
|
||||||
|
log "Image built"
|
||||||
|
|
||||||
|
# ── Step 5: Tag image with version ───────────────────────────────────────
|
||||||
|
|
||||||
|
log "Step 5/6: Tagging image"
|
||||||
|
|
||||||
|
docker tag disinto/agents:latest "disinto/agents:${RELEASE_VERSION}"
|
||||||
|
log "Tagged disinto/agents:${RELEASE_VERSION}"
|
||||||
|
|
||||||
|
# ── Step 6: Restart agent containers ─────────────────────────────────────
|
||||||
|
|
||||||
|
log "Step 6/6: Restarting agent containers"
|
||||||
|
|
||||||
|
docker compose stop agents agents-llama 2>/dev/null || true
|
||||||
|
docker compose up -d agents agents-llama
|
||||||
|
log "Agent containers restarted"
|
||||||
|
|
||||||
|
# ── Done ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
log "Release ${RELEASE_VERSION} completed successfully"
|
||||||
|
|
@ -58,7 +58,7 @@ Validate release prerequisites before proceeding.
|
||||||
|
|
||||||
7. Check if tag already exists on Forgejo:
|
7. Check if tag already exists on Forgejo:
|
||||||
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
- "$FORGE_URL/api/v1/repos/johba/disinto/git/tags/$RELEASE_VERSION"
|
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/git/tags/$RELEASE_VERSION"
|
||||||
- If exists, exit with error
|
- If exists, exit with error
|
||||||
|
|
||||||
8. Export RELEASE_VERSION for subsequent steps:
|
8. Export RELEASE_VERSION for subsequent steps:
|
||||||
|
|
@ -77,14 +77,14 @@ Create the release tag on Forgejo main via the Forgejo API.
|
||||||
|
|
||||||
1. Get current HEAD SHA of main:
|
1. Get current HEAD SHA of main:
|
||||||
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
- "$FORGE_URL/api/v1/repos/johba/disinto/branches/$PRIMARY_BRANCH"
|
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/branches/$PRIMARY_BRANCH"
|
||||||
- Parse sha field from response
|
- Parse sha field from response
|
||||||
|
|
||||||
2. Create tag via Forgejo API:
|
2. Create tag via Forgejo API:
|
||||||
- curl -sf -X POST \
|
- curl -sf -X POST \
|
||||||
- -H "Authorization: token $FORGE_TOKEN" \
|
- -H "Authorization: token $FORGE_TOKEN" \
|
||||||
- -H "Content-Type: application/json" \
|
- -H "Content-Type: application/json" \
|
||||||
- "$FORGE_URL/api/v1/repos/johba/disinto/tags" \
|
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/tags" \
|
||||||
- -d "{\"tag\":\"$RELEASE_VERSION\",\"target\":\"$HEAD_SHA\",\"message\":\"Release $RELEASE_VERSION\"}"
|
- -d "{\"tag\":\"$RELEASE_VERSION\",\"target\":\"$HEAD_SHA\",\"message\":\"Release $RELEASE_VERSION\"}"
|
||||||
- Parse response for success
|
- Parse response for success
|
||||||
|
|
||||||
|
|
@ -106,8 +106,8 @@ description = """
|
||||||
Push the newly created tag to all configured mirrors.
|
Push the newly created tag to all configured mirrors.
|
||||||
|
|
||||||
1. Add mirror remotes if not already present:
|
1. Add mirror remotes if not already present:
|
||||||
- Codeberg: git remote add codeberg git@codeberg.org:johba/disinto.git
|
- Codeberg: git remote add codeberg git@codeberg.org:${FORGE_REPO_OWNER}/${PROJECT_NAME}.git
|
||||||
- GitHub: git remote add github git@github.com:disinto/disinto.git
|
- GitHub: git remote add github git@github.com:disinto/${PROJECT_NAME}.git
|
||||||
- Check with: git remote -v
|
- Check with: git remote -v
|
||||||
|
|
||||||
2. Push tag to Codeberg:
|
2. Push tag to Codeberg:
|
||||||
|
|
@ -120,9 +120,9 @@ Push the newly created tag to all configured mirrors.
|
||||||
|
|
||||||
4. Verify tags exist on mirrors:
|
4. Verify tags exist on mirrors:
|
||||||
- curl -sf -H "Authorization: token $GITHUB_TOKEN" \
|
- curl -sf -H "Authorization: token $GITHUB_TOKEN" \
|
||||||
- "https://api.github.com/repos/disinto/disinto/tags/$RELEASE_VERSION"
|
- "https://api.github.com/repos/disinto/${PROJECT_NAME}/tags/$RELEASE_VERSION"
|
||||||
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
- "$FORGE_URL/api/v1/repos/johba/disinto/git/tags/$RELEASE_VERSION"
|
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/git/tags/$RELEASE_VERSION"
|
||||||
|
|
||||||
5. Log success:
|
5. Log success:
|
||||||
- echo "Tag $RELEASE_VERSION pushed to mirrors"
|
- echo "Tag $RELEASE_VERSION pushed to mirrors"
|
||||||
|
|
@ -227,7 +227,7 @@ Write the release result to a file for tracking.
|
||||||
- {
|
- {
|
||||||
- "version": "$RELEASE_VERSION",
|
- "version": "$RELEASE_VERSION",
|
||||||
- "image_id": "$IMAGE_ID",
|
- "image_id": "$IMAGE_ID",
|
||||||
- "forgejo_tag_url": "$FORGE_URL/johba/disinto/src/$RELEASE_VERSION",
|
- "forgejo_tag_url": "$FORGE_URL/$FORGE_REPO/src/$RELEASE_VERSION",
|
||||||
- "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
|
- "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
|
||||||
- "status": "success"
|
- "status": "success"
|
||||||
- }
|
- }
|
||||||
|
|
|
||||||
37
formulas/reproduce.toml
Normal file
37
formulas/reproduce.toml
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
# formulas/reproduce.toml — Reproduce-agent formula
|
||||||
|
#
|
||||||
|
# Declares the reproduce-agent's runtime parameters.
|
||||||
|
# The dispatcher reads this to configure the sidecar container.
|
||||||
|
#
|
||||||
|
# stack_script: path (relative to PROJECT_REPO_ROOT) of the script used to
|
||||||
|
# restart/rebuild the project stack before reproduction. Omit (or leave
|
||||||
|
# blank) to connect to an existing staging environment instead.
|
||||||
|
#
|
||||||
|
# tools: MCP servers to pass to claude via --mcp-server flags.
|
||||||
|
#
|
||||||
|
# timeout_minutes: hard upper bound on the Claude session.
|
||||||
|
#
|
||||||
|
# Exit gate logic (standard mode):
|
||||||
|
# 1. Can I reproduce it? → NO → rejected/blocked → EXIT
|
||||||
|
# → YES → continue
|
||||||
|
# 2. Is the cause obvious? → YES → in-progress + backlog issue → EXIT
|
||||||
|
# → NO → in-triage → EXIT
|
||||||
|
#
|
||||||
|
# Exit gate logic (verification mode):
|
||||||
|
# Triggered when all sub-issues of a parent bug-report are closed.
|
||||||
|
# 1. Bug fixed → comment "verified fixed", remove in-progress, close issue
|
||||||
|
# 2. Bug persists → comment "still reproduces", add in-triage, re-enter triage
|
||||||
|
#
|
||||||
|
# Turn budget (standard mode): 60% on step 1 (reproduction), 40% on step 2 (cause check).
|
||||||
|
# Turn budget (verification mode): 100% on re-running reproduction steps.
|
||||||
|
|
||||||
|
name = "reproduce"
|
||||||
|
description = "Primary: reproduce the bug. Secondary: check if cause is obvious. Exit gates enforced."
|
||||||
|
version = 1
|
||||||
|
|
||||||
|
# Set stack_script to the restart command for local stacks.
|
||||||
|
# Leave empty ("") to target an existing staging environment.
|
||||||
|
stack_script = ""
|
||||||
|
|
||||||
|
tools = ["playwright"]
|
||||||
|
timeout_minutes = 15
|
||||||
|
|
@ -61,6 +61,25 @@ Do NOT flag:
|
||||||
- Things that look wrong but actually work — verify by reading the code first
|
- Things that look wrong but actually work — verify by reading the code first
|
||||||
- Files that were truncated from the diff (the orchestrator notes truncation)
|
- Files that were truncated from the diff (the orchestrator notes truncation)
|
||||||
|
|
||||||
|
## 3b. Architecture and documentation consistency
|
||||||
|
|
||||||
|
For each BEHAVIORAL change in the diff (not pure bug fixes or formatting):
|
||||||
|
|
||||||
|
1. Identify what behavior changed (e.g., scheduling mechanism, auth flow,
|
||||||
|
container lifecycle, secret handling)
|
||||||
|
2. Search AGENTS.md for claims about that behavior:
|
||||||
|
grep -n '<keyword>' AGENTS.md
|
||||||
|
Also check docs/ and any per-directory AGENTS.md files.
|
||||||
|
3. Search for Architecture Decision references (AD-001 through AD-006):
|
||||||
|
grep -n 'AD-0' AGENTS.md
|
||||||
|
Read each AD and check if the PR's changes contradict it.
|
||||||
|
4. If the PR changes behavior described in AGENTS.md or contradicts an AD
|
||||||
|
but does NOT update the documentation in the same PR:
|
||||||
|
REQUEST_CHANGES — require the documentation update in the same PR.
|
||||||
|
|
||||||
|
This check is SKIPPED for pure bug fixes where the intended behavior is
|
||||||
|
unchanged (the code was wrong, not the documentation).
|
||||||
|
|
||||||
## 4. Vault item quality (conditional)
|
## 4. Vault item quality (conditional)
|
||||||
|
|
||||||
If the PR adds or modifies vault item files (`vault/pending/*.md` in the ops repo), apply these
|
If the PR adds or modifies vault item files (`vault/pending/*.md` in the ops repo), apply these
|
||||||
|
|
@ -177,8 +196,16 @@ tech-debt issues via API so they are tracked separately:
|
||||||
-H "Content-Type: application/json" "$FORGE_API/issues" \
|
-H "Content-Type: application/json" "$FORGE_API/issues" \
|
||||||
-d '{"title":"...","body":"Flagged by AI reviewer in PR #NNN.\n\n## Problem\n...\n\n---\n*Auto-created from AI review*","labels":[TECH_DEBT_ID]}'
|
-d '{"title":"...","body":"Flagged by AI reviewer in PR #NNN.\n\n## Problem\n...\n\n---\n*Auto-created from AI review*","labels":[TECH_DEBT_ID]}'
|
||||||
|
|
||||||
Only create follow-ups for clear, actionable tech debt. Do not create
|
File a tech-debt issue for every finding rated **medium** or higher that
|
||||||
issues for minor style nits or speculative improvements.
|
is pre-existing (not introduced by this PR). Also file for **low** findings
|
||||||
|
that represent correctness risks (dead code that masks bugs, misleading
|
||||||
|
documentation, unguarded variables under set -u).
|
||||||
|
|
||||||
|
Do NOT file for: style preferences, naming opinions, missing comments,
|
||||||
|
or speculative improvements with no concrete failure mode.
|
||||||
|
|
||||||
|
When in doubt, file. A closed-as-wontfix tech-debt issue costs nothing;
|
||||||
|
an unfiled bug costs a future debugging session.
|
||||||
|
|
||||||
## 8. Verdict
|
## 8. Verdict
|
||||||
|
|
||||||
|
|
@ -191,6 +218,11 @@ Bias toward APPROVE for small, correct changes. Use REQUEST_CHANGES only
|
||||||
for actual problems (bugs, security issues, broken functionality, missing
|
for actual problems (bugs, security issues, broken functionality, missing
|
||||||
required behavior). Use DISCUSS sparingly.
|
required behavior). Use DISCUSS sparingly.
|
||||||
|
|
||||||
|
Note: The bias toward APPROVE applies to code correctness and style decisions.
|
||||||
|
It does NOT apply to documentation consistency (step 3b) or tech-debt filing
|
||||||
|
(step 7) — those are separate concerns that should be handled regardless of
|
||||||
|
the change's correctness.
|
||||||
|
|
||||||
## 9. Output
|
## 9. Output
|
||||||
|
|
||||||
Write a single JSON object to the file path from REVIEW_OUTPUT_FILE.
|
Write a single JSON object to the file path from REVIEW_OUTPUT_FILE.
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,34 @@
|
||||||
# formulas/run-architect.toml — Architect formula
|
# formulas/run-architect.toml — Architect formula
|
||||||
#
|
#
|
||||||
# Executed by architect-run.sh via cron — strategic decomposition of vision
|
# Executed by architect-run.sh via polling loop — strategic decomposition of vision
|
||||||
# issues into development sprints.
|
# issues into development sprints.
|
||||||
#
|
#
|
||||||
# This formula orchestrates the architect agent's workflow:
|
# This formula orchestrates the architect agent's workflow:
|
||||||
# Step 1: Preflight — validate prerequisites and identify target issue
|
# Step 1: Preflight — bash handles state management:
|
||||||
# Step 2: Research + pitch — analyze codebase and write sprint pitch
|
# - Fetch open vision issues from Forgejo API
|
||||||
# Step 3: Sprint PR creation with questions (issue #101)
|
# - Fetch open architect PRs on ops repo
|
||||||
|
# - Fetch merged architect PRs (already pitched visions)
|
||||||
|
# - Filter: remove visions with open PRs, merged sprints, or sub-issues
|
||||||
|
# - Select up to 3 remaining vision issues for pitching
|
||||||
|
# Step 2: Stateless pitch generation — for each selected issue:
|
||||||
|
# - Invoke claude -p with: vision issue body + codebase context
|
||||||
|
# - Model NEVER calls Forgejo API — only generates pitch markdown
|
||||||
|
# - Bash creates the ops PR with pitch content
|
||||||
|
# - Bash posts the ACCEPT/REJECT footer comment
|
||||||
|
# Step 3: Sprint PR creation with questions (issue #101) (one PR per pitch)
|
||||||
# Step 4: Answer parsing + sub-issue filing (issue #102)
|
# Step 4: Answer parsing + sub-issue filing (issue #102)
|
||||||
#
|
#
|
||||||
|
# Architecture:
|
||||||
|
# - Bash script (architect-run.sh) handles ALL state management
|
||||||
|
# - Model calls are stateless — no Forgejo API access, no memory between calls
|
||||||
|
# - Dedup is automatic via bash filters (no journal-based memory needed)
|
||||||
|
# - Max 3 open architect PRs at any time
|
||||||
|
#
|
||||||
# AGENTS.md maintenance is handled by the gardener (#246).
|
# AGENTS.md maintenance is handled by the gardener (#246).
|
||||||
|
|
||||||
name = "run-architect"
|
name = "run-architect"
|
||||||
description = "Architect: strategic decomposition of vision into sprints"
|
description = "Architect: strategic decomposition of vision into sprints"
|
||||||
version = 1
|
version = 2
|
||||||
model = "opus"
|
model = "opus"
|
||||||
|
|
||||||
[context]
|
[context]
|
||||||
|
|
@ -23,126 +38,90 @@ files = ["VISION.md", "AGENTS.md"]
|
||||||
|
|
||||||
[[steps]]
|
[[steps]]
|
||||||
id = "preflight"
|
id = "preflight"
|
||||||
title = "Preflight: validate prerequisites and identify target vision issue"
|
title = "Preflight: bash-driven state management and issue selection"
|
||||||
description = """
|
description = """
|
||||||
This step performs preflight checks and identifies the most unblocking vision issue.
|
This step performs preflight checks and selects up to 3 vision issues for pitching.
|
||||||
|
IMPORTANT: All state management is handled by bash (architect-run.sh), NOT the model.
|
||||||
|
|
||||||
Actions:
|
Architecture Decision: Bash-driven orchestration with stateless model calls
|
||||||
1. Pull latest code from both disinto repo and ops repo
|
- The model NEVER calls Forgejo API during pitching
|
||||||
2. Read prerequisite tree from $OPS_REPO_ROOT/prerequisites.md
|
- Bash fetches all data from Forgejo API (vision issues, open PRs, merged PRs)
|
||||||
3. Fetch open issues labeled 'vision' from Forgejo API
|
- Bash filters and deduplicates (no model-level dedup or journal-based memory)
|
||||||
4. Check for open architect PRs on ops repo (handled by #101/#102)
|
- For each selected issue, bash invokes stateless claude -p (model only generates pitch)
|
||||||
5. If open architect PRs exist, handle accept/reject responses (see Capability B below)
|
- Bash creates PRs and posts footer comments (no model API access)
|
||||||
6. If no vision issues, signal PHASE:done
|
|
||||||
|
Bash Actions (in architect-run.sh):
|
||||||
|
1. Fetch open vision issues from Forgejo API: GET /repos/{owner}/{repo}/issues?labels=vision&state=open
|
||||||
|
2. Fetch open architect PRs from ops repo: GET /repos/{owner}/{repo}/pulls?state=open
|
||||||
|
3. Fetch merged sprint PRs: GET /repos/{owner}/{repo}/pulls?state=closed (filter merged=true)
|
||||||
|
4. Filter out visions that:
|
||||||
|
- Already have open architect PRs (check PR body for issue number reference)
|
||||||
|
- Have in-progress label
|
||||||
|
- Have open sub-issues (check for 'Decomposed from #N' pattern)
|
||||||
|
- Have merged sprint PRs (decomposition already done)
|
||||||
|
5. Select up to (3 - open_architect_pr_count) remaining vision issues
|
||||||
|
6. If no issues remain AND no responses to process, signal PHASE:done
|
||||||
|
|
||||||
|
If open architect PRs exist, handle accept/reject responses FIRST (see Capability B below).
|
||||||
|
After handling existing PRs, count remaining open architect PRs and calculate pitch_budget.
|
||||||
|
|
||||||
|
## Multi-pitch selection (up to 3 per run)
|
||||||
|
|
||||||
|
After handling existing PRs, determine how many new pitches can be created:
|
||||||
|
|
||||||
|
pitch_budget = 3 - <number of open architect PRs remaining after handling>
|
||||||
|
|
||||||
|
For each available pitch slot:
|
||||||
|
1. From the vision issues list, skip any issue that already has an open architect PR
|
||||||
|
2. Skip any issue that already has the `in-progress` label
|
||||||
|
3. Check for existing sub-issues filed from this vision issue
|
||||||
|
4. Check for merged sprint PRs referencing this vision issue
|
||||||
|
5. From remaining candidates, pick the most unblocking issue first
|
||||||
|
6. Add to ARCHITECT_TARGET_ISSUES array
|
||||||
|
|
||||||
Skip conditions:
|
Skip conditions:
|
||||||
- If no vision issues are found, signal PHASE:done
|
- If no vision issues are found, signal PHASE:done
|
||||||
|
- If pitch_budget <= 0 (already 3 open architect PRs), skip pitching
|
||||||
|
- If all vision issues already have open architect PRs, signal PHASE:done
|
||||||
|
- If all vision issues have open sub-issues, skip pitching
|
||||||
|
- If all vision issues have merged sprint PRs, skip pitching
|
||||||
|
|
||||||
Output:
|
Output:
|
||||||
- Sets ARCHITECT_TARGET_ISSUE to the issue number of the selected vision issue
|
- Sets ARCHITECT_TARGET_ISSUES as a JSON array of issue numbers to pitch (up to 3)
|
||||||
- Exports VISION_ISSUES as a JSON array of issue objects
|
|
||||||
|
|
||||||
## Capability B: Handle accept/reject on existing pitch PRs
|
|
||||||
|
|
||||||
When open architect PRs exist on the ops repo:
|
|
||||||
|
|
||||||
1. Fetch comments on each open architect PR via Forgejo API
|
|
||||||
2. Look for human response:
|
|
||||||
|
|
||||||
**ACCEPT** (case insensitive): Human wants to proceed
|
|
||||||
- Architect does deep research for design forks (same as #100 research but now identifying decision points)
|
|
||||||
- Formulates multiple-choice questions (Q1, Q2, Q3...)
|
|
||||||
- Updates the sprint spec file on the PR branch:
|
|
||||||
- Adds `## Design forks` section with fork options
|
|
||||||
- Adds `## Proposed sub-issues` section with concrete issues per fork path
|
|
||||||
- Comments on the PR with the questions formatted as multiple choice
|
|
||||||
- Signal PHASE:done (answer processing is #102)
|
|
||||||
|
|
||||||
**REJECT: <reason>** (case insensitive, reason after colon):
|
|
||||||
- Journal the rejection reason via profile_write_journal (if .profile exists)
|
|
||||||
— the architect learns what pitches fail
|
|
||||||
- Close the PR via Forgejo API (do not merge — rejected pitches do not persist in sprints/)
|
|
||||||
- Remove the branch via Forgejo API
|
|
||||||
- Signal PHASE:done
|
|
||||||
|
|
||||||
**No response yet**: skip silently, signal PHASE:done
|
|
||||||
|
|
||||||
All git operations use the Forgejo API (create branch, write/update file, create PR,
|
|
||||||
close PR, delete branch). No SSH.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
[[steps]]
|
[[steps]]
|
||||||
id = "research_pitch"
|
id = "research_pitch"
|
||||||
title = "Research + pitch: analyze codebase and write sprint pitch"
|
title = "Stateless pitch generation: model generates content, bash creates PRs"
|
||||||
description = """
|
description = """
|
||||||
This step performs deep codebase research and writes a sprint pitch for the
|
IMPORTANT: This step is executed by bash (architect-run.sh) via stateless claude -p calls.
|
||||||
selected vision issue.
|
The model NEVER calls Forgejo API — it only reads context and generates pitch markdown.
|
||||||
|
|
||||||
Actions:
|
Architecture:
|
||||||
|
- Bash orchestrates the loop over ARCHITECT_TARGET_ISSUES
|
||||||
|
- For each issue: bash fetches issue body from Forgejo API, then invokes stateless claude -p
|
||||||
|
- Model receives: vision issue body + codebase context (VISION.md, AGENTS.md, prerequisites.md)
|
||||||
|
- Model outputs: sprint pitch markdown ONLY (no API calls, no side effects)
|
||||||
|
- Bash creates the PR and posts the ACCEPT/REJECT footer comment
|
||||||
|
|
||||||
1. Read the codebase deeply:
|
For each issue in ARCHITECT_TARGET_ISSUES, bash performs:
|
||||||
- Read all files mentioned in the issue body
|
|
||||||
- Search for existing interfaces that could be reused
|
|
||||||
- Check what infrastructure already exists
|
|
||||||
|
|
||||||
2. Assess complexity and cost:
|
1. Fetch vision issue details from Forgejo API:
|
||||||
- How many files/subsystems are touched?
|
- GET /repos/{owner}/{repo}/issues/{issue_number}
|
||||||
- What new infrastructure would need to be maintained after this sprint?
|
- Extract: title, body
|
||||||
- What are the risks (breaking changes, security implications, integration complexity)?
|
|
||||||
- Is this mostly gluecode or greenfield?
|
|
||||||
|
|
||||||
3. Write sprint pitch to scratch file for PR creation step (#101):
|
2. Invoke stateless claude -p with prompt:
|
||||||
|
"Write a sprint pitch for this vision issue. Output only the pitch markdown."
|
||||||
|
Context provided:
|
||||||
|
- Vision issue #N: <title>
|
||||||
|
- Vision issue body
|
||||||
|
- Project context (VISION.md, AGENTS.md)
|
||||||
|
- Codebase context (prerequisites.md, graph section)
|
||||||
|
- Formula content
|
||||||
|
|
||||||
# Sprint pitch: <name>
|
3. Model generates pitch markdown (NO API CALLS):
|
||||||
|
|
||||||
## Vision issues
|
# Sprint: <sprint-name>
|
||||||
- #N — <title>
|
|
||||||
|
|
||||||
## What this enables
|
|
||||||
<what the project can do after this sprint that it can't do now>
|
|
||||||
|
|
||||||
## What exists today
|
|
||||||
<current state — infrastructure, interfaces, code that can be reused>
|
|
||||||
|
|
||||||
## Complexity
|
|
||||||
<number of files, subsystems, estimated sub-issues>
|
|
||||||
<gluecode vs greenfield ratio>
|
|
||||||
|
|
||||||
## Risks
|
|
||||||
<what could go wrong, what breaks if this is done badly>
|
|
||||||
|
|
||||||
## Cost — new infra to maintain
|
|
||||||
<what ongoing maintenance burden does this sprint add>
|
|
||||||
<new services, cron jobs, formulas, agent roles>
|
|
||||||
|
|
||||||
## Recommendation
|
|
||||||
<architect's assessment: worth it / defer / alternative approach>
|
|
||||||
|
|
||||||
IMPORTANT: Do NOT include design forks or questions yet. The pitch is a go/no-go
|
|
||||||
decision for the human. Questions come only after acceptance.
|
|
||||||
|
|
||||||
Output:
|
|
||||||
- Writes sprint pitch to $SCRATCH_FILE (/tmp/architect-{project}-scratch.md)
|
|
||||||
- The pitch serves as input for sprint PR creation step (#101)
|
|
||||||
"""
|
|
||||||
|
|
||||||
[[steps]]
|
|
||||||
id = "sprint_pr_creation"
|
|
||||||
title = "Sprint PR creation with questions (issue #101)"
|
|
||||||
description = """
|
|
||||||
This step creates a PR on the ops repo with the sprint proposal when no PR exists yet.
|
|
||||||
|
|
||||||
## Capability A: Create pitch PR (from research output)
|
|
||||||
|
|
||||||
If step 2 (research/pitch) produced a pitch and no PR exists yet:
|
|
||||||
|
|
||||||
1. Create branch `architect/<sprint-slug>` on ops repo via Forgejo API
|
|
||||||
- Sprint slug: lowercase, hyphenated version of sprint name
|
|
||||||
- Use Forgejo API: POST /repos/{owner}/{repo}/git/branches
|
|
||||||
|
|
||||||
2. Write sprint spec file to sprints/<sprint-slug>.md on the new branch:
|
|
||||||
|
|
||||||
# Sprint: <name>
|
|
||||||
|
|
||||||
## Vision issues
|
## Vision issues
|
||||||
- #N — <title>
|
- #N — <title>
|
||||||
|
|
@ -162,19 +141,104 @@ If step 2 (research/pitch) produced a pitch and no PR exists yet:
|
||||||
|
|
||||||
## Cost — new infra to maintain
|
## Cost — new infra to maintain
|
||||||
<what ongoing maintenance burden does this sprint add>
|
<what ongoing maintenance burden does this sprint add>
|
||||||
<new services, cron jobs, formulas, agent roles>
|
<new services, scheduled tasks, formulas, agent roles>
|
||||||
|
|
||||||
## Recommendation
|
## Recommendation
|
||||||
<architect's assessment: worth it / defer / alternative approach>
|
<architect's assessment: worth it / defer / alternative approach>
|
||||||
|
|
||||||
3. Create PR on ops repo via Forgejo API:
|
IMPORTANT: Do NOT include design forks or questions yet. The pitch is a go/no-go
|
||||||
- Title: `architect: <sprint summary>`
|
decision for the human. Questions come only after acceptance.
|
||||||
- Body: pitch content (what it enables, complexity, risks, cost)
|
|
||||||
- Base branch: primary branch (main/master)
|
|
||||||
- Head branch: architect/<sprint-slug>
|
|
||||||
- Footer: "Reply `ACCEPT` to proceed with design questions, or `REJECT: <reason>` to decline."
|
|
||||||
|
|
||||||
4. Signal PHASE:done
|
4. Bash creates PR:
|
||||||
|
- Create branch: architect/sprint-{pitch-number}
|
||||||
|
- Write sprint spec to sprints/{sprint-slug}.md
|
||||||
|
- Create PR with pitch content as body
|
||||||
|
- Post footer comment: "Reply ACCEPT to proceed with design questions, or REJECT: <reason> to decline."
|
||||||
|
- Add in-progress label to vision issue
|
||||||
|
|
||||||
|
Output:
|
||||||
|
- One PR per vision issue (up to 3 per run)
|
||||||
|
- Each PR contains the pitch markdown
|
||||||
|
- If ARCHITECT_TARGET_ISSUES is empty, skip this step
|
||||||
|
"""
|
||||||
|
|
||||||
|
[[steps]]
|
||||||
|
id = "sprint_pr_creation"
|
||||||
|
title = "Sprint PR creation with questions (issue #101) — handled by bash"
|
||||||
|
description = """
|
||||||
|
IMPORTANT: PR creation is handled by bash (architect-run.sh) during the pitch step.
|
||||||
|
This step is for documentation only — the actual PR creation happens in research_pitch.
|
||||||
|
|
||||||
|
## Approved PR → Initial design questions (issue #570)
|
||||||
|
|
||||||
|
When a sprint pitch PR receives an APPROVED review but has no `## Design forks`
|
||||||
|
section and no Q1:, Q2: comments yet, the architect enters a new state:
|
||||||
|
|
||||||
|
1. detect_approved_pending_questions() identifies this state
|
||||||
|
2. A fresh agent session starts with a special prompt
|
||||||
|
3. The agent reads the approved pitch, posts initial design questions (Q1:, Q2:, etc.)
|
||||||
|
4. The agent adds a `## Design forks` section to the PR body
|
||||||
|
5. The PR transitions into the questions phase, where the existing Q&A loop takes over
|
||||||
|
|
||||||
|
This ensures approved PRs don't sit indefinitely without design conversation.
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
- Bash creates PRs during stateless pitch generation (step 2)
|
||||||
|
- Model has no role in PR creation — no Forgejo API access
|
||||||
|
- This step describes the PR format for reference
|
||||||
|
|
||||||
|
PR Format (created by bash):
|
||||||
|
|
||||||
|
1. Branch: architect/sprint-{pitch-number}
|
||||||
|
|
||||||
|
2. Sprint spec file: sprints/{sprint-slug}.md
|
||||||
|
Contains the pitch markdown from the model.
|
||||||
|
|
||||||
|
3. PR via Forgejo API:
|
||||||
|
- Title: architect: <sprint summary>
|
||||||
|
- Body: plain markdown text from model output
|
||||||
|
- Base: main (or PRIMARY_BRANCH)
|
||||||
|
- Head: architect/sprint-{pitch-number}
|
||||||
|
- Footer comment: "Reply ACCEPT to proceed with design questions, or REJECT: <reason> to decline."
|
||||||
|
|
||||||
|
4. Add in-progress label to vision issue:
|
||||||
|
- Look up label ID: GET /repos/{owner}/{repo}/labels
|
||||||
|
- Add label: POST /repos/{owner}/{repo}/issues/{issue_number}/labels
|
||||||
|
|
||||||
|
After creating all PRs, signal PHASE:done.
|
||||||
|
|
||||||
|
## Forgejo API Reference
|
||||||
|
|
||||||
|
All operations use the Forgejo API with Authorization: token ${FORGE_TOKEN} header.
|
||||||
|
|
||||||
|
### Create branch
|
||||||
|
```
|
||||||
|
POST /repos/{owner}/{repo}/branches
|
||||||
|
Body: {"new_branch_name": "architect/<sprint-slug>", "old_branch_name": "main"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Create/update file
|
||||||
|
```
|
||||||
|
PUT /repos/{owner}/{repo}/contents/<path>
|
||||||
|
Body: {"message": "sprint: add <sprint-slug>.md", "content": "<base64-encoded-content>", "branch": "architect/<sprint-slug>"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Create PR
|
||||||
|
```
|
||||||
|
POST /repos/{owner}/{repo}/pulls
|
||||||
|
Body: {"title": "architect: <sprint summary>", "body": "<markdown-text>", "head": "architect/<sprint-slug>", "base": "main"}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Important: PR body format**
|
||||||
|
- The body field must contain plain markdown text (the raw content from the model)
|
||||||
|
- Do NOT JSON-encode or escape the body — pass it as a JSON string value
|
||||||
|
- Newlines and markdown formatting (headings, lists, etc.) must be preserved as-is
|
||||||
|
|
||||||
|
### Add label to issue
|
||||||
|
```
|
||||||
|
POST /repos/{owner}/{repo}/issues/{index}/labels
|
||||||
|
Body: {"labels": [<label-id>]}
|
||||||
|
```
|
||||||
|
|
||||||
## Forgejo API Reference
|
## Forgejo API Reference
|
||||||
|
|
||||||
|
|
@ -195,9 +259,14 @@ Body: {"message": "sprint: add <sprint-slug>.md", "content": "<base64-encoded-co
|
||||||
### Create PR
|
### Create PR
|
||||||
```
|
```
|
||||||
POST /repos/{owner}/{repo}/pulls
|
POST /repos/{owner}/{repo}/pulls
|
||||||
Body: {"title": "architect: <sprint summary>", "body": "<pitch-content>", "head": "architect/<sprint-slug>", "base": "main"}
|
Body: {"title": "architect: <sprint summary>", "body": "<markdown-text>", "head": "architect/<sprint-slug>", "base": "main"}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Important: PR body format**
|
||||||
|
- The `body` field must contain **plain markdown text** (the raw content from the scratch file)
|
||||||
|
- Do NOT JSON-encode or escape the body — pass it as a JSON string value
|
||||||
|
- Newlines and markdown formatting (headings, lists, etc.) must be preserved as-is
|
||||||
|
|
||||||
### Close PR
|
### Close PR
|
||||||
```
|
```
|
||||||
PATCH /repos/{owner}/{repo}/pulls/{index}
|
PATCH /repos/{owner}/{repo}/pulls/{index}
|
||||||
|
|
@ -208,97 +277,20 @@ Body: {"state": "closed"}
|
||||||
```
|
```
|
||||||
DELETE /repos/{owner}/{repo}/git/branches/<branch-name>
|
DELETE /repos/{owner}/{repo}/git/branches/<branch-name>
|
||||||
```
|
```
|
||||||
"""
|
|
||||||
|
|
||||||
[[steps]]
|
### Get labels (look up label IDs by name)
|
||||||
id = "answer_parsing"
|
|
||||||
title = "Answer parsing + sub-issue filing (issue #102)"
|
|
||||||
description = """
|
|
||||||
This step processes human answers to design questions and files sub-issues.
|
|
||||||
|
|
||||||
## Preflight: Detect PRs in question phase
|
|
||||||
|
|
||||||
An architect PR is in the question phase if ALL of the following are true:
|
|
||||||
- PR is open
|
|
||||||
- PR body or sprint spec file contains a `## Design forks` section (added by #101 after ACCEPT)
|
|
||||||
- PR has question comments (Q1, Q2, Q3... format)
|
|
||||||
|
|
||||||
## Answer parsing
|
|
||||||
|
|
||||||
Human comments on the PR use this format:
|
|
||||||
```
|
|
||||||
Q1: A
|
|
||||||
Q2: B
|
|
||||||
Q3: A
|
|
||||||
```
|
|
||||||
|
|
||||||
Parser matches lines starting with `Q` + digit(s) + `:` + space + letter A-D (case insensitive).
|
|
||||||
Ignore other content in the comment.
|
|
||||||
|
|
||||||
## Processing paths
|
|
||||||
|
|
||||||
### All questions answered (every `### Q` heading has a matching `Q<N>: <letter>` comment)
|
|
||||||
|
|
||||||
1. Parse each answer (e.g. `Q1: A`, `Q2: C`)
|
|
||||||
2. Read the sprint spec from the PR branch
|
|
||||||
3. Generate final sub-issues based on answers:
|
|
||||||
- Each sub-issue uses the appropriate issue template (bug/feature/refactor from `.codeberg/ISSUE_TEMPLATE/`)
|
|
||||||
- Fill all template fields:
|
|
||||||
- Problem/motivation (feature) or What's broken (bug/refactor)
|
|
||||||
- Proposed solution (feature) or Approach (refactor) or Steps to reproduce (bug)
|
|
||||||
- Affected files (max 3)
|
|
||||||
- Acceptance criteria (max 5)
|
|
||||||
- Dependencies
|
|
||||||
- File via Forgejo API on the **disinto repo** (not ops repo)
|
|
||||||
- Label as `backlog`
|
|
||||||
4. Comment on PR: "Sprint filed: #N, #N, #N"
|
|
||||||
5. Merge the PR (sprint spec with answers persists in `ops/sprints/`)
|
|
||||||
|
|
||||||
### Some questions answered, not all
|
|
||||||
|
|
||||||
1. Acknowledge answers received
|
|
||||||
2. Comment listing remaining unanswered questions
|
|
||||||
3. Signal PHASE:done (check again next poll)
|
|
||||||
|
|
||||||
### No answers yet (questions posted but human hasn't responded)
|
|
||||||
|
|
||||||
1. Skip — signal PHASE:done
|
|
||||||
|
|
||||||
## Forgejo API for filing issues on disinto repo
|
|
||||||
|
|
||||||
All operations use the Forgejo API with `Authorization: token ${FORGE_TOKEN}` header.
|
|
||||||
|
|
||||||
### Create issue
|
|
||||||
```
|
|
||||||
POST /repos/{owner}/{repo}/issues
|
|
||||||
Body: {
|
|
||||||
"title": "<issue title>",
|
|
||||||
"body": "<issue body with template fields>",
|
|
||||||
"labels": [123], // backlog label ID
|
|
||||||
"assignees": ["architect-bot"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Close PR
|
|
||||||
```
|
|
||||||
PATCH /repos/{owner}/{repo}/pulls/{index}
|
|
||||||
Body: {"state": "closed"}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Merge PR
|
|
||||||
```
|
|
||||||
POST /repos/{owner}/{repo}/pulls/{index}/merge
|
|
||||||
Body: {"Do": "merge"}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Post comment on PR (via issues endpoint)
|
|
||||||
```
|
|
||||||
POST /repos/{owner}/{repo}/issues/{index}/comments
|
|
||||||
Body: {"body": "<comment text>"}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Get label ID
|
|
||||||
```
|
```
|
||||||
GET /repos/{owner}/{repo}/labels
|
GET /repos/{owner}/{repo}/labels
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Add label to issue (for in-progress on vision issue)
|
||||||
|
```
|
||||||
|
POST /repos/{owner}/{repo}/issues/{index}/labels
|
||||||
|
Body: {"labels": [<label-id>]}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Remove label from issue (for in-progress removal on REJECT)
|
||||||
|
```
|
||||||
|
DELETE /repos/{owner}/{repo}/issues/{index}/labels/{label-id}
|
||||||
|
```
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -76,6 +76,63 @@ Pre-checks (bash, zero tokens — detect problems before invoking Claude):
|
||||||
6. Tech-debt promotion: list all tech-debt labeled issues — goal is to
|
6. Tech-debt promotion: list all tech-debt labeled issues — goal is to
|
||||||
process them all (promote to backlog or classify as dust).
|
process them all (promote to backlog or classify as dust).
|
||||||
|
|
||||||
|
7. Bug-report detection: for each open unlabeled issue (no backlog, no
|
||||||
|
bug-report, no in-progress, no blocked, no underspecified, no vision,
|
||||||
|
no tech-debt), check whether it describes a user-facing bug with
|
||||||
|
reproduction steps. Criteria — ALL must be true:
|
||||||
|
a. Body describes broken behavior (something that should work but
|
||||||
|
doesn't), NOT a feature request or enhancement
|
||||||
|
b. Body contains steps to reproduce (numbered list, "steps to
|
||||||
|
reproduce" heading, or clear sequence of actions that trigger the bug)
|
||||||
|
c. Issue is not already labeled
|
||||||
|
|
||||||
|
If all criteria match, enrich the issue body and write the manifest actions:
|
||||||
|
|
||||||
|
Body enrichment (CRITICAL — turns raw reports into actionable investigation briefs):
|
||||||
|
Before writing the add_label action, construct an enriched body by appending
|
||||||
|
these sections to the original issue body:
|
||||||
|
|
||||||
|
a. ``## What was reported``
|
||||||
|
One or two sentence summary of the user's claim. Distill the broken
|
||||||
|
behavior concisely — what the user expected vs. what actually happened.
|
||||||
|
|
||||||
|
b. ``## Known context``
|
||||||
|
What can be inferred from the codebase without running anything:
|
||||||
|
- Which contracts/components/files are involved (use AGENTS.md layout
|
||||||
|
and file paths mentioned in the issue or body)
|
||||||
|
- What the expected behavior should be (from VISION.md, docs, code)
|
||||||
|
- Any recent changes to involved components:
|
||||||
|
git log --oneline -5 -- <paths>
|
||||||
|
- Related issues or prior fixes (cross-reference by number if known)
|
||||||
|
|
||||||
|
c. ``## Reproduction plan``
|
||||||
|
Concrete steps for a reproduce-agent or human. Be specific:
|
||||||
|
- Which environment to use (e.g. "start fresh stack with
|
||||||
|
\`./scripts/dev.sh restart --full\`")
|
||||||
|
- Which transactions or actions to execute (with \`cast\` commands,
|
||||||
|
API calls, or UI navigation steps where applicable)
|
||||||
|
- What state to check after each step (contract reads, API queries,
|
||||||
|
UI observations, log output)
|
||||||
|
|
||||||
|
d. ``## What needs verification``
|
||||||
|
Checkboxes distinguishing known facts from unknowns:
|
||||||
|
- ``- [ ]`` Does the reported behavior actually occur? (reproduce)
|
||||||
|
- ``- [ ]`` Is <component X> behaving as expected? (check state)
|
||||||
|
- ``- [ ]`` Is the data flow correct from <A> to <B>? (trace)
|
||||||
|
Tailor these to the specific bug — three to five items covering the
|
||||||
|
key unknowns a reproduce-agent must resolve.
|
||||||
|
|
||||||
|
e. Construct full new body = original body text + appended sections.
|
||||||
|
Write an edit_body action BEFORE the add_label action:
|
||||||
|
echo '{"action":"edit_body","issue":NNN,"body":"<full new body>"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
|
||||||
|
|
||||||
|
f. Write the add_label action:
|
||||||
|
echo '{"action":"add_label","issue":NNN,"label":"bug-report"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
|
||||||
|
echo "ACTION: labeled #NNN as bug-report — <reason>" >> "$RESULT_FILE"
|
||||||
|
|
||||||
|
Do NOT also add the backlog label — bug-report is a separate triage
|
||||||
|
track that feeds into reproduction automation.
|
||||||
|
|
||||||
For each issue, choose ONE action and write to result file:
|
For each issue, choose ONE action and write to result file:
|
||||||
|
|
||||||
ACTION (substantial — promote, close duplicate, add acceptance criteria):
|
ACTION (substantial — promote, close duplicate, add acceptance criteria):
|
||||||
|
|
@ -164,11 +221,65 @@ Sibling dependency rule (CRITICAL):
|
||||||
Well-structured issues (both sections present) are left untouched —
|
Well-structured issues (both sections present) are left untouched —
|
||||||
they are ready for dev-agent pickup.
|
they are ready for dev-agent pickup.
|
||||||
|
|
||||||
|
8. Bug-report lifecycle — auto-close resolved parent issues:
|
||||||
|
For each open issue, check whether it is a parent that was decomposed
|
||||||
|
into sub-issues. A parent is identified by having OTHER issues whose
|
||||||
|
body contains "Decomposed from #N" where N is the parent's number.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
a. From the open issues fetched in step 1, collect all issue numbers.
|
||||||
|
b. For each open issue number N, search ALL issues (open AND closed)
|
||||||
|
for bodies containing "Decomposed from #N":
|
||||||
|
curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"$FORGE_API/issues?state=all&type=issues&limit=50" \
|
||||||
|
| jq -r --argjson n N \
|
||||||
|
'[.[] | select(.body != null) | select(.body | test("Decomposed from #" + ($n | tostring) + "\\b"))] | length'
|
||||||
|
If zero sub-issues found, skip — this is not a decomposed parent.
|
||||||
|
|
||||||
|
c. If sub-issues exist, check whether ALL of them are closed:
|
||||||
|
curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"$FORGE_API/issues?state=all&type=issues&limit=50" \
|
||||||
|
| jq -r --argjson n N \
|
||||||
|
'[.[] | select(.body != null) | select(.body | test("Decomposed from #" + ($n | tostring) + "\\b"))]
|
||||||
|
| {total: length, closed: [.[] | select(.state == "closed")] | length}
|
||||||
|
| .total == .closed'
|
||||||
|
If the result is "false", some sub-issues are still open — skip.
|
||||||
|
|
||||||
|
d. If ALL sub-issues are closed, collect sub-issue numbers and titles:
|
||||||
|
SUB_ISSUES=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"$FORGE_API/issues?state=all&type=issues&limit=50" \
|
||||||
|
| jq -r --argjson n N \
|
||||||
|
'[.[] | select(.body != null) | select(.body | test("Decomposed from #" + ($n | tostring) + "\\b"))]
|
||||||
|
| .[] | "- #\(.number) \(.title)"')
|
||||||
|
|
||||||
|
e. Write a comment action listing the resolved sub-issues.
|
||||||
|
Use jq to build valid JSON (sub-issue titles may contain quotes/backslashes,
|
||||||
|
and SUB_ISSUES is multiline — raw interpolation would break JSONL):
|
||||||
|
COMMENT_BODY=$(printf 'All sub-issues have been resolved:\n%s\n\nClosing this parent issue as all decomposed work is complete.' "$SUB_ISSUES")
|
||||||
|
jq -n --argjson issue N --arg body "$COMMENT_BODY" \
|
||||||
|
'{action:"comment", issue: $issue, body: $body}' \
|
||||||
|
>> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
|
||||||
|
|
||||||
|
f. Write a close action:
|
||||||
|
jq -n --argjson issue N \
|
||||||
|
'{action:"close", issue: $issue, reason: "all sub-issues resolved"}' \
|
||||||
|
>> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
|
||||||
|
|
||||||
|
g. Log the action:
|
||||||
|
echo "ACTION: closed #N — all sub-issues resolved" >> "$RESULT_FILE"
|
||||||
|
|
||||||
|
Edge cases:
|
||||||
|
- Already closed parent: skipped (only open issues are processed)
|
||||||
|
- No sub-issues found: skipped (not a decomposed issue)
|
||||||
|
- Multi-cause bugs: stays open until ALL sub-issues are closed
|
||||||
|
|
||||||
Processing order:
|
Processing order:
|
||||||
1. Handle PRIORITY_blockers_starving_factory first — promote or resolve
|
1. Handle PRIORITY_blockers_starving_factory first — promote or resolve
|
||||||
2. Quality gate — strip backlog from issues missing acceptance criteria or affected files
|
2. Quality gate — strip backlog from issues missing acceptance criteria or affected files
|
||||||
3. Process tech-debt issues by score (impact/effort)
|
3. Bug-report detection — label qualifying issues before other classification
|
||||||
4. Classify remaining items as dust or route to vault
|
4. Bug-report lifecycle — close parents whose sub-issues are all resolved
|
||||||
|
5. Process tech-debt issues by score (impact/effort)
|
||||||
|
6. Classify remaining items as dust or route to vault
|
||||||
|
|
||||||
Do NOT bundle dust yourself — the dust-bundling step handles accumulation,
|
Do NOT bundle dust yourself — the dust-bundling step handles accumulation,
|
||||||
dedup, TTL expiry, and bundling into backlog issues.
|
dedup, TTL expiry, and bundling into backlog issues.
|
||||||
|
|
@ -233,13 +344,12 @@ needs = ["grooming"]
|
||||||
|
|
||||||
[[steps]]
|
[[steps]]
|
||||||
id = "agents-update"
|
id = "agents-update"
|
||||||
title = "Check AGENTS.md watermarks, update stale files, enforce size limit"
|
title = "Check AGENTS.md watermarks, discover structural changes, update stale files"
|
||||||
description = """
|
description = """
|
||||||
Check all AGENTS.md files for staleness, update any that are outdated, and
|
Maintain all AGENTS.md files by detecting structural drift since the last
|
||||||
enforce the ~200-line size limit via progressive disclosure splitting.
|
review. Uses git history as the source of truth — not vibes.
|
||||||
This keeps documentation fresh — runs 2x/day so drift stays small.
|
|
||||||
|
|
||||||
## Part A: Watermark staleness check and update
|
## Part A: Discover what changed
|
||||||
|
|
||||||
1. Read the HEAD SHA from preflight:
|
1. Read the HEAD SHA from preflight:
|
||||||
HEAD_SHA=$(cat /tmp/gardener-head-sha)
|
HEAD_SHA=$(cat /tmp/gardener-head-sha)
|
||||||
|
|
@ -249,102 +359,72 @@ This keeps documentation fresh — runs 2x/day so drift stays small.
|
||||||
|
|
||||||
3. For each file, read the watermark from line 1:
|
3. For each file, read the watermark from line 1:
|
||||||
<!-- last-reviewed: <sha> -->
|
<!-- last-reviewed: <sha> -->
|
||||||
|
If no watermark exists, treat the file as fully stale (review everything).
|
||||||
|
|
||||||
4. Check for changes since the watermark:
|
4. Check for changes since the watermark:
|
||||||
git log --oneline <watermark>..HEAD -- <directory>
|
git log --oneline <watermark>..HEAD -- <directory>
|
||||||
If zero changes, the file is current — skip it.
|
If zero changes, the file is current — skip it.
|
||||||
|
|
||||||
5. For stale files:
|
5. For each stale file, run a STRUCTURAL DIFF — this is the core of the step:
|
||||||
- Read the AGENTS.md and the source files in that directory
|
|
||||||
- Update the documentation to reflect code changes since the watermark
|
|
||||||
- Set the watermark to the HEAD SHA from the preflight step
|
|
||||||
- Conventions: architecture and WHY not implementation details
|
|
||||||
|
|
||||||
## Part B: Size limit enforcement (progressive disclosure split)
|
a. FILE INVENTORY: list files at watermark vs HEAD for this directory:
|
||||||
|
git ls-tree -r --name-only <watermark> -- <directory>
|
||||||
|
git ls-tree -r --name-only HEAD -- <directory>
|
||||||
|
Diff the two lists. Categorize:
|
||||||
|
- NEW files: in HEAD but not in watermark
|
||||||
|
- DELETED files: in watermark but not in HEAD
|
||||||
|
- Check AGENTS.md layout section: does it list each current file?
|
||||||
|
Files present in the directory but absent from the layout = GAPS.
|
||||||
|
Files listed in the layout but missing from the directory = LIES.
|
||||||
|
|
||||||
After all updates are done, count lines in the root AGENTS.md:
|
b. REFERENCE VALIDATION: extract every file path, function name, and
|
||||||
|
shell variable referenced in the AGENTS.md. For each:
|
||||||
|
- File paths: verify the file exists (ls or git ls-tree HEAD)
|
||||||
|
- Function names: grep for the definition in the codebase
|
||||||
|
- Script names: verify they exist where claimed
|
||||||
|
Any reference that fails validation is a LIE — flag it for correction.
|
||||||
|
|
||||||
|
c. SEMANTIC CHANGES: for files that existed at both watermark and HEAD,
|
||||||
|
check if they changed meaningfully:
|
||||||
|
git diff <watermark>..HEAD -- <directory>/*.sh <directory>/*.py <directory>/*.toml
|
||||||
|
Look for: new exported functions, removed functions, renamed files,
|
||||||
|
changed CLI flags, new environment variables, new configuration.
|
||||||
|
Ignore: internal refactors, comment changes, formatting.
|
||||||
|
|
||||||
|
6. For each stale file, apply corrections:
|
||||||
|
- Add NEW files to the layout section
|
||||||
|
- Remove DELETED files from the layout section
|
||||||
|
- Fix every LIE found in reference validation
|
||||||
|
- Add notes about significant SEMANTIC CHANGES
|
||||||
|
- Set the watermark to HEAD_SHA
|
||||||
|
- Conventions: document architecture and WHY, not implementation details
|
||||||
|
|
||||||
|
## Part B: Size limit enforcement
|
||||||
|
|
||||||
|
After all updates, count lines in the root AGENTS.md:
|
||||||
wc -l < "$PROJECT_REPO_ROOT/AGENTS.md"
|
wc -l < "$PROJECT_REPO_ROOT/AGENTS.md"
|
||||||
|
|
||||||
If the root AGENTS.md exceeds 200 lines, perform a progressive disclosure
|
If it exceeds 200 lines, split verbose sections into per-directory files
|
||||||
split. The principle: agent reads the map, drills into detail only when
|
using progressive disclosure:
|
||||||
needed. You wouldn't dump a 500-page wiki on a new hire's first morning.
|
|
||||||
|
|
||||||
6. Identify per-directory sections to extract. Each agent section under
|
7. Identify sections that can be extracted to per-directory files.
|
||||||
"## Agents" (e.g. "### Dev (`dev/`)", "### Review (`review/`)") and
|
Keep the root AGENTS.md as a table of contents — brief overview,
|
||||||
each helper section (e.g. "### Shared helpers (`lib/`)") is a candidate.
|
directory layout, summary tables with links to detail files.
|
||||||
Also extract verbose subsections like "## Issue lifecycle and label
|
|
||||||
conventions" and "## Phase-Signaling Protocol" into docs/ or the
|
|
||||||
relevant directory.
|
|
||||||
|
|
||||||
7. For each section to extract, create a `{dir}/AGENTS.md` file with:
|
8. For each extracted section, create a `{dir}/AGENTS.md` with:
|
||||||
- Line 1: watermark <!-- last-reviewed: <HEAD_SHA> -->
|
- Line 1: watermark <!-- last-reviewed: <HEAD_SHA> -->
|
||||||
- The full section content (role, trigger, key files, env vars, lifecycle)
|
- The full section content, preserving structure and detail
|
||||||
- Keep the same markdown structure and detail level
|
|
||||||
|
|
||||||
Example for dev/:
|
9. Replace extracted sections in root with concise summaries + links.
|
||||||
```
|
|
||||||
<!-- last-reviewed: abc123 -->
|
|
||||||
# Dev Agent
|
|
||||||
|
|
||||||
**Role**: Implement issues autonomously ...
|
10. Verify root is under 200 lines. If still over, extract more.
|
||||||
**Trigger**: dev-poll.sh runs every 10 min ...
|
|
||||||
**Key files**: ...
|
|
||||||
**Environment variables consumed**: ...
|
|
||||||
**Lifecycle**: ...
|
|
||||||
```
|
|
||||||
|
|
||||||
8. Replace extracted sections in the root AGENTS.md with a concise
|
|
||||||
directory map table. The root file keeps ONLY:
|
|
||||||
- Watermark (line 1)
|
|
||||||
- ## What this repo is (brief overview)
|
|
||||||
- ## Directory layout (existing tree)
|
|
||||||
- ## Tech stack
|
|
||||||
- ## Coding conventions
|
|
||||||
- ## How to lint and test
|
|
||||||
- ## Agents — replaced with a summary table pointing to per-dir files:
|
|
||||||
|
|
||||||
## Agents
|
|
||||||
|
|
||||||
| Agent | Directory | Role | Guide |
|
|
||||||
|-------|-----------|------|-------|
|
|
||||||
| Dev | dev/ | Issue implementation | [dev/AGENTS.md](dev/AGENTS.md) |
|
|
||||||
| Review | review/ | PR review | [review/AGENTS.md](review/AGENTS.md) |
|
|
||||||
| Gardener | gardener/ | Backlog grooming | [gardener/AGENTS.md](gardener/AGENTS.md) |
|
|
||||||
| ... | ... | ... | ... |
|
|
||||||
|
|
||||||
- ## Shared helpers — replaced with a brief pointer:
|
|
||||||
"See [lib/AGENTS.md](lib/AGENTS.md) for the full helper reference."
|
|
||||||
Keep the summary table if it fits, or move it to lib/AGENTS.md.
|
|
||||||
|
|
||||||
- ## Issue lifecycle and label conventions — keep a brief summary
|
|
||||||
(labels table + dependency convention) or move verbose parts to
|
|
||||||
docs/PHASE-PROTOCOL.md
|
|
||||||
|
|
||||||
- ## Architecture Decisions — keep in root (humans write, agents enforce)
|
|
||||||
|
|
||||||
- ## Phase-Signaling Protocol — keep a brief summary with pointer:
|
|
||||||
"See [docs/PHASE-PROTOCOL.md](docs/PHASE-PROTOCOL.md) for the full spec."
|
|
||||||
|
|
||||||
9. Verify the root AGENTS.md is now under 200 lines:
|
|
||||||
LINE_COUNT=$(wc -l < "$PROJECT_REPO_ROOT/AGENTS.md")
|
|
||||||
if [ "$LINE_COUNT" -gt 200 ]; then
|
|
||||||
echo "WARNING: root AGENTS.md still $LINE_COUNT lines after split"
|
|
||||||
fi
|
|
||||||
If still over 200, trim further — move more detail into per-directory
|
|
||||||
files. The root should read like a table of contents, not an encyclopedia.
|
|
||||||
|
|
||||||
10. Each new per-directory AGENTS.md must have a watermark on line 1.
|
|
||||||
The gardener maintains freshness for ALL AGENTS.md files — root and
|
|
||||||
per-directory — using the same watermark mechanism from Part A.
|
|
||||||
|
|
||||||
## Staging
|
## Staging
|
||||||
|
|
||||||
11. Stage ALL AGENTS.md files you created or changed — do NOT commit yet.
|
11. Stage all AGENTS.md files created or changed:
|
||||||
All git writes happen in the commit-and-pr step at the end:
|
|
||||||
find . -name "AGENTS.md" -not -path "./.git/*" -exec git add {} +
|
find . -name "AGENTS.md" -not -path "./.git/*" -exec git add {} +
|
||||||
|
|
||||||
12. If no AGENTS.md files need updating AND root is under 200 lines,
|
12. If no files need updating AND root is under 200 lines, skip entirely.
|
||||||
skip this step entirely.
|
|
||||||
|
|
||||||
CRITICAL: If this step fails for any reason, log the failure and move on.
|
CRITICAL: If this step fails for any reason, log the failure and move on.
|
||||||
Do NOT let an AGENTS.md failure prevent the commit-and-pr step.
|
Do NOT let an AGENTS.md failure prevent the commit-and-pr step.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# formulas/run-planner.toml — Strategic planning formula (v4: graph-driven)
|
# formulas/run-planner.toml — Strategic planning formula (v4: graph-driven)
|
||||||
#
|
#
|
||||||
# Executed directly by planner-run.sh via cron — no action issues.
|
# Executed directly by planner-run.sh via polling loop — no action issues.
|
||||||
# planner-run.sh creates a tmux session with Claude (opus) and injects
|
# planner-run.sh creates a tmux session with Claude (opus) and injects
|
||||||
# this formula as context, plus the graph report from build-graph.py.
|
# this formula as context, plus the graph report from build-graph.py.
|
||||||
#
|
#
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
# Memory: previous predictions on the forge ARE the memory.
|
# Memory: previous predictions on the forge ARE the memory.
|
||||||
# No separate memory file — the issue tracker is the source of truth.
|
# No separate memory file — the issue tracker is the source of truth.
|
||||||
#
|
#
|
||||||
# Executed by predictor/predictor-run.sh via cron — no action issues.
|
# Executed by predictor/predictor-run.sh via polling loop — no action issues.
|
||||||
# predictor-run.sh creates a tmux session with Claude (sonnet) and injects
|
# predictor-run.sh creates a tmux session with Claude (sonnet) and injects
|
||||||
# this formula as context. Claude executes all steps autonomously.
|
# this formula as context. Claude executes all steps autonomously.
|
||||||
#
|
#
|
||||||
|
|
@ -119,27 +119,24 @@ For each weakness you identify, choose one:
|
||||||
**Suggested action:** <what the planner should consider>
|
**Suggested action:** <what the planner should consider>
|
||||||
|
|
||||||
**EXPLOIT** — high confidence, have a theory you can test:
|
**EXPLOIT** — high confidence, have a theory you can test:
|
||||||
File a prediction/unreviewed issue AND an action issue that dispatches
|
File a prediction/unreviewed issue AND a vault PR that dispatches
|
||||||
a formula to generate evidence.
|
a formula to generate evidence (AD-006: external actions go through vault).
|
||||||
|
|
||||||
The prediction explains the theory. The action generates the proof.
|
The prediction explains the theory. The vault PR triggers the proof
|
||||||
When the planner runs next, evidence is already there.
|
after human approval. When the planner runs next, evidence is already there.
|
||||||
|
|
||||||
Action issue body format (label: action):
|
Vault dispatch (requires lib/vault.sh):
|
||||||
Dispatched by predictor to test theory in #<prediction_number>.
|
source "$PROJECT_REPO_ROOT/lib/vault.sh"
|
||||||
|
|
||||||
## Task
|
TOML_CONTENT="id = \"predict-<prediction_number>-<formula>\"
|
||||||
Run <formula name> with focus on <specific test>.
|
context = \"Test prediction #<prediction_number>: <theory summary> — focus: <specific test>\"
|
||||||
|
formula = \"<formula-name>\"
|
||||||
## Expected evidence
|
secrets = []
|
||||||
Results in evidence/<dir>/<date>-<name>.json
|
# Unblocks: #<prediction_number>
|
||||||
|
# Expected evidence: evidence/<dir>/<date>-<name>.json
|
||||||
## Acceptance criteria
|
"
|
||||||
- [ ] Formula ran to completion
|
PR_NUM=$(vault_request "predict-<prediction_number>-<formula>" "$TOML_CONTENT")
|
||||||
- [ ] Evidence file written with structured results
|
echo "Vault PR #${PR_NUM} filed to test prediction #<prediction_number>"
|
||||||
|
|
||||||
## Affected files
|
|
||||||
- evidence/<dir>/
|
|
||||||
|
|
||||||
Available formulas (check $PROJECT_REPO_ROOT/formulas/*.toml for current list):
|
Available formulas (check $PROJECT_REPO_ROOT/formulas/*.toml for current list):
|
||||||
cat "$PROJECT_REPO_ROOT/formulas/"*.toml | grep '^name' | head -10
|
cat "$PROJECT_REPO_ROOT/formulas/"*.toml | grep '^name' | head -10
|
||||||
|
|
@ -156,10 +153,10 @@ tea is pre-configured with login "$TEA_LOGIN" and repo "$FORGE_REPO".
|
||||||
tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \
|
tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \
|
||||||
--title "<title>" --body "<body>" --labels "prediction/unreviewed"
|
--title "<title>" --body "<body>" --labels "prediction/unreviewed"
|
||||||
|
|
||||||
2. File action dispatches (if exploiting):
|
2. Dispatch formula via vault (if exploiting):
|
||||||
tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \
|
source "$PROJECT_REPO_ROOT/lib/vault.sh"
|
||||||
--title "action: test prediction #NNN — <formula> <focus>" \
|
PR_NUM=$(vault_request "predict-NNN-<formula>" "$TOML_CONTENT")
|
||||||
--body "<body>" --labels "action"
|
# See EXPLOIT section above for TOML_CONTENT format
|
||||||
|
|
||||||
3. Close superseded predictions:
|
3. Close superseded predictions:
|
||||||
tea issues close <number> --login "$TEA_LOGIN" --repo "$FORGE_REPO"
|
tea issues close <number> --login "$TEA_LOGIN" --repo "$FORGE_REPO"
|
||||||
|
|
@ -173,11 +170,11 @@ tea is pre-configured with login "$TEA_LOGIN" and repo "$FORGE_REPO".
|
||||||
|
|
||||||
## Rules
|
## Rules
|
||||||
|
|
||||||
- Max 5 actions total (predictions + action dispatches combined)
|
- Max 5 actions total (predictions + vault dispatches combined)
|
||||||
- Each exploit counts as 2 (prediction + action dispatch)
|
- Each exploit counts as 2 (prediction + vault dispatch)
|
||||||
- So: 5 explores, or 2 exploits + 1 explore, or 1 exploit + 3 explores
|
- So: 5 explores, or 2 exploits + 1 explore, or 1 exploit + 3 explores
|
||||||
- Never re-file a dismissed prediction without new evidence
|
- Never re-file a dismissed prediction without new evidence
|
||||||
- Action issues must reference existing formulas — don't invent formulas
|
- Vault dispatches must reference existing formulas — don't invent formulas
|
||||||
- Be specific: name the file, the metric, the threshold, the formula
|
- Be specific: name the file, the metric, the threshold, the formula
|
||||||
- If no weaknesses found, file nothing — that's a strong signal the project is healthy
|
- If no weaknesses found, file nothing — that's a strong signal the project is healthy
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -216,7 +216,7 @@ Check 3 — engagement evidence has been collected at least once:
|
||||||
jq -r '" visitors=\(.unique_visitors) pages=\(.page_views) referrals=\(.referred_visitors)"' "$LATEST" 2>/dev/null || true
|
jq -r '" visitors=\(.unique_visitors) pages=\(.page_views) referrals=\(.referred_visitors)"' "$LATEST" 2>/dev/null || true
|
||||||
else
|
else
|
||||||
echo "NOTE: No engagement reports yet — run: bash site/collect-engagement.sh"
|
echo "NOTE: No engagement reports yet — run: bash site/collect-engagement.sh"
|
||||||
echo "The first report will appear after the cron job runs (daily at 23:55 UTC)."
|
echo "The first report will appear after the scheduled collection runs (daily at 23:55 UTC)."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
Summary:
|
Summary:
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
# formulas/run-supervisor.toml — Supervisor formula (health monitoring + remediation)
|
# formulas/run-supervisor.toml — Supervisor formula (health monitoring + remediation)
|
||||||
#
|
#
|
||||||
# Executed by supervisor/supervisor-run.sh via cron (every 20 minutes).
|
# Executed by supervisor/supervisor-run.sh via polling loop (every 20 minutes).
|
||||||
# supervisor-run.sh creates a tmux session with Claude (sonnet) and injects
|
# supervisor-run.sh runs claude -p via agent-sdk.sh and injects
|
||||||
# this formula with pre-collected metrics as context.
|
# this formula with pre-collected metrics as context.
|
||||||
#
|
#
|
||||||
# Steps: preflight → health-assessment → decide-actions → report → journal
|
# Steps: preflight → health-assessment → decide-actions → report → journal
|
||||||
|
|
@ -34,13 +34,15 @@ and injected into your prompt above. Review them now.
|
||||||
(24h grace period). Check the "Stale Phase Cleanup" section for any
|
(24h grace period). Check the "Stale Phase Cleanup" section for any
|
||||||
files cleaned or in grace period this run.
|
files cleaned or in grace period this run.
|
||||||
|
|
||||||
2. Check vault state: read $OPS_REPO_ROOT/vault/pending/*.md for any procurement items
|
2. Check vault state: read ${OPS_VAULT_ROOT:-$OPS_REPO_ROOT/vault/pending}/*.md for any procurement items
|
||||||
the planner has filed. Note items relevant to the health assessment
|
the planner has filed. Note items relevant to the health assessment
|
||||||
(e.g. a blocked resource that explains why the pipeline is stalled).
|
(e.g. a blocked resource that explains why the pipeline is stalled).
|
||||||
|
Note: In degraded mode, vault items are stored locally.
|
||||||
|
|
||||||
3. Read the supervisor journal for recent history:
|
3. Read the supervisor journal for recent history:
|
||||||
JOURNAL_FILE="$OPS_REPO_ROOT/journal/supervisor/$(date -u +%Y-%m-%d).md"
|
JOURNAL_FILE="${OPS_JOURNAL_ROOT:-$OPS_REPO_ROOT/journal/supervisor}/$(date -u +%Y-%m-%d).md"
|
||||||
if [ -f "$JOURNAL_FILE" ]; then cat "$JOURNAL_FILE"; fi
|
if [ -f "$JOURNAL_FILE" ]; then cat "$JOURNAL_FILE"; fi
|
||||||
|
Note: In degraded mode, the journal is stored locally and not committed to git.
|
||||||
|
|
||||||
4. Note any values that cross these thresholds:
|
4. Note any values that cross these thresholds:
|
||||||
- RAM available < 500MB or swap > 3GB → P0 (memory crisis)
|
- RAM available < 500MB or swap > 3GB → P0 (memory crisis)
|
||||||
|
|
@ -105,8 +107,13 @@ For each finding from the health assessment, decide and execute an action.
|
||||||
sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 || true
|
sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 || true
|
||||||
|
|
||||||
**P1 Disk pressure:**
|
**P1 Disk pressure:**
|
||||||
# Docker cleanup
|
# First pass: dangling only (cheap, safe)
|
||||||
sudo docker system prune -f >/dev/null 2>&1 || true
|
sudo docker system prune -f >/dev/null 2>&1 || true
|
||||||
|
# If still > 80%, escalate to all unused images (more aggressive but necessary)
|
||||||
|
_pct=$(df -h / | awk 'NR==2{print $5}' | tr -d '%')
|
||||||
|
if [ "${_pct:-0}" -gt 80 ]; then
|
||||||
|
sudo docker system prune -a -f >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
# Truncate logs > 10MB
|
# Truncate logs > 10MB
|
||||||
for f in "$FACTORY_ROOT"/{dev,review,supervisor,gardener,planner,predictor}/*.log; do
|
for f in "$FACTORY_ROOT"/{dev,review,supervisor,gardener,planner,predictor}/*.log; do
|
||||||
[ -f "$f" ] && [ "$(du -k "$f" | cut -f1)" -gt 10240 ] && truncate -s 0 "$f"
|
[ -f "$f" ] && [ "$(du -k "$f" | cut -f1)" -gt 10240 ] && truncate -s 0 "$f"
|
||||||
|
|
@ -137,21 +144,22 @@ For each finding from the health assessment, decide and execute an action.
|
||||||
|
|
||||||
**P3 Stale PRs (CI done >20min, no push since):**
|
**P3 Stale PRs (CI done >20min, no push since):**
|
||||||
Do NOT read dev-poll.sh, push branches, attempt merges, or investigate pipeline code.
|
Do NOT read dev-poll.sh, push branches, attempt merges, or investigate pipeline code.
|
||||||
Instead, nudge the dev-agent via tmux injection if a session is alive:
|
Instead, file a vault item for the dev-agent to pick up:
|
||||||
# Find the dev session for this issue
|
Write ${OPS_VAULT_ROOT:-$OPS_REPO_ROOT/vault/pending}/stale-pr-${ISSUE_NUM}.md:
|
||||||
SESSION=$(tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "dev-.*-${ISSUE_NUM}" | head -1)
|
# Stale PR: ${PR_TITLE}
|
||||||
if [ -n "$SESSION" ]; then
|
## What
|
||||||
# Inject a nudge into the dev-agent session
|
CI finished >20min ago but no git push has been made to the PR branch.
|
||||||
tmux send-keys -t "$SESSION" "# [supervisor] PR stale >20min — CI finished, please push or update" Enter
|
## Why
|
||||||
fi
|
P3 — Factory degraded: PRs should be pushed within 20min of CI completion.
|
||||||
If no active tmux session exists, note it in the journal for the next dev-poll cycle.
|
## Unblocks
|
||||||
|
- Factory health: dev-agent will push the branch and continue the workflow
|
||||||
Do NOT file vault items for stale PRs unless they remain stale for >3 consecutive runs.
|
Do NOT file vault items for stale PRs unless they remain stale for >3 consecutive runs.
|
||||||
|
|
||||||
### Cannot auto-fix → file vault item
|
### Cannot auto-fix → file vault item
|
||||||
|
|
||||||
For P0-P2 issues that persist after auto-fix attempts, or issues requiring
|
For P0-P2 issues that persist after auto-fix attempts, or issues requiring
|
||||||
human judgment, file a vault procurement item:
|
human judgment, file a vault procurement item:
|
||||||
Write $OPS_REPO_ROOT/vault/pending/supervisor-<issue-slug>.md:
|
Write ${OPS_VAULT_ROOT:-$OPS_REPO_ROOT/vault/pending}/supervisor-<issue-slug>.md:
|
||||||
# <What is needed>
|
# <What is needed>
|
||||||
## What
|
## What
|
||||||
<description of the problem and why the supervisor cannot fix it>
|
<description of the problem and why the supervisor cannot fix it>
|
||||||
|
|
@ -160,13 +168,23 @@ human judgment, file a vault procurement item:
|
||||||
## Unblocks
|
## Unblocks
|
||||||
- Factory health: <what this resolves>
|
- Factory health: <what this resolves>
|
||||||
Vault PR filed on ops repo — human approves via PR review.
|
Vault PR filed on ops repo — human approves via PR review.
|
||||||
|
Note: In degraded mode (no ops repo), vault items are written locally to ${OPS_VAULT_ROOT:-local path}.
|
||||||
|
|
||||||
Read the relevant best-practices file before taking action:
|
### Reading best-practices files
|
||||||
cat "$OPS_REPO_ROOT/knowledge/memory.md" # P0
|
|
||||||
cat "$OPS_REPO_ROOT/knowledge/disk.md" # P1
|
Read the relevant best-practices file before taking action. In degraded mode,
|
||||||
cat "$OPS_REPO_ROOT/knowledge/ci.md" # P2 CI
|
use the bundled knowledge files from ${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}:
|
||||||
cat "$OPS_REPO_ROOT/knowledge/dev-agent.md" # P2 agent
|
|
||||||
cat "$OPS_REPO_ROOT/knowledge/git.md" # P2 git
|
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/memory.md" # P0
|
||||||
|
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/disk.md" # P1
|
||||||
|
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/ci.md" # P2 CI
|
||||||
|
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/dev-agent.md" # P2 agent
|
||||||
|
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/git.md" # P2 git
|
||||||
|
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/review-agent.md" # P2 review
|
||||||
|
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/forge.md" # P2 forge
|
||||||
|
|
||||||
|
Note: If OPS_REPO_ROOT is not available (degraded mode), the bundled knowledge
|
||||||
|
files in ${OPS_KNOWLEDGE_ROOT:-<unset>} provide fallback guidance.
|
||||||
|
|
||||||
Track what you fixed and what vault items you filed for the report step.
|
Track what you fixed and what vault items you filed for the report step.
|
||||||
"""
|
"""
|
||||||
|
|
@ -208,7 +226,7 @@ description = """
|
||||||
Append a timestamped entry to the supervisor journal.
|
Append a timestamped entry to the supervisor journal.
|
||||||
|
|
||||||
File path:
|
File path:
|
||||||
$OPS_REPO_ROOT/journal/supervisor/$(date -u +%Y-%m-%d).md
|
${OPS_JOURNAL_ROOT:-$OPS_REPO_ROOT/journal/supervisor}/$(date -u +%Y-%m-%d).md
|
||||||
|
|
||||||
If the file already exists (multiple runs per day), append a new section.
|
If the file already exists (multiple runs per day), append a new section.
|
||||||
If it does not exist, create it.
|
If it does not exist, create it.
|
||||||
|
|
@ -241,17 +259,24 @@ run-to-run context so future supervisor runs can detect trends
|
||||||
IMPORTANT: Do NOT commit or push the journal — it is a local working file.
|
IMPORTANT: Do NOT commit or push the journal — it is a local working file.
|
||||||
The journal directory is committed to git periodically by other agents.
|
The journal directory is committed to git periodically by other agents.
|
||||||
|
|
||||||
|
Note: In degraded mode (no ops repo), the journal is written locally to
|
||||||
|
${OPS_JOURNAL_ROOT:-<unset>} and is NOT automatically committed to any repo.
|
||||||
|
|
||||||
## Learning
|
## Learning
|
||||||
|
|
||||||
If you discover something new during this run, append it to the relevant
|
If you discover something new during this run:
|
||||||
knowledge file in the ops repo:
|
|
||||||
|
- In full mode (ops repo available): append to the relevant knowledge file:
|
||||||
echo "### Lesson title
|
echo "### Lesson title
|
||||||
Description of what you learned." >> "${OPS_REPO_ROOT}/knowledge/<file>.md"
|
Description of what you learned." >> "${OPS_REPO_ROOT}/knowledge/<file>.md"
|
||||||
|
|
||||||
|
- In degraded mode: write to the local knowledge directory for reference:
|
||||||
|
echo "### Lesson title
|
||||||
|
Description of what you learned." >> "${OPS_KNOWLEDGE_ROOT:-<unset>}/<file>.md"
|
||||||
|
|
||||||
Knowledge files: memory.md, disk.md, ci.md, forge.md, dev-agent.md,
|
Knowledge files: memory.md, disk.md, ci.md, forge.md, dev-agent.md,
|
||||||
review-agent.md, git.md.
|
review-agent.md, git.md.
|
||||||
|
|
||||||
After writing the journal, write the phase signal:
|
After writing the journal, the agent session completes automatically.
|
||||||
echo 'PHASE:done' > "$PHASE_FILE"
|
|
||||||
"""
|
"""
|
||||||
needs = ["report"]
|
needs = ["report"]
|
||||||
|
|
|
||||||
267
formulas/triage.toml
Normal file
267
formulas/triage.toml
Normal file
|
|
@ -0,0 +1,267 @@
|
||||||
|
# formulas/triage.toml — Triage-agent formula (generic template)
|
||||||
|
#
|
||||||
|
# This is the base template for triage investigations.
|
||||||
|
# Project-specific formulas (e.g. formulas/triage-harb.toml) extend this by
|
||||||
|
# overriding the fields in the [project] section and providing stack-specific
|
||||||
|
# step descriptions.
|
||||||
|
#
|
||||||
|
# Triggered by: bug-report + in-triage label combination.
|
||||||
|
# Set by the reproduce-agent when:
|
||||||
|
# - Bug was confirmed (reproduced)
|
||||||
|
# - Quick log analysis did not reveal an obvious root cause
|
||||||
|
# - Reproduce-agent documented all steps taken and logs examined
|
||||||
|
#
|
||||||
|
# Steps:
|
||||||
|
# 1. read-findings — parse issue comments for prior reproduce-agent evidence
|
||||||
|
# 2. trace-data-flow — follow symptom through UI → API → backend → data store
|
||||||
|
# 3. instrumentation — throwaway branch, add logging, restart, observe
|
||||||
|
# 4. decompose — file backlog issues for each root cause
|
||||||
|
# 5. link-back — update original issue, swap in-triage → in-progress
|
||||||
|
# 6. cleanup — delete throwaway debug branch
|
||||||
|
#
|
||||||
|
# Best practices:
|
||||||
|
# - Start from reproduce-agent findings; do not repeat their work
|
||||||
|
# - Budget: 70% tracing data flow, 30% instrumented re-runs
|
||||||
|
# - Multiple causes: check if layered (Depends-on) or independent (Related)
|
||||||
|
# - Always delete the throwaway debug branch before finishing
|
||||||
|
# - If inconclusive after full turn budget: leave in-triage, post what was
|
||||||
|
# tried, do NOT relabel — supervisor handles stale triage sessions
|
||||||
|
#
|
||||||
|
# Project-specific formulas extend this template by defining:
|
||||||
|
# - stack_script: how to start/stop the project stack
|
||||||
|
# - [project].data_flow: layer names (e.g. "chain → indexer → GraphQL → UI")
|
||||||
|
# - [project].api_endpoints: which APIs/services to inspect
|
||||||
|
# - [project].stack_lock: stack lock configuration
|
||||||
|
# - Per-step description overrides with project-specific commands
|
||||||
|
#
|
||||||
|
# No hard timeout — runs until Claude hits its turn limit.
|
||||||
|
# Stack lock held for full run (triage is rare; blocking CI is acceptable).
|
||||||
|
|
||||||
|
name = "triage"
|
||||||
|
description = "Deep root cause analysis: trace data flow, add debug instrumentation, decompose causes into backlog issues."
|
||||||
|
version = 2
|
||||||
|
|
||||||
|
# Set stack_script to the restart command for local stacks.
|
||||||
|
# Leave empty ("") to connect to an existing staging environment.
|
||||||
|
stack_script = ""
|
||||||
|
|
||||||
|
tools = ["playwright"]
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Project-specific extension fields.
|
||||||
|
# Override these in formulas/triage-<project>.toml.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
[project]
|
||||||
|
# Human-readable layer names for the data-flow trace (generic default).
|
||||||
|
# Example project override: "chain → indexer → GraphQL → UI"
|
||||||
|
data_flow = "UI → API → backend → data store"
|
||||||
|
|
||||||
|
# Comma-separated list of API endpoints or services to inspect.
|
||||||
|
# Example: "GraphQL /graphql, REST /api/v1, RPC ws://localhost:8545"
|
||||||
|
api_endpoints = ""
|
||||||
|
|
||||||
|
# Stack lock configuration (leave empty for default behavior).
|
||||||
|
# Example: "full" to hold a full stack lock during triage.
|
||||||
|
stack_lock = ""
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Steps
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
[[steps]]
|
||||||
|
id = "read-findings"
|
||||||
|
title = "Read reproduce-agent findings"
|
||||||
|
description = """
|
||||||
|
Before doing anything else, parse all prior evidence from the issue comments.
|
||||||
|
|
||||||
|
1. Fetch the issue body and all comments:
|
||||||
|
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues/${ISSUE_NUMBER}" | jq -r '.body'
|
||||||
|
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues/${ISSUE_NUMBER}/comments" | jq -r '.[].body'
|
||||||
|
|
||||||
|
2. Identify the reproduce-agent comment (look for sections like
|
||||||
|
"Reproduction steps", "Logs examined", "What was tried").
|
||||||
|
|
||||||
|
3. Extract and note:
|
||||||
|
- The exact symptom (error message, unexpected value, visual regression)
|
||||||
|
- Steps that reliably trigger the bug
|
||||||
|
- Log lines or API responses already captured
|
||||||
|
- Any hypotheses the reproduce-agent already ruled out
|
||||||
|
|
||||||
|
Do NOT repeat work the reproduce-agent already did. Your job starts where
|
||||||
|
theirs ended. If no reproduce-agent comment is found, note it and proceed
|
||||||
|
with fresh investigation using the issue body only.
|
||||||
|
"""
|
||||||
|
|
||||||
|
[[steps]]
|
||||||
|
id = "trace-data-flow"
|
||||||
|
title = "Trace data flow from symptom to source"
|
||||||
|
description = """
|
||||||
|
Systematically follow the symptom backwards through each layer of the stack.
|
||||||
|
Spend ~70% of your total turn budget here before moving to instrumentation.
|
||||||
|
|
||||||
|
Generic layer traversal (adapt to the project's actual stack):
|
||||||
|
UI → API → backend → data store
|
||||||
|
|
||||||
|
For each layer boundary:
|
||||||
|
1. What does the upstream layer send?
|
||||||
|
2. What does the downstream layer expect?
|
||||||
|
3. Is there a mismatch? If yes — is this the root cause or a symptom?
|
||||||
|
|
||||||
|
Tracing checklist:
|
||||||
|
a. Start at the layer closest to the visible symptom.
|
||||||
|
b. Read the relevant source files — do not guess data shapes.
|
||||||
|
c. Cross-reference API contracts: compare what the code sends vs what it
|
||||||
|
should send according to schemas, type definitions, or documentation.
|
||||||
|
d. Check recent git history on suspicious files:
|
||||||
|
git log --oneline -20 -- <file>
|
||||||
|
e. Search for related issues or TODOs in the code:
|
||||||
|
grep -r "TODO\|FIXME\|HACK" -- <relevant directory>
|
||||||
|
|
||||||
|
Capture for each layer:
|
||||||
|
- The data shape flowing in and out (field names, types, nullability)
|
||||||
|
- Whether the layer's behavior matches its documented contract
|
||||||
|
- Any discrepancy found
|
||||||
|
|
||||||
|
If a clear root cause becomes obvious during tracing, note it and continue
|
||||||
|
checking whether additional causes exist downstream.
|
||||||
|
"""
|
||||||
|
needs = ["read-findings"]
|
||||||
|
|
||||||
|
[[steps]]
|
||||||
|
id = "instrumentation"
|
||||||
|
title = "Add debug instrumentation on a throwaway branch"
|
||||||
|
description = """
|
||||||
|
Use ~30% of your total turn budget here. Only instrument after tracing has
|
||||||
|
identified the most likely failure points — do not instrument blindly.
|
||||||
|
|
||||||
|
1. Create a throwaway debug branch (NEVER commit this to main):
|
||||||
|
cd "$PROJECT_REPO_ROOT"
|
||||||
|
git checkout -b debug/triage-${ISSUE_NUMBER}
|
||||||
|
|
||||||
|
2. Add targeted logging at the layer boundaries identified during tracing:
|
||||||
|
- Console.log / structured log statements around the suspicious code path
|
||||||
|
- Log the actual values flowing through: inputs, outputs, intermediate state
|
||||||
|
- Add verbose mode flags if the stack supports them
|
||||||
|
- Keep instrumentation minimal — only what confirms or refutes the hypothesis
|
||||||
|
|
||||||
|
3. Restart the stack using the configured script (if set):
|
||||||
|
${stack_script:-"# No stack_script configured — restart manually or connect to staging"}
|
||||||
|
|
||||||
|
4. Re-run the reproduction steps from the reproduce-agent findings.
|
||||||
|
|
||||||
|
5. Observe and capture new output:
|
||||||
|
- Paste relevant log lines into your working notes
|
||||||
|
- Note whether the observed values match or contradict the hypothesis
|
||||||
|
|
||||||
|
6. If the first instrumentation pass is inconclusive, iterate:
|
||||||
|
- Narrow the scope to the next most suspicious boundary
|
||||||
|
- Re-instrument, restart, re-run
|
||||||
|
- Maximum 2-3 instrumentation rounds before declaring inconclusive
|
||||||
|
|
||||||
|
Do NOT push the debug branch. It will be deleted in the cleanup step.
|
||||||
|
"""
|
||||||
|
needs = ["trace-data-flow"]
|
||||||
|
|
||||||
|
[[steps]]
|
||||||
|
id = "decompose"
|
||||||
|
title = "Decompose root causes into backlog issues"
|
||||||
|
description = """
|
||||||
|
After tracing and instrumentation, articulate each distinct root cause.
|
||||||
|
|
||||||
|
For each root cause found:
|
||||||
|
|
||||||
|
1. Determine the relationship to other causes:
|
||||||
|
- Layered (one causes another) → use Depends-on in the issue body
|
||||||
|
- Independent (separate code paths fail independently) → use Related
|
||||||
|
|
||||||
|
2. Create a backlog issue for each root cause:
|
||||||
|
curl -sf -X POST "${FORGE_API}/issues" \\
|
||||||
|
-H "Authorization: token ${FORGE_TOKEN}" \\
|
||||||
|
-H "Content-Type: application/json" \\
|
||||||
|
-d '{
|
||||||
|
"title": "fix: <specific description of root cause N>",
|
||||||
|
"body": "## Root cause\\n<exact code path, file:line>\\n\\n## Fix suggestion\\n<recommended approach>\\n\\n## Context\\nDecomposed from #${ISSUE_NUMBER} (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>",
|
||||||
|
"labels": [{"name": "backlog"}]
|
||||||
|
}'
|
||||||
|
|
||||||
|
3. Note the newly created issue numbers.
|
||||||
|
|
||||||
|
If only one root cause is found, still create a single backlog issue with
|
||||||
|
the specific code location and fix suggestion.
|
||||||
|
|
||||||
|
If the investigation is inconclusive (no clear root cause found), skip this
|
||||||
|
step and proceed directly to link-back with the inconclusive outcome.
|
||||||
|
"""
|
||||||
|
needs = ["instrumentation"]
|
||||||
|
|
||||||
|
[[steps]]
|
||||||
|
id = "link-back"
|
||||||
|
title = "Update original issue and relabel"
|
||||||
|
description = """
|
||||||
|
Post a summary comment on the original issue and update its labels.
|
||||||
|
|
||||||
|
### If root causes were found (conclusive):
|
||||||
|
|
||||||
|
Post a comment:
|
||||||
|
"## Triage findings
|
||||||
|
|
||||||
|
Found N root cause(s):
|
||||||
|
- #X — <one-line description> (cause 1 of N)
|
||||||
|
- #Y — <one-line description> (cause 2 of N, depends on #X)
|
||||||
|
|
||||||
|
Data flow traced: <layer where the bug originates>
|
||||||
|
Instrumentation: <key log output that confirmed the cause>
|
||||||
|
|
||||||
|
Next step: backlog issues above will be implemented in dependency order."
|
||||||
|
|
||||||
|
Then swap labels:
|
||||||
|
- Remove: in-triage
|
||||||
|
- Add: in-progress
|
||||||
|
|
||||||
|
### If investigation was inconclusive (turn budget exhausted):
|
||||||
|
|
||||||
|
Post a comment:
|
||||||
|
"## Triage — inconclusive
|
||||||
|
|
||||||
|
Traced: <layers checked>
|
||||||
|
Tried: <instrumentation attempts and what they showed>
|
||||||
|
Hypothesis: <best guess at cause, if any>
|
||||||
|
|
||||||
|
No definitive root cause identified. Leaving in-triage for supervisor
|
||||||
|
to handle as a stale triage session."
|
||||||
|
|
||||||
|
Do NOT relabel. Leave in-triage. The supervisor monitors stale triage
|
||||||
|
sessions and will escalate or reassign.
|
||||||
|
|
||||||
|
**CRITICAL: Write outcome file** — Always write the outcome to the outcome file:
|
||||||
|
- If root causes found (conclusive): echo "reproduced" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt
|
||||||
|
- If inconclusive: echo "needs-triage" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt
|
||||||
|
"""
|
||||||
|
needs = ["decompose"]
|
||||||
|
|
||||||
|
[[steps]]
|
||||||
|
id = "cleanup"
|
||||||
|
title = "Delete throwaway debug branch"
|
||||||
|
description = """
|
||||||
|
Always delete the debug branch, even if the investigation was inconclusive.
|
||||||
|
|
||||||
|
1. Switch back to the main branch:
|
||||||
|
cd "$PROJECT_REPO_ROOT"
|
||||||
|
git checkout "$PRIMARY_BRANCH"
|
||||||
|
|
||||||
|
2. Delete the local debug branch:
|
||||||
|
git branch -D debug/triage-${ISSUE_NUMBER}
|
||||||
|
|
||||||
|
3. Confirm no remote was pushed (if accidentally pushed, delete it too):
|
||||||
|
git push origin --delete debug/triage-${ISSUE_NUMBER} 2>/dev/null || true
|
||||||
|
|
||||||
|
4. Verify the worktree is clean:
|
||||||
|
git status
|
||||||
|
git worktree list
|
||||||
|
|
||||||
|
A clean repo is a prerequisite for the next dev-agent run. Never leave
|
||||||
|
debug branches behind — they accumulate and pollute the branch list.
|
||||||
|
"""
|
||||||
|
needs = ["link-back"]
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
<!-- last-reviewed: 33eb565d7e0c5b7e0159e1720ba7f79126a7e25e -->
|
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
|
||||||
# Gardener Agent
|
# Gardener Agent
|
||||||
|
|
||||||
**Role**: Backlog grooming — detect duplicate issues, missing acceptance
|
**Role**: Backlog grooming — detect duplicate issues, missing acceptance
|
||||||
|
|
@ -7,15 +7,18 @@ the quality gate: strips the `backlog` label from issues that lack acceptance
|
||||||
criteria checkboxes (`- [ ]`) or an `## Affected files` section. Invokes
|
criteria checkboxes (`- [ ]`) or an `## Affected files` section. Invokes
|
||||||
Claude to fix what it can; files vault items for what it cannot.
|
Claude to fix what it can; files vault items for what it cannot.
|
||||||
|
|
||||||
**Trigger**: `gardener-run.sh` runs 4x/day via cron. Sources `lib/guard.sh` and
|
**Trigger**: `gardener-run.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh`
|
||||||
calls `check_active gardener` first — skips if `$FACTORY_ROOT/state/.gardener-active`
|
every 6 hours (iteration math at line 182-194). Sources `lib/guard.sh` and calls
|
||||||
is absent. Then creates a tmux session with `claude --model sonnet`, injects
|
`check_active gardener` first — skips if `$FACTORY_ROOT/state/.gardener-active` is absent.
|
||||||
`formulas/run-gardener.toml` as context, monitors the phase file, and cleans up
|
**Early-exit optimization**: if no issues, PRs, or repo files have changed since the last
|
||||||
on completion or timeout (2h max session). No action issues — the gardener runs
|
run (checked via Forgejo API and `git diff`), the model is not invoked — the run exits
|
||||||
directly from cron like the planner, predictor, and supervisor.
|
immediately (no tmux session, no tokens consumed). Otherwise, creates a tmux session with
|
||||||
|
`claude --model sonnet`, injects `formulas/run-gardener.toml` as context, monitors the
|
||||||
|
phase file, and cleans up on completion or timeout (2h max session). No action issues —
|
||||||
|
the gardener runs as part of the polling loop alongside the planner, predictor, and supervisor.
|
||||||
|
|
||||||
**Key files**:
|
**Key files**:
|
||||||
- `gardener/gardener-run.sh` — Cron wrapper + orchestrator: lock, memory guard,
|
- `gardener/gardener-run.sh` — Polling loop participant + orchestrator: lock, memory guard,
|
||||||
sources disinto project config, creates tmux session, injects formula prompt,
|
sources disinto project config, creates tmux session, injects formula prompt,
|
||||||
monitors phase file via custom `_gardener_on_phase_change` callback (passed to
|
monitors phase file via custom `_gardener_on_phase_change` callback (passed to
|
||||||
`run_formula_and_monitor`). Stays alive through CI/review/merge cycle after
|
`run_formula_and_monitor`). Stays alive through CI/review/merge cycle after
|
||||||
|
|
@ -32,8 +35,8 @@ directly from cron like the planner, predictor, and supervisor.
|
||||||
- `FORGE_TOKEN`, `FORGE_GARDENER_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`
|
- `FORGE_TOKEN`, `FORGE_GARDENER_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`
|
||||||
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by gardener-run.sh)
|
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by gardener-run.sh)
|
||||||
|
|
||||||
**Lifecycle**: gardener-run.sh (cron 0,6,12,18) → `check_active gardener` → lock + memory guard →
|
**Lifecycle**: gardener-run.sh (invoked by polling loop every 6h, `check_active gardener`) →
|
||||||
load formula + context → create tmux session →
|
lock + memory guard → load formula + context → create tmux session →
|
||||||
Claude grooms backlog (writes proposed actions to manifest), bundles dust,
|
Claude grooms backlog (writes proposed actions to manifest), bundles dust,
|
||||||
updates AGENTS.md, commits manifest + docs to PR →
|
updates AGENTS.md, commits manifest + docs to PR →
|
||||||
`PHASE:awaiting_ci` (stays alive) → CI pass → `PHASE:awaiting_review` →
|
`PHASE:awaiting_ci` (stays alive) → CI pass → `PHASE:awaiting_review` →
|
||||||
|
|
|
||||||
|
|
@ -51,3 +51,4 @@ Compact, decision-ready. Human should be able to reply "1a 2c 3b" and be done.
|
||||||
- Dev-agent doesn't understand the product — clear acceptance criteria save 2-3 CI cycles
|
- Dev-agent doesn't understand the product — clear acceptance criteria save 2-3 CI cycles
|
||||||
- Feature issues MUST list affected e2e test files
|
- Feature issues MUST list affected e2e test files
|
||||||
- Issue templates from ISSUE-TEMPLATES.md propagate via triage gate
|
- Issue templates from ISSUE-TEMPLATES.md propagate via triage gate
|
||||||
|
- **AD-002 is a runtime invariant; nothing for the gardener to check at issue-groom time.** Concurrency is enforced by `flock session.lock` within each container and by `issue_claim` for per-issue work. A violation manifests as a 401 or VRAM OOM in agent logs, not as a malformed issue.
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# gardener-run.sh — Cron wrapper: gardener execution via SDK + formula
|
# gardener-run.sh — Polling-loop wrapper: gardener execution via SDK + formula
|
||||||
#
|
#
|
||||||
# Synchronous bash loop using claude -p (one-shot invocation).
|
# Synchronous bash loop using claude -p (one-shot invocation).
|
||||||
# No tmux sessions, no phase files — the bash script IS the state machine.
|
# No tmux sessions, no phase files — the bash script IS the state machine.
|
||||||
#
|
#
|
||||||
# Flow:
|
# Flow:
|
||||||
# 1. Guards: cron lock, memory check
|
# 1. Guards: run lock, memory check
|
||||||
# 2. Load formula (formulas/run-gardener.toml)
|
# 2. Load formula (formulas/run-gardener.toml)
|
||||||
# 3. Build context: AGENTS.md, scratch file, prompt footer
|
# 3. Build context: AGENTS.md, scratch file, prompt footer
|
||||||
# 4. agent_run(worktree, prompt) → Claude does maintenance, pushes if needed
|
# 4. agent_run(worktree, prompt) → Claude does maintenance, pushes if needed
|
||||||
|
|
@ -17,7 +17,7 @@
|
||||||
# Usage:
|
# Usage:
|
||||||
# gardener-run.sh [projects/disinto.toml] # project config (default: disinto)
|
# gardener-run.sh [projects/disinto.toml] # project config (default: disinto)
|
||||||
#
|
#
|
||||||
# Cron: 0 0,6,12,18 * * * cd /home/debian/dark-factory && bash gardener/gardener-run.sh projects/disinto.toml
|
# Called by: entrypoint.sh polling loop (every 6 hours)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
|
@ -54,22 +54,46 @@ SCRATCH_FILE="/tmp/gardener-${PROJECT_NAME}-scratch.md"
|
||||||
RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt"
|
RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt"
|
||||||
GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt"
|
GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt"
|
||||||
WORKTREE="/tmp/${PROJECT_NAME}-gardener-run"
|
WORKTREE="/tmp/${PROJECT_NAME}-gardener-run"
|
||||||
|
LAST_SHA_FILE="${DISINTO_DATA_DIR}/gardener-last-sha.txt"
|
||||||
|
|
||||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
# Override LOG_AGENT for consistent agent identification
|
||||||
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||||
|
LOG_AGENT="gardener"
|
||||||
|
|
||||||
# ── Guards ────────────────────────────────────────────────────────────────
|
# ── Guards ────────────────────────────────────────────────────────────────
|
||||||
check_active gardener
|
check_active gardener
|
||||||
acquire_cron_lock "/tmp/gardener-run.lock"
|
acquire_run_lock "/tmp/gardener-run.lock"
|
||||||
check_memory 2000
|
memory_guard 2000
|
||||||
|
|
||||||
log "--- Gardener run start ---"
|
log "--- Gardener run start ---"
|
||||||
|
|
||||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_GARDENER_TOKEN:-}" ]; then
|
# Run git operations from the project checkout, not the baked code dir
|
||||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_GARDENER_TOKEN}" \
|
cd "$PROJECT_REPO_ROOT"
|
||||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
|
||||||
|
resolve_forge_remote
|
||||||
|
|
||||||
|
# ── Precondition checks: skip if nothing to do ────────────────────────────
|
||||||
|
# Check for new commits since last run
|
||||||
|
CURRENT_SHA=$(git -C "$FACTORY_ROOT" rev-parse HEAD 2>/dev/null || echo "")
|
||||||
|
LAST_SHA=$(cat "$LAST_SHA_FILE" 2>/dev/null || echo "")
|
||||||
|
|
||||||
|
# Check for open issues needing grooming
|
||||||
|
backlog_count=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues?labels=backlog&state=open&limit=1" 2>/dev/null | jq length) || backlog_count=0
|
||||||
|
tech_debt_count=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues?labels=tech-debt&state=open&limit=1" 2>/dev/null | jq length) || tech_debt_count=0
|
||||||
|
|
||||||
|
if [ "$CURRENT_SHA" = "$LAST_SHA" ] && [ "${backlog_count:-0}" -eq 0 ] && [ "${tech_debt_count:-0}" -eq 0 ]; then
|
||||||
|
log "no new commits and no issues to groom — skipping"
|
||||||
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
log "current sha: ${CURRENT_SHA:0:8}..., backlog issues: ${backlog_count}, tech-debt issues: ${tech_debt_count}"
|
||||||
|
|
||||||
|
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||||
|
resolve_agent_identity || true
|
||||||
|
|
||||||
# ── Load formula + context ───────────────────────────────────────────────
|
# ── Load formula + context ───────────────────────────────────────────────
|
||||||
load_formula_or_profile "gardener" "$FACTORY_ROOT/formulas/run-gardener.toml" || exit 1
|
load_formula_or_profile "gardener" "$FACTORY_ROOT/formulas/run-gardener.toml" || exit 1
|
||||||
build_context_block AGENTS.md
|
build_context_block AGENTS.md
|
||||||
|
|
@ -127,16 +151,7 @@ ${SCRATCH_INSTRUCTION}
|
||||||
${PROMPT_FOOTER}"
|
${PROMPT_FOOTER}"
|
||||||
|
|
||||||
# ── Create worktree ──────────────────────────────────────────────────────
|
# ── Create worktree ──────────────────────────────────────────────────────
|
||||||
cd "$PROJECT_REPO_ROOT"
|
formula_worktree_setup "$WORKTREE"
|
||||||
git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true
|
|
||||||
worktree_cleanup "$WORKTREE"
|
|
||||||
git worktree add "$WORKTREE" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null
|
|
||||||
|
|
||||||
cleanup() {
|
|
||||||
worktree_cleanup "$WORKTREE"
|
|
||||||
rm -f "$GARDENER_PR_FILE"
|
|
||||||
}
|
|
||||||
trap cleanup EXIT
|
|
||||||
|
|
||||||
# ── Post-merge manifest execution ────────────────────────────────────────
|
# ── Post-merge manifest execution ────────────────────────────────────────
|
||||||
# Reads gardener/pending-actions.json and executes each action via API.
|
# Reads gardener/pending-actions.json and executes each action via API.
|
||||||
|
|
@ -165,19 +180,21 @@ _gardener_execute_manifest() {
|
||||||
|
|
||||||
case "$action" in
|
case "$action" in
|
||||||
add_label)
|
add_label)
|
||||||
local label label_id
|
local label label_id http_code resp
|
||||||
label=$(jq -r ".[$i].label" "$manifest_file")
|
label=$(jq -r ".[$i].label" "$manifest_file")
|
||||||
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
"${FORGE_API}/labels" | jq -r --arg n "$label" \
|
"${FORGE_API}/labels" | jq -r --arg n "$label" \
|
||||||
'.[] | select(.name == $n) | .id') || true
|
'.[] | select(.name == $n) | .id') || true
|
||||||
if [ -n "$label_id" ]; then
|
if [ -n "$label_id" ]; then
|
||||||
if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
"${FORGE_API}/issues/${issue}/labels" \
|
"${FORGE_API}/issues/${issue}/labels" \
|
||||||
-d "{\"labels\":[${label_id}]}" >/dev/null 2>&1; then
|
-d "{\"labels\":[${label_id}]}" 2>/dev/null) || true
|
||||||
|
http_code=$(echo "$resp" | tail -1)
|
||||||
|
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
|
||||||
log "manifest: add_label '${label}' to #${issue}"
|
log "manifest: add_label '${label}' to #${issue}"
|
||||||
else
|
else
|
||||||
log "manifest: FAILED add_label '${label}' to #${issue}"
|
log "manifest: FAILED add_label '${label}' to #${issue}: HTTP ${http_code}"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
log "manifest: FAILED add_label — label '${label}' not found"
|
log "manifest: FAILED add_label — label '${label}' not found"
|
||||||
|
|
@ -185,17 +202,19 @@ _gardener_execute_manifest() {
|
||||||
;;
|
;;
|
||||||
|
|
||||||
remove_label)
|
remove_label)
|
||||||
local label label_id
|
local label label_id http_code resp
|
||||||
label=$(jq -r ".[$i].label" "$manifest_file")
|
label=$(jq -r ".[$i].label" "$manifest_file")
|
||||||
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
"${FORGE_API}/labels" | jq -r --arg n "$label" \
|
"${FORGE_API}/labels" | jq -r --arg n "$label" \
|
||||||
'.[] | select(.name == $n) | .id') || true
|
'.[] | select(.name == $n) | .id') || true
|
||||||
if [ -n "$label_id" ]; then
|
if [ -n "$label_id" ]; then
|
||||||
if curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
resp=$(curl -sf -w "\n%{http_code}" -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
"${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1; then
|
"${FORGE_API}/issues/${issue}/labels/${label_id}" 2>/dev/null) || true
|
||||||
|
http_code=$(echo "$resp" | tail -1)
|
||||||
|
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||||
log "manifest: remove_label '${label}' from #${issue}"
|
log "manifest: remove_label '${label}' from #${issue}"
|
||||||
else
|
else
|
||||||
log "manifest: FAILED remove_label '${label}' from #${issue}"
|
log "manifest: FAILED remove_label '${label}' from #${issue}: HTTP ${http_code}"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
log "manifest: FAILED remove_label — label '${label}' not found"
|
log "manifest: FAILED remove_label — label '${label}' not found"
|
||||||
|
|
@ -203,34 +222,38 @@ _gardener_execute_manifest() {
|
||||||
;;
|
;;
|
||||||
|
|
||||||
close)
|
close)
|
||||||
local reason
|
local reason http_code resp
|
||||||
reason=$(jq -r ".[$i].reason // empty" "$manifest_file")
|
reason=$(jq -r ".[$i].reason // empty" "$manifest_file")
|
||||||
if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
"${FORGE_API}/issues/${issue}" \
|
"${FORGE_API}/issues/${issue}" \
|
||||||
-d '{"state":"closed"}' >/dev/null 2>&1; then
|
-d '{"state":"closed"}' 2>/dev/null) || true
|
||||||
|
http_code=$(echo "$resp" | tail -1)
|
||||||
|
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||||
log "manifest: closed #${issue} (${reason})"
|
log "manifest: closed #${issue} (${reason})"
|
||||||
else
|
else
|
||||||
log "manifest: FAILED close #${issue}"
|
log "manifest: FAILED close #${issue}: HTTP ${http_code}"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
|
||||||
comment)
|
comment)
|
||||||
local body escaped_body
|
local body escaped_body http_code resp
|
||||||
body=$(jq -r ".[$i].body" "$manifest_file")
|
body=$(jq -r ".[$i].body" "$manifest_file")
|
||||||
escaped_body=$(printf '%s' "$body" | jq -Rs '.')
|
escaped_body=$(printf '%s' "$body" | jq -Rs '.')
|
||||||
if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
"${FORGE_API}/issues/${issue}/comments" \
|
"${FORGE_API}/issues/${issue}/comments" \
|
||||||
-d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then
|
-d "{\"body\":${escaped_body}}" 2>/dev/null) || true
|
||||||
|
http_code=$(echo "$resp" | tail -1)
|
||||||
|
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
|
||||||
log "manifest: commented on #${issue}"
|
log "manifest: commented on #${issue}"
|
||||||
else
|
else
|
||||||
log "manifest: FAILED comment on #${issue}"
|
log "manifest: FAILED comment on #${issue}: HTTP ${http_code}"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
|
||||||
create_issue)
|
create_issue)
|
||||||
local title body labels escaped_title escaped_body label_ids
|
local title body labels escaped_title escaped_body label_ids http_code resp
|
||||||
title=$(jq -r ".[$i].title" "$manifest_file")
|
title=$(jq -r ".[$i].title" "$manifest_file")
|
||||||
body=$(jq -r ".[$i].body" "$manifest_file")
|
body=$(jq -r ".[$i].body" "$manifest_file")
|
||||||
labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file")
|
labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file")
|
||||||
|
|
@ -250,40 +273,46 @@ _gardener_execute_manifest() {
|
||||||
done <<< "$labels"
|
done <<< "$labels"
|
||||||
[ -n "$ids_json" ] && label_ids="[${ids_json}]"
|
[ -n "$ids_json" ] && label_ids="[${ids_json}]"
|
||||||
fi
|
fi
|
||||||
if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
"${FORGE_API}/issues" \
|
"${FORGE_API}/issues" \
|
||||||
-d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" >/dev/null 2>&1; then
|
-d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" 2>/dev/null) || true
|
||||||
|
http_code=$(echo "$resp" | tail -1)
|
||||||
|
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
|
||||||
log "manifest: created issue '${title}'"
|
log "manifest: created issue '${title}'"
|
||||||
else
|
else
|
||||||
log "manifest: FAILED create_issue '${title}'"
|
log "manifest: FAILED create_issue '${title}': HTTP ${http_code}"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
|
||||||
edit_body)
|
edit_body)
|
||||||
local body escaped_body
|
local body escaped_body http_code resp
|
||||||
body=$(jq -r ".[$i].body" "$manifest_file")
|
body=$(jq -r ".[$i].body" "$manifest_file")
|
||||||
escaped_body=$(printf '%s' "$body" | jq -Rs '.')
|
escaped_body=$(printf '%s' "$body" | jq -Rs '.')
|
||||||
if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
"${FORGE_API}/issues/${issue}" \
|
"${FORGE_API}/issues/${issue}" \
|
||||||
-d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then
|
-d "{\"body\":${escaped_body}}" 2>/dev/null) || true
|
||||||
|
http_code=$(echo "$resp" | tail -1)
|
||||||
|
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||||
log "manifest: edited body of #${issue}"
|
log "manifest: edited body of #${issue}"
|
||||||
else
|
else
|
||||||
log "manifest: FAILED edit_body #${issue}"
|
log "manifest: FAILED edit_body #${issue}: HTTP ${http_code}"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
|
||||||
close_pr)
|
close_pr)
|
||||||
local pr
|
local pr http_code resp
|
||||||
pr=$(jq -r ".[$i].pr" "$manifest_file")
|
pr=$(jq -r ".[$i].pr" "$manifest_file")
|
||||||
if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
"${FORGE_API}/pulls/${pr}" \
|
"${FORGE_API}/pulls/${pr}" \
|
||||||
-d '{"state":"closed"}' >/dev/null 2>&1; then
|
-d '{"state":"closed"}' 2>/dev/null) || true
|
||||||
|
http_code=$(echo "$resp" | tail -1)
|
||||||
|
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||||
log "manifest: closed PR #${pr}"
|
log "manifest: closed PR #${pr}"
|
||||||
else
|
else
|
||||||
log "manifest: FAILED close_pr #${pr}"
|
log "manifest: FAILED close_pr #${pr}: HTTP ${http_code}"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
|
@ -328,9 +357,9 @@ if [ -n "$PR_NUMBER" ]; then
|
||||||
|
|
||||||
if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then
|
if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then
|
||||||
# Post-merge: pull primary, mirror push, execute manifest
|
# Post-merge: pull primary, mirror push, execute manifest
|
||||||
git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true
|
git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||||
git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
|
git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||||
git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true
|
git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||||
mirror_push
|
mirror_push
|
||||||
_gardener_execute_manifest
|
_gardener_execute_manifest
|
||||||
rm -f "$SCRATCH_FILE"
|
rm -f "$SCRATCH_FILE"
|
||||||
|
|
@ -347,4 +376,8 @@ fi
|
||||||
profile_write_journal "gardener-run" "Gardener run $(date -u +%Y-%m-%d)" "complete" "" || true
|
profile_write_journal "gardener-run" "Gardener run $(date -u +%Y-%m-%d)" "complete" "" || true
|
||||||
|
|
||||||
rm -f "$GARDENER_PR_FILE"
|
rm -f "$GARDENER_PR_FILE"
|
||||||
|
|
||||||
|
# Persist last-seen SHA for next run comparison
|
||||||
|
echo "$CURRENT_SHA" > "$LAST_SHA_FILE"
|
||||||
|
|
||||||
log "--- Gardener run done ---"
|
log "--- Gardener run done ---"
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,47 @@
|
||||||
[
|
[
|
||||||
|
{
|
||||||
|
"action": "close",
|
||||||
|
"issue": 419,
|
||||||
|
"reason": "Vision goal complete — all sub-issues #437-#454 closed, vault blast-radius redesign delivered"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "close",
|
||||||
|
"issue": 494,
|
||||||
|
"reason": "Resolved by PRs #502 and #503 (both merged) — repo_root workaround removed, container paths derived at runtime"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "close",
|
||||||
|
"issue": 477,
|
||||||
|
"reason": "Obsolete — #379 (while-true loop) was deployed on 2026-04-08; env.sh container guard is now correct behavior, no revert needed"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"action": "edit_body",
|
"action": "edit_body",
|
||||||
"issue": 189,
|
"issue": 498,
|
||||||
"body": "Flagged by AI reviewer in PR #188.\n\n## Problem\n\nBoth `get_pr_merger` and `get_pr_reviews` end with `|| true`, meaning they always return exit code 0. The callers pattern-match on the exit code:\n\n```bash\nreviews_json=$(get_pr_reviews \"$pr_number\") || {\n log \"WARNING: Could not fetch reviews...\"\n return 1\n}\n```\n\nBecause the function always succeeds, this `|| { ... }` block is unreachable dead code. If the underlying `curl` call fails, `reviews_json` is empty. The subsequent `jq 'length // 0'` on empty input produces empty output; `[ \"\" -eq 0 ]` then throws a bash integer error instead of the intended warning log. The fallback to the merger check still occurs, but via an uncontrolled error path.\n\nSame pre-existing issue in `get_pr_merger`.\n\n## Fix\n\nRemove `|| true` from both helpers so curl failures propagate, letting the `|| { log ...; return 1; }` handlers fire correctly.\n\n---\n*Auto-created from AI review*\n\n## Acceptance criteria\n\n- [ ] `get_pr_merger` does not end with `|| true`\n- [ ] `get_pr_reviews` does not end with `|| true`\n- [ ] When curl fails inside `get_pr_merger`, the caller `|| { ... }` error handler fires\n- [ ] When curl fails inside `get_pr_reviews`, the caller `|| { ... }` error handler fires\n- [ ] ShellCheck passes on `docker/edge/dispatcher.sh`\n\n## Affected files\n\n- `docker/edge/dispatcher.sh` — `get_pr_merger()` and `get_pr_reviews()` functions\n"
|
"body": "Flagged by AI reviewer in PR #496.\n\n## Problem\n\n`has_responses_to_process` is only set to `true` inside the `open_arch_prs >= 3` gate in `architect/architect-run.sh` (line 543). When fewer than 3 architect PRs are open, ACCEPT/REJECT responses on existing PRs are never processed — the response-processing block at line 687 defaults to `false` and is skipped entirely.\n\nThis means that if a user ACCEPTs or REJECTs a pitch while the open PR count is below 3, the architect agent will never handle the response.\n\n## Fix\n\nSet `has_responses_to_process` (or an equivalent guard) unconditionally by scanning open PRs for ACCEPT/REJECT responses, not only when the 3-PR cap is hit.\n\n---\n*Auto-created from AI review*\n\n## Acceptance criteria\n\n- [ ] `has_responses_to_process` is computed by scanning open architect PRs for ACCEPT/REJECT responses regardless of `open_arch_prs` count\n- [ ] When a user posts ACCEPT or REJECT on an architect PR and open PR count < 3, the response is processed in the same run\n- [ ] Existing behavior when `open_arch_prs >= 3` is unchanged\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `architect/architect-run.sh` (lines ~543 and ~687 — response-processing gate)"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"action": "add_label",
|
"action": "add_label",
|
||||||
"issue": 189,
|
"issue": 498,
|
||||||
"label": "backlog"
|
"label": "backlog"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"action": "edit_body",
|
"action": "edit_body",
|
||||||
"issue": 9,
|
"issue": 499,
|
||||||
"body": "## Problem\n\nThe Forgejo instance runs inside a Docker volume (`disinto_forgejo-data`). If the containers or volumes are destroyed, all issues, PRs, review comments, and project history are lost. Git repo data survives on mirrors (Codeberg, GitHub), but the issue tracker does not.\n\n## Design\n\nAdd a periodic `forgejo dump` to export the Forgejo database (issues, users, PRs, comments, labels) to a compressed archive. Store the backup in the ops repo or a dedicated backup location.\n\n### Considerations\n\n- `forgejo dump` produces a zip with the database, repos, and config\n- Only the database portion is critical (repos are mirrored elsewhere)\n- Could run as a cron job inside the agents container, or as a supervisor health check\n- Backup destination options: ops repo, host filesystem, or object storage\n- Retention policy: keep N most recent dumps\n\n## Why not mirror issues to external forges?\n\nThe factory uses a single internal Forgejo API regardless of where mirrors go (Codeberg, GitHub, or both). Keeping one API surface is simpler than adapting to multiple external forge APIs.\n\n## Acceptance criteria\n\n- [ ] A cron job or supervisor health check runs `forgejo dump` periodically (daily or configurable)\n- [ ] Backup archive is stored in a persistent location outside the Docker volume (ops repo or host filesystem)\n- [ ] A retention policy keeps at most N recent backups and removes older ones\n- [ ] The backup mechanism is documented in AGENTS.md or README.md\n- [ ] Failure to backup is logged and does not crash the supervisor\n\n## Affected files\n\n- `supervisor/supervisor-run.sh` or `supervisor/preflight.sh` — add backup health check or cron trigger\n- `disinto-factory/SKILL.md` or `README.md` — document backup configuration\n"
|
"body": "Flagged by AI reviewer in PR #496.\n\n## Problem\n\nIn `architect/architect-run.sh` line 203, the `has_open_subissues` function compares `.number` (a JSON integer) against `$vid` (a bash string via `--arg`). In jq, `42 != \"42\"` evaluates to true (different types are never equal), so the self-exclusion filter never fires. In practice this is low-risk since vision issues don't contain 'Decomposed from #N' in their own bodies, but the self-exclusion logic is silently broken.\n\n## Fix\n\nCast the string to a number in jq: `select(.number != ($vid | tonumber))`\n\n---\n*Auto-created from AI review*\n\n## Acceptance criteria\n\n- [ ] `has_open_subissues` self-exclusion filter correctly excludes the vision issue itself using `($vid | tonumber)` cast\n- [ ] A vision issue does not appear in its own subissue list\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `architect/architect-run.sh` (line ~203 — `has_open_subissues` jq filter)"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"action": "add_label",
|
"action": "add_label",
|
||||||
"issue": 9,
|
"issue": 499,
|
||||||
|
"label": "backlog"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "edit_body",
|
||||||
|
"issue": 471,
|
||||||
|
"body": "## Bug description\n\nWhen dev-bot picks a backlog issue and launches dev-agent.sh, a second dev-poll instance (dev-qwen) can race ahead and mark the issue as stale/blocked before dev-agent.sh finishes claiming it.\n\n## Reproduction\n\nObserved on issues #443 and #445 (2026-04-08):\n\n**#443 timeline:**\n- `20:39:03` — dev-bot removes `backlog`, adds `in-progress` (via dev-poll backlog pickup)\n- `20:39:04` — dev-qwen removes `in-progress`, adds `blocked` with reason `no_assignee_no_open_pr_no_lock`\n- `20:40:11` — dev-bot pushes commit (dev-agent was actually working the whole time)\n- `20:44:02` — PR merged, issue closed\n\n**#445 timeline:**\n- `20:54:03` — dev-bot adds `in-progress`\n- `20:54:06` — dev-qwen marks `blocked` (3 seconds later)\n- `20:55:13` — dev-bot pushes commit\n- `21:09:03` — PR merged, issue closed\n\nIn both cases, the work completed successfully despite being labeled blocked.\n\n## Root cause\n\n`issue_claim()` in `lib/issue-lifecycle.sh` performs three sequential API calls:\n1. PATCH assignee\n2. POST in-progress label\n3. DELETE backlog label\n\nMeanwhile, dev-poll on another agent (dev-qwen) runs its orphan scan, sees the issue labeled `in-progress` but with no assignee set yet (assign PATCH hasn't landed or was read stale), no open PR, and no lock file. It concludes the issue is stale and relabels to `blocked`.\n\nThe race window is ~1-3 seconds between in-progress being set and the assignee being visible to other pollers.\n\n## Impact\n\n- Issues get spuriously labeled `blocked` with a misleading stale diagnostic comment\n- dev-agent continues working anyway (it already has the issue number), so the blocked label is just noise\n- But it could confuse the gardener or humans reading the issue timeline\n- If another dev-poll instance picks up the blocked issue for recovery before the original agent finishes, it could cause duplicate work\n\n## Possible fixes\n\n1. 
**Assign before labeling**: In `issue_claim()`, set the assignee first, then add in-progress. This way, by the time in-progress is visible, the assignee is already set.\n2. **Grace period in stale detection**: Skip issues whose in-progress label was added less than N seconds ago (check label event timestamp via timeline API).\n3. **Lock file before label**: Write the agent lock file (`/tmp/dev-impl-summary-...`) at the start of dev-agent.sh before calling `issue_claim()`, so the stale detector sees the lock.\n4. **Atomic claim check**: dev-poll should re-check assignee after a short delay before declaring stale, to allow for API propagation.\n\n## Acceptance criteria\n\n- [ ] Stale detection in dev-poll does not mark an issue as blocked within the first 60 seconds of the in-progress label being applied\n- [ ] `issue_claim()` assigns the issue before adding the in-progress label (or equivalent fix is implemented)\n- [ ] No spurious `blocked` labels appear on issues that are actively being worked (verified by log inspection or integration test)\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `lib/issue-lifecycle.sh` — `issue_claim()` function (assignee + label ordering)\n- `dev/dev-poll.sh` — orphan/stale detection logic"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "add_label",
|
||||||
|
"issue": 471,
|
||||||
"label": "backlog"
|
"label": "backlog"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
||||||
28
knowledge/ci.md
Normal file
28
knowledge/ci.md
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
# CI/CD — Best Practices
|
||||||
|
|
||||||
|
## CI Pipeline Issues (P2)
|
||||||
|
|
||||||
|
When CI pipelines are stuck running >20min or pending >30min:
|
||||||
|
|
||||||
|
### Investigation Steps
|
||||||
|
1. Check pipeline status via Forgejo API:
|
||||||
|
```bash
|
||||||
|
curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||||
|
"$FORGE_API/pipelines?limit=50" | jq '.[] | {number, status, created}'
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Check Woodpecker CI if configured:
|
||||||
|
```bash
|
||||||
|
curl -sf -H "Authorization: Bearer $WOODPECKER_TOKEN" \
|
||||||
|
"$WOODPECKER_SERVER/api/repos/${WOODPECKER_REPO_ID}/pipelines?limit=10"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Common Fixes
|
||||||
|
- **Stuck pipeline**: Cancel via Forgejo API, retrigger
|
||||||
|
- **Pending pipeline**: Check queue depth, scale CI runners
|
||||||
|
- **Failed pipeline**: Review logs, fix failing test/step
|
||||||
|
|
||||||
|
### Prevention
|
||||||
|
- Set timeout limits on CI pipelines
|
||||||
|
- Monitor runner capacity and scale as needed
|
||||||
|
- Use caching for dependencies to reduce build time
|
||||||
28
knowledge/dev-agent.md
Normal file
28
knowledge/dev-agent.md
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
# Dev Agent — Best Practices
|
||||||
|
|
||||||
|
## Dev Agent Issues (P2)
|
||||||
|
|
||||||
|
When dev-agent is stuck, blocked, or in bad state:
|
||||||
|
|
||||||
|
### Dead Lock File
|
||||||
|
```bash
|
||||||
|
# Check if process still exists
|
||||||
|
ps -p $(cat /path/to/lock.file) 2>/dev/null || rm -f /path/to/lock.file
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stale Worktree Cleanup
|
||||||
|
```bash
|
||||||
|
cd "$PROJECT_REPO_ROOT"
|
||||||
|
git worktree remove --force /tmp/stale-worktree 2>/dev/null || true
|
||||||
|
git worktree prune 2>/dev/null || true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Blocked Pipeline
|
||||||
|
- Check if PR is awaiting review or CI
|
||||||
|
- Verify no other agent is actively working on same issue
|
||||||
|
- Check for unmet dependencies (issues with `Depends on` refs)
|
||||||
|
|
||||||
|
### Prevention
|
||||||
|
- Concurrency bounded per LLM backend (AD-002)
|
||||||
|
- Clear lock files in EXIT traps
|
||||||
|
- Use phase files to track agent state
|
||||||
35
knowledge/disk.md
Normal file
35
knowledge/disk.md
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
# Disk Management — Best Practices
|
||||||
|
|
||||||
|
## Disk Pressure Response (P1)
|
||||||
|
|
||||||
|
When disk usage exceeds 80%, take these actions in order:
|
||||||
|
|
||||||
|
### Immediate Actions
|
||||||
|
1. **Docker cleanup** (safe, low impact):
|
||||||
|
```bash
|
||||||
|
sudo docker system prune -f
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Aggressive Docker cleanup** (if still >80%):
|
||||||
|
```bash
|
||||||
|
sudo docker system prune -a -f
|
||||||
|
```
|
||||||
|
This removes unused images in addition to containers/volumes.
|
||||||
|
|
||||||
|
3. **Log rotation**:
|
||||||
|
```bash
|
||||||
|
for f in "$FACTORY_ROOT"/{dev,review,supervisor,gardener,planner,predictor}/*.log; do
|
||||||
|
[ -f "$f" ] && [ "$(du -k "$f" | cut -f1)" -gt 10240 ] && truncate -s 0 "$f"
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
### Prevention
|
||||||
|
- Monitor disk with alerts at 70% (warning) and 80% (critical)
|
||||||
|
- Set up automatic log rotation for agent logs
|
||||||
|
- Clean up old Docker images regularly
|
||||||
|
- Consider using separate partitions for `/var/lib/docker`
|
||||||
|
|
||||||
|
### When to Escalate
|
||||||
|
- Disk stays >80% after cleanup (indicates legitimate growth)
|
||||||
|
- No unused Docker images to clean
|
||||||
|
- Critical data filling disk (check /home, /var/log)
|
||||||
25
knowledge/forge.md
Normal file
25
knowledge/forge.md
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
# Forgejo Operations — Best Practices
|
||||||
|
|
||||||
|
## Forgejo Issues
|
||||||
|
|
||||||
|
When Forgejo operations encounter issues:
|
||||||
|
|
||||||
|
### API Rate Limits
|
||||||
|
- Monitor rate limit headers in API responses
|
||||||
|
- Implement exponential backoff on 429 responses
|
||||||
|
- Use agent-specific tokens (#747) to increase limits
|
||||||
|
|
||||||
|
### Authentication Issues
|
||||||
|
- Verify FORGE_TOKEN is valid and not expired
|
||||||
|
- Check agent identity matches token (#747)
|
||||||
|
- Use FORGE_<AGENT>_TOKEN for agent-specific identities
|
||||||
|
|
||||||
|
### Repository Access
|
||||||
|
- Verify FORGE_REMOTE matches actual git remote
|
||||||
|
- Check token has appropriate permissions (repo, write)
|
||||||
|
- Use `resolve_forge_remote()` to auto-detect remote
|
||||||
|
|
||||||
|
### Prevention
|
||||||
|
- Set up monitoring for API failures
|
||||||
|
- Rotate tokens before expiry
|
||||||
|
- Document required permissions per agent
|
||||||
28
knowledge/git.md
Normal file
28
knowledge/git.md
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
# Git State Recovery — Best Practices
|
||||||
|
|
||||||
|
## Git State Issues (P2)
|
||||||
|
|
||||||
|
When git repo is on wrong branch or in broken rebase state:
|
||||||
|
|
||||||
|
### Wrong Branch Recovery
|
||||||
|
```bash
|
||||||
|
cd "$PROJECT_REPO_ROOT"
|
||||||
|
git checkout "$PRIMARY_BRANCH" 2>/dev/null || git checkout master 2>/dev/null
|
||||||
|
```
|
||||||
|
|
||||||
|
### Broken Rebase Recovery
|
||||||
|
```bash
|
||||||
|
cd "$PROJECT_REPO_ROOT"
|
||||||
|
git rebase --abort 2>/dev/null || true
|
||||||
|
git checkout "$PRIMARY_BRANCH" 2>/dev/null || git checkout master 2>/dev/null
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stale Lock File Cleanup
|
||||||
|
```bash
|
||||||
|
rm -f /path/to/stale.lock
|
||||||
|
```
|
||||||
|
|
||||||
|
### Prevention
|
||||||
|
- Always checkout primary branch after rebase conflicts
|
||||||
|
- Remove lock files after agent sessions complete
|
||||||
|
- Use `git status` to verify repo state before operations
|
||||||
27
knowledge/memory.md
Normal file
27
knowledge/memory.md
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
# Memory Management — Best Practices
|
||||||
|
|
||||||
|
## Memory Crisis Response (P0)
|
||||||
|
|
||||||
|
When RAM available drops below 500MB or swap usage exceeds 3GB, take these actions:
|
||||||
|
|
||||||
|
### Immediate Actions
|
||||||
|
1. **Kill stale claude processes** (>3 hours old):
|
||||||
|
```bash
|
||||||
|
pgrep -f "claude -p" --older 10800 2>/dev/null | xargs kill 2>/dev/null || true
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Drop filesystem caches**:
|
||||||
|
```bash
|
||||||
|
sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 || true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Prevention
|
||||||
|
- Set memory_guard to 2000MB minimum (default in env.sh)
|
||||||
|
- Configure swap usage alerts at 2GB
|
||||||
|
- Monitor for memory leaks in long-running processes
|
||||||
|
- Use cgroups for process memory limits
|
||||||
|
|
||||||
|
### When to Escalate
|
||||||
|
- RAM stays <500MB after cache drop
|
||||||
|
- Swap continues growing after process kills
|
||||||
|
- System becomes unresponsive (OOM killer active)
|
||||||
23
knowledge/review-agent.md
Normal file
23
knowledge/review-agent.md
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
# Review Agent — Best Practices
|
||||||
|
|
||||||
|
## Review Agent Issues
|
||||||
|
|
||||||
|
When review agent encounters issues with PRs:
|
||||||
|
|
||||||
|
### Stale PR Handling
|
||||||
|
- PRs stale >20min (CI done, no push since) → file vault item for dev-agent
|
||||||
|
- Do NOT push branches or attempt merges directly
|
||||||
|
- File vault item with:
|
||||||
|
- What: Stale PR requiring push
|
||||||
|
- Why: Factory degraded
|
||||||
|
- Unblocks: dev-agent will push the branch
|
||||||
|
|
||||||
|
### Circular Dependencies
|
||||||
|
- Check backlog for issues with circular `Depends on` refs
|
||||||
|
- Use `lib/parse-deps.sh` to analyze dependency graph
|
||||||
|
- Report to planner for resolution
|
||||||
|
|
||||||
|
### Prevention
|
||||||
|
- Review agent only reads PRs, never modifies
|
||||||
|
- Use vault items for actions requiring dev-agent
|
||||||
|
- Monitor for PRs stuck in review state
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
<!-- last-reviewed: 33eb565d7e0c5b7e0159e1720ba7f79126a7e25e -->
|
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
|
||||||
# Shared Helpers (`lib/`)
|
# Shared Helpers (`lib/`)
|
||||||
|
|
||||||
All agents source `lib/env.sh` as their first action. Additional helpers are
|
All agents source `lib/env.sh` as their first action. Additional helpers are
|
||||||
|
|
@ -6,20 +6,29 @@ sourced as needed.
|
||||||
|
|
||||||
| File | What it provides | Sourced by |
|
| File | What it provides | Sourced by |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. | Every agent |
|
| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Required env var**: `FORGE_PASS` — bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). | Every agent |
|
||||||
| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr, supervisor-poll |
|
| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr |
|
||||||
| `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) |
|
| `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) |
|
||||||
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) |
|
| `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. `<pipeline_number> [--step <name>]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). | ci-helpers.sh |
|
||||||
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
|
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). **Container path derivation**: `PROJECT_REPO_ROOT` and `OPS_REPO_ROOT` are derived at runtime when `DISINTO_CONTAINER=1` — hardcoded to `/home/agent/repos/$PROJECT_NAME` and `/home/agent/repos/$PROJECT_NAME-ops` respectively — not read from the TOML. This ensures correct paths inside containers where host paths in the TOML would be wrong. | env.sh (when `PROJECT_TOML` is set) |
|
||||||
| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `build_graph_section()`, `run_formula_and_monitor(AGENT [TIMEOUT] [CALLBACK])` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `formula_phase_callback()` handles `PHASE:escalate` (unified escalation path — kills the session). `run_formula_and_monitor` accepts an optional CALLBACK (default: `formula_phase_callback`) so callers can install custom merge-through or escalation handlers. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh |
|
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll |
|
||||||
| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points |
|
| `lib/formula-session.sh` | `acquire_run_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven polling-loop agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). `resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh |
|
||||||
| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh and dev/phase-handler.sh — called after every successful merge. | dev-poll.sh, phase-handler.sh |
|
| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in loop logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | polling-loop entry points |
|
||||||
|
| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh |
|
||||||
| `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh |
|
| `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh |
|
||||||
| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh |
|
| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh |
|
||||||
| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | (available for future use) |
|
| `lib/stack-lock.sh` | File-based lock protocol for singleton project stack access. `stack_lock_acquire(holder, project)` — polls until free, breaks stale heartbeats (>10 min old), claims lock. `stack_lock_release(project)` — deletes lock file. `stack_lock_check(project)` — inspect current lock state. `stack_lock_heartbeat(project)` — update heartbeat timestamp (callers must call every 2 min while holding). Lock files at `~/data/locks/<project>-stack.lock`. | docker/edge/dispatcher.sh, reproduce formula |
|
||||||
| `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) |
|
| `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) |
|
||||||
| `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh |
|
| `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh |
|
||||||
| `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future) |
|
| `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future) |
|
||||||
| `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future) |
|
| `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future) |
|
||||||
| `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `phase_file` is set, passes it to the idle stop hook (`on-idle-stop.sh`) so the hook can **nudge Claude** (up to 2 times) if Claude returns to the prompt without writing to the phase file — the hook injects a tmux reminder asking Claude to signal PHASE:done or PHASE:awaiting_ci. The PreToolUse guard hook (`on-pretooluse-guard.sh`) receives the session name as a third argument — formula agents (`gardener-*`, `planner-*`, `predictor-*`, `supervisor-*`) are identified this way and allowed to access `FACTORY_ROOT` from worktrees (they need env.sh, AGENTS.md, formulas/, lib/). 
**OAuth flock**: when `DISINTO_CONTAINER=1`, Claude CLI is wrapped in `flock -w 300 ~/.claude/session.lock` to queue concurrent token refresh attempts and prevent rotation races across agents sharing the same credentials. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `>` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or `PHASE:escalate` / other `PHASE:*` string. **Unified escalation**: `PHASE:escalate` is the signal that a session needs human input (renamed from `PHASE:needs_human`). **Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract. | dev-agent.sh |
|
| `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request <action_id> <toml_content>` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/<action-id>`, writes `vault/actions/<action-id>.toml`, creates PR targeting `main` with title `vault: <action-id>` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. **Low-tier bypass**: if the action's `blast_radius` classifies as `low` (via `vault/classify.sh`), `vault_request` calls `_vault_commit_direct()` which commits directly to ops `main` using `FORGE_ADMIN_TOKEN` — no PR, no approval wait. Returns `0` (not a PR number) for direct commits. Requires `FORGE_TOKEN`, `FORGE_ADMIN_TOKEN` (low-tier only), `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher |
|
||||||
| `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request <action_id> <toml_content>` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/<action-id>`, writes `vault/actions/<action-id>.toml`, creates PR targeting `main` with title `vault: <action-id>` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher |
|
| `lib/branch-protection.sh` | Branch protection helpers for Forgejo repos. `setup_vault_branch_protection()` — configures admin-only merge protection on main (require 1 approval, restrict merge to admin role, block direct pushes). `setup_profile_branch_protection()` — same protection for `.profile` repos. `verify_branch_protection()` — checks protection is correctly configured. `remove_branch_protection()` — removes protection (cleanup/testing). Handles race condition after initial push: retries with backoff if Forgejo hasn't processed the branch yet. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_OPS_REPO`. | bin/disinto (hire-an-agent) |
|
||||||
|
| `lib/agent-sdk.sh` | `agent_run([--resume SESSION_ID] [--worktree DIR] PROMPT)` — one-shot `claude -p` invocation with session persistence. Saves session ID to `SID_FILE`, reads it back on resume. `agent_recover_session()` — restore previous session ID from `SID_FILE` on startup. **Nudge guard**: skips nudge injection if the worktree is clean and no push is expected, preventing spurious re-invocations. Callers must define `SID_FILE`, `LOGFILE`, and `log()` before sourcing. | formula-driven agents (dev-agent, planner-run, predictor-run, gardener-run) |
|
||||||
|
| `lib/forge-setup.sh` | `setup_forge()` — Forgejo instance provisioning: creates admin user, bot accounts, org, repos (code + ops), configures webhooks, sets repo topics. Extracted from `bin/disinto`. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`. **Password storage (#361)**: after creating each bot account, stores its password in `.env` as `FORGE_<BOT>_PASS` (e.g. `FORGE_PASS`, `FORGE_REVIEW_PASS`, etc.) for use by `forge-push.sh`. | bin/disinto (init) |
|
||||||
|
| `lib/forge-push.sh` | `push_to_forge()` — pushes a local clone to the Forgejo remote and verifies the push. `_assert_forge_push_globals()` validates required env vars before use. Requires `FORGE_URL`, `FORGE_PASS`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. **Auth**: uses `FORGE_PASS` (bot password) for git HTTP push — Forgejo 11.x rejects API tokens for `git push` (#361). | bin/disinto (init) |
|
||||||
|
| `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence, sprints). Evidence subdirectories seeded: engagement/, red-team/, holdout/, evolution/, user-test/. Also seeds sprints/ for architect output. Exports `_ACTUAL_OPS_SLUG`. `migrate_ops_repo(ops_root, [primary_branch])` — idempotent migration helper that seeds missing directories and .gitkeep files on existing ops repos (pre-#407 deployments). | bin/disinto (init) |
|
||||||
|
| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for bare-metal deployments (compose mode uses polling loop instead). `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) |
|
||||||
|
| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (uses `codeberg.org/forgejo/forgejo:11.0` tag; adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility), `generate_caddyfile()` — Caddyfile, `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) |
|
||||||
|
| `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) |
|
||||||
|
| `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) |
|
||||||
|
|
|
||||||
|
|
@ -46,9 +46,23 @@ agent_run() {
|
||||||
[ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL")
|
[ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL")
|
||||||
|
|
||||||
local run_dir="${worktree_dir:-$(pwd)}"
|
local run_dir="${worktree_dir:-$(pwd)}"
|
||||||
local output
|
local lock_file="${HOME}/.claude/session.lock"
|
||||||
|
mkdir -p "$(dirname "$lock_file")"
|
||||||
|
local output rc
|
||||||
log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})"
|
log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})"
|
||||||
output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true
|
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$?
|
||||||
|
if [ "$rc" -eq 124 ]; then
|
||||||
|
log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)"
|
||||||
|
elif [ "$rc" -ne 0 ]; then
|
||||||
|
log "agent_run: claude exited with code $rc"
|
||||||
|
# Log last 3 lines of output for diagnostics
|
||||||
|
if [ -n "$output" ]; then
|
||||||
|
log "agent_run: last output lines: $(echo "$output" | tail -3)"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if [ -z "$output" ]; then
|
||||||
|
log "agent_run: empty output (claude may have crashed or failed, exit code: $rc)"
|
||||||
|
fi
|
||||||
|
|
||||||
# Extract and persist session_id
|
# Extract and persist session_id
|
||||||
local new_sid
|
local new_sid
|
||||||
|
|
@ -66,7 +80,7 @@ agent_run() {
|
||||||
|
|
||||||
# Nudge: if the model stopped without pushing, resume with encouragement.
|
# Nudge: if the model stopped without pushing, resume with encouragement.
|
||||||
# Some models emit end_turn prematurely when confused. A nudge often unsticks them.
|
# Some models emit end_turn prematurely when confused. A nudge often unsticks them.
|
||||||
if [ -n "$_AGENT_SESSION_ID" ]; then
|
if [ -n "$_AGENT_SESSION_ID" ] && [ -n "$output" ]; then
|
||||||
local has_changes
|
local has_changes
|
||||||
has_changes=$(cd "$run_dir" && git status --porcelain 2>/dev/null | head -1) || true
|
has_changes=$(cd "$run_dir" && git status --porcelain 2>/dev/null | head -1) || true
|
||||||
local has_pushed
|
local has_pushed
|
||||||
|
|
@ -76,7 +90,17 @@ agent_run() {
|
||||||
# Nudge: there are uncommitted changes
|
# Nudge: there are uncommitted changes
|
||||||
local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push."
|
local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push."
|
||||||
log "agent_run: nudging (uncommitted changes)"
|
log "agent_run: nudging (uncommitted changes)"
|
||||||
output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") || true
|
local nudge_rc
|
||||||
|
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$?
|
||||||
|
if [ "$nudge_rc" -eq 124 ]; then
|
||||||
|
log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)"
|
||||||
|
elif [ "$nudge_rc" -ne 0 ]; then
|
||||||
|
log "agent_run: nudge claude exited with code $nudge_rc"
|
||||||
|
# Log last 3 lines of output for diagnostics
|
||||||
|
if [ -n "$output" ]; then
|
||||||
|
log "agent_run: nudge last output lines: $(echo "$output" | tail -3)"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
|
new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
|
||||||
if [ -n "$new_sid" ]; then
|
if [ -n "$new_sid" ]; then
|
||||||
_AGENT_SESSION_ID="$new_sid"
|
_AGENT_SESSION_ID="$new_sid"
|
||||||
|
|
|
||||||
|
|
@ -1,486 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
# agent-session.sh — Shared tmux + Claude interactive session helpers
|
|
||||||
#
|
|
||||||
# Source this into agent orchestrator scripts for reusable session management.
|
|
||||||
#
|
|
||||||
# Functions:
|
|
||||||
# agent_wait_for_claude_ready SESSION_NAME [TIMEOUT_SECS]
|
|
||||||
# agent_inject_into_session SESSION_NAME TEXT
|
|
||||||
# agent_kill_session SESSION_NAME
|
|
||||||
# monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME]
|
|
||||||
# session_lock_acquire [TIMEOUT_SECS]
|
|
||||||
# session_lock_release
|
|
||||||
|
|
||||||
# --- Cooperative session lock (fd-based) ---
|
|
||||||
# File descriptor for the session lock. Set by create_agent_session().
|
|
||||||
# Callers can release/re-acquire via session_lock_release/session_lock_acquire
|
|
||||||
# to allow other Claude sessions during idle phases (awaiting_review/awaiting_ci).
|
|
||||||
SESSION_LOCK_FD=""
|
|
||||||
|
|
||||||
# Release the session lock without closing the file descriptor.
|
|
||||||
# The fd stays open so it can be re-acquired later.
|
|
||||||
session_lock_release() {
  # Nothing to release when the lock fd was never opened.
  [ -n "${SESSION_LOCK_FD:-}" ] || return 0
  # Unlock only — the fd stays open for a later session_lock_acquire.
  flock -u "$SESSION_LOCK_FD"
}
|
|
||||||
|
|
||||||
# Re-acquire the session lock. Blocks until available or timeout.
|
|
||||||
# Opens the lock fd if not already open (for use by external callers).
|
|
||||||
# Args: [timeout_secs] (default 300)
|
|
||||||
# Returns 0 on success, 1 on timeout/error.
|
|
||||||
# shellcheck disable=SC2120 # timeout arg is used by external callers
|
|
||||||
session_lock_acquire() {
  local wait_secs="${1:-300}"
  # Lazily open the lock fd on first use so external callers can acquire
  # the lock without having gone through create_agent_session() first.
  if [ -z "${SESSION_LOCK_FD:-}" ]; then
    mkdir -p "${HOME}/.claude"
    exec {SESSION_LOCK_FD}>>"${HOME}/.claude/session.lock"
  fi
  # Block until the lock is ours or the timeout expires (flock returns
  # non-zero on timeout, which becomes this function's exit status).
  flock -w "$wait_secs" "$SESSION_LOCK_FD"
}
|
|
||||||
|
|
||||||
# Wait for the Claude ❯ ready prompt in a tmux pane.
|
|
||||||
# Returns 0 if ready within TIMEOUT_SECS (default 120), 1 otherwise.
|
|
||||||
agent_wait_for_claude_ready() {
  local session="$1"
  local deadline="${2:-120}"
  local waited=0
  # Poll the tmux pane every 2s, scanning the captured text for the Claude
  # ❯ ready prompt. Return 0 as soon as it appears, 1 once the deadline hits.
  while [ "$waited" -lt "$deadline" ]; do
    tmux capture-pane -t "$session" -p 2>/dev/null | grep -q '❯' && return 0
    sleep 2
    waited=$((waited + 2))
  done
  return 1
}
|
|
||||||
|
|
||||||
# Paste TEXT into SESSION (waits for Claude to be ready first), then press Enter.
|
|
||||||
agent_inject_into_session() {
  local session="$1"
  local text="$2"
  local tmpfile
  # Re-acquire session lock before injecting — Claude will resume working
  # shellcheck disable=SC2119 # using default timeout
  session_lock_acquire || true
  agent_wait_for_claude_ready "$session" 120 || true
  # Clear idle marker — new work incoming
  rm -f "/tmp/claude-idle-${session}.ts"
  # Stage the text in a temp file so tmux load-buffer carries it verbatim.
  tmpfile=$(mktemp /tmp/agent-inject-XXXXXX)
  printf '%s' "$text" > "$tmpfile"
  tmux load-buffer -b "agent-inject-$$" "$tmpfile"
  # The tmux buffer owns a copy now; remove the temp file immediately so it
  # cannot leak if a later tmux command fails (e.g. under errexit).
  rm -f "$tmpfile"
  tmux paste-buffer -t "$session" -b "agent-inject-$$"
  sleep 0.5
  tmux send-keys -t "$session" "" Enter
  tmux delete-buffer -b "agent-inject-$$" 2>/dev/null || true
}
|
|
||||||
|
|
||||||
# Create a tmux session running Claude in the given workdir.
|
|
||||||
# Installs a Stop hook for idle detection (see monitor_phase_loop).
|
|
||||||
# Installs a PreToolUse hook to guard destructive Bash operations.
|
|
||||||
# Optionally installs a PostToolUse hook for phase file write detection.
|
|
||||||
# Optionally installs a StopFailure hook for immediate phase file update on API error.
|
|
||||||
# Args: session workdir [phase_file]
|
|
||||||
# Returns 0 if session is ready, 1 otherwise.
|
|
||||||
create_agent_session() {
  local session="$1"
  local workdir="${2:-.}"
  local phase_file="${3:-}"

  # Prepare settings directory for hooks
  mkdir -p "${workdir}/.claude"
  local settings="${workdir}/.claude/settings.json"

  # Stop hook for idle detection: when Claude finishes a response, the hook
  # writes a timestamp to a marker file. monitor_phase_loop checks this
  # marker instead of fragile tmux pane scraping.
  local idle_marker="/tmp/claude-idle-${session}.ts"
  local hook_script="${FACTORY_ROOT}/lib/hooks/on-idle-stop.sh"
  if [ -x "$hook_script" ]; then
    local hook_cmd="${hook_script} ${idle_marker}"
    # When a phase file is available, pass it and the session name so the
    # hook can nudge Claude if it returns to the prompt without signalling.
    if [ -n "$phase_file" ]; then
      hook_cmd="${hook_script} ${idle_marker} ${phase_file} ${session}"
    fi
    _install_claude_hook "$settings" "Stop" "" "$hook_cmd"
  fi

  # PostToolUse hook for phase file write detection: when Claude writes to
  # the phase file via Bash or Write, the hook writes a marker so
  # monitor_phase_loop can react immediately instead of waiting for the
  # next mtime-based poll cycle.
  if [ -n "$phase_file" ]; then
    local phase_marker="/tmp/phase-changed-${session}.marker"
    local phase_hook_script="${FACTORY_ROOT}/lib/hooks/on-phase-change.sh"
    if [ -x "$phase_hook_script" ]; then
      _install_claude_hook "$settings" "PostToolUse" "Bash|Write" \
        "${phase_hook_script} ${phase_file} ${phase_marker}"
      rm -f "$phase_marker"
    fi
  fi

  # StopFailure hook for immediate phase file update on API error: on rate
  # limit, server error, billing error, or auth failure the hook writes
  # PHASE:failed and touches the phase-changed marker so monitor_phase_loop
  # reacts within one poll cycle instead of waiting for idle timeout.
  if [ -n "$phase_file" ]; then
    local stop_failure_hook_script="${FACTORY_ROOT}/lib/hooks/on-stop-failure.sh"
    if [ -x "$stop_failure_hook_script" ]; then
      # Same marker path as the PostToolUse block; redeclared so this block
      # is self-contained if that block is ever removed.
      local sf_phase_marker="/tmp/phase-changed-${session}.marker"
      _install_claude_hook "$settings" "StopFailure" \
        "rate_limit|server_error|authentication_failed|billing_error" \
        "${stop_failure_hook_script} ${phase_file} ${sf_phase_marker}"
    fi
  fi

  # PreToolUse hook for destructive operation guard: blocks force push to
  # the primary branch, rm -rf outside the worktree, direct API merge calls,
  # and checkout/switch to the primary branch. Claude sees the denial reason
  # on exit 2 and can self-correct.
  local guard_hook_script="${FACTORY_ROOT}/lib/hooks/on-pretooluse-guard.sh"
  if [ -x "$guard_hook_script" ]; then
    local abs_workdir
    abs_workdir=$(cd "$workdir" 2>/dev/null && pwd) || abs_workdir="$workdir"
    _install_claude_hook "$settings" "PreToolUse" "Bash" \
      "${guard_hook_script} ${PRIMARY_BRANCH:-main} ${abs_workdir} ${session}"
  fi

  # SessionEnd hook for guaranteed cleanup: when the Claude session exits
  # (clean or crash), write a termination marker so monitor_phase_loop
  # detects the exit faster than tmux has-session polling alone.
  local exit_marker="/tmp/claude-exited-${session}.ts"
  local session_end_hook_script="${FACTORY_ROOT}/lib/hooks/on-session-end.sh"
  if [ -x "$session_end_hook_script" ]; then
    _install_claude_hook "$settings" "SessionEnd" "" \
      "${session_end_hook_script} ${exit_marker}"
  fi
  rm -f "$exit_marker"

  # SessionStart hook for context re-injection after compaction: fires after
  # each compaction and outputs the content of a context file so Claude
  # retains critical instructions that compaction would otherwise drop.
  # The context file is written by callers via write_compact_context().
  if [ -n "$phase_file" ]; then
    local compact_hook_script="${FACTORY_ROOT}/lib/hooks/on-compact-reinject.sh"
    if [ -x "$compact_hook_script" ]; then
      local context_file="${phase_file%.phase}.context"
      _install_claude_hook "$settings" "SessionStart" "compact" \
        "${compact_hook_script} ${context_file}"
    fi
  fi

  rm -f "$idle_marker"
  local model_flag=""
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    model_flag="--model ${CLAUDE_MODEL}"
  fi

  # Acquire a session-level mutex via fd-based flock to prevent concurrent
  # Claude sessions from racing on OAuth token refresh. The fd approach lets
  # callers release the lock during idle phases (awaiting_review/awaiting_ci)
  # and re-acquire before injecting the next prompt. See #724.
  # ~/.claude/session.lock is shared across containers when the host
  # ~/.claude directory is bind-mounted.
  local lock_dir="${HOME}/.claude"
  mkdir -p "$lock_dir"
  local claude_lock="${lock_dir}/session.lock"
  if [ -z "${SESSION_LOCK_FD:-}" ]; then
    exec {SESSION_LOCK_FD}>>"${claude_lock}"
  fi
  if ! flock -w 300 "$SESSION_LOCK_FD"; then
    return 1
  fi
  local claude_cmd="claude --dangerously-skip-permissions ${model_flag}"

  tmux new-session -d -s "$session" -c "$workdir" \
    "$claude_cmd" 2>/dev/null
  sleep 1
  tmux has-session -t "$session" 2>/dev/null || return 1
  agent_wait_for_claude_ready "$session" 120 || return 1
  return 0
}

# _install_claude_hook SETTINGS EVENT MATCHER CMD
# Idempotently register a Claude Code hook in a settings.json file: appends
# {matcher: MATCHER, hooks: [{type: "command", command: CMD}]} under
# .hooks.EVENT unless an entry with the same command already exists.
# Creates the settings file when it does not exist yet.
_install_claude_hook() {
  local settings="$1"
  local event="$2"
  local matcher="$3"
  local cmd="$4"
  if [ -f "$settings" ]; then
    # Merge into existing project settings without clobbering other hooks.
    jq --arg ev "$event" --arg m "$matcher" --arg cmd "$cmd" '
      if (.hooks[$ev] // [] | any(.[]; .hooks[]?.command == $cmd))
      then .
      else .hooks[$ev] = (.hooks[$ev] // []) + [{
        matcher: $m,
        hooks: [{type: "command", command: $cmd}]
      }]
      end
    ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings"
  else
    jq -n --arg ev "$event" --arg m "$matcher" --arg cmd "$cmd" '{
      hooks: {
        ($ev): [{
          matcher: $m,
          hooks: [{type: "command", command: $cmd}]
        }]
      }
    }' > "$settings"
  fi
}
|
|
||||||
|
|
||||||
# Inject a prompt/formula into a session (alias for agent_inject_into_session).
|
|
||||||
inject_formula() {
  # Forward all arguments unchanged to agent_inject_into_session.
  agent_inject_into_session "$@"
}
|
|
||||||
|
|
||||||
# Monitor a phase file, calling a callback on changes and handling idle timeout.
|
|
||||||
# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, idle_prompt, done, crashed, PHASE:failed, PHASE:escalate).
|
|
||||||
# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME).
|
|
||||||
# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly.
|
|
||||||
# Args: phase_file idle_timeout_secs callback_fn [session_name]
|
|
||||||
# session_name — tmux session to health-check; falls back to $SESSION_NAME global
|
|
||||||
#
|
|
||||||
# Idle detection: uses a Stop hook marker file (written by lib/hooks/on-idle-stop.sh)
|
|
||||||
# to detect when Claude finishes responding without writing a phase signal.
|
|
||||||
# If the marker exists for 3 consecutive polls with no phase written, the session
|
|
||||||
# is killed and the callback invoked with "PHASE:failed".
|
|
||||||
monitor_phase_loop() {
  local phase_file="$1"
  local idle_timeout="$2"
  local callback="$3"
  # Session name to health-check; falls back to the $SESSION_NAME global.
  local _session="${4:-${SESSION_NAME:-}}"
  # Export resolved session name so callbacks can reference it regardless of
  # which session was passed to monitor_phase_loop (analogous to _MONITOR_LOOP_EXIT).
  export _MONITOR_SESSION="$_session"
  local poll_interval="${PHASE_POLL_INTERVAL:-10}"
  local last_mtime=0
  local idle_elapsed=0
  # Count of consecutive polls with the idle marker present and no phase
  # written; 3 consecutive polls = confirmed idle (not mid-turn).
  local idle_pane_count=0

  while true; do
    sleep "$poll_interval"
    idle_elapsed=$(( idle_elapsed + poll_interval ))

    # Session health check: SessionEnd hook marker provides fast detection,
    # tmux has-session is the fallback for unclean exits (e.g. tmux crash).
    local exit_marker="/tmp/claude-exited-${_session}.ts"
    if [ -f "$exit_marker" ] || ! tmux has-session -t "${_session}" 2>/dev/null; then
      local current_phase
      current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)
      case "$current_phase" in
        PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate)
          ;; # terminal — fall through to phase handler
        *)
          # Call callback with "crashed" — let agent-specific code handle recovery
          if type "${callback}" &>/dev/null; then
            "$callback" "PHASE:crashed"
          fi
          # If callback didn't restart session, break
          if ! tmux has-session -t "${_session}" 2>/dev/null; then
            _MONITOR_LOOP_EXIT="crashed"
            return 1
          fi
          # Callback restarted the session — reset idle tracking and keep polling.
          idle_elapsed=0
          idle_pane_count=0
          continue
          ;;
      esac
    fi

    # Check phase-changed marker from PostToolUse hook — if present, the hook
    # detected a phase file write so we reset last_mtime to force processing
    # this cycle instead of waiting for the next mtime change.
    local phase_marker="/tmp/phase-changed-${_session}.marker"
    if [ -f "$phase_marker" ]; then
      rm -f "$phase_marker"
      last_mtime=0
    fi

    # Check phase file for changes
    local phase_mtime
    phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null || echo 0)
    local current_phase
    current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)

    if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$last_mtime" ]; then
      # No phase change — check idle timeout
      if [ "$idle_elapsed" -ge "$idle_timeout" ]; then
        _MONITOR_LOOP_EXIT="idle_timeout"
        agent_kill_session "${_session}"
        return 0
      fi
      # Idle detection via Stop hook: the on-idle-stop.sh hook writes a marker
      # file when Claude finishes a response. If the marker exists and no phase
      # has been written, Claude returned to the prompt without following the
      # phase protocol. 3 consecutive polls = confirmed idle (not mid-turn).
      local idle_marker="/tmp/claude-idle-${_session}.ts"
      if [ -z "$current_phase" ] && [ -f "$idle_marker" ]; then
        idle_pane_count=$(( idle_pane_count + 1 ))
        if [ "$idle_pane_count" -ge 3 ]; then
          _MONITOR_LOOP_EXIT="idle_prompt"
          # Session is killed before the callback is invoked.
          # Callbacks that handle PHASE:failed must not assume the session is alive.
          agent_kill_session "${_session}"
          if type "${callback}" &>/dev/null; then
            "$callback" "PHASE:failed"
          fi
          return 0
        fi
      else
        idle_pane_count=0
      fi
      continue
    fi

    # Phase changed
    last_mtime="$phase_mtime"
    # shellcheck disable=SC2034 # read by phase-handler.sh callback
    LAST_PHASE_MTIME="$phase_mtime"
    idle_elapsed=0
    idle_pane_count=0

    # Terminal phases
    case "$current_phase" in
      PHASE:done|PHASE:merged)
        _MONITOR_LOOP_EXIT="done"
        if type "${callback}" &>/dev/null; then
          "$callback" "$current_phase"
        fi
        return 0
        ;;
      PHASE:failed|PHASE:escalate)
        _MONITOR_LOOP_EXIT="$current_phase"
        if type "${callback}" &>/dev/null; then
          "$callback" "$current_phase"
        fi
        return 0
        ;;
    esac

    # Non-terminal phase — call callback
    if type "${callback}" &>/dev/null; then
      "$callback" "$current_phase"
    fi
  done
}
|
|
||||||
|
|
||||||
# Write context to a file for re-injection after context compaction.
|
|
||||||
# The SessionStart compact hook reads this file and outputs it to stdout.
|
|
||||||
# Args: phase_file content
|
|
||||||
write_compact_context() {
  # Derive the context path from the phase path: foo.phase -> foo.context.
  local context_path="${1%.phase}.context"
  # Overwrite with the new content, newline-terminated.
  printf '%s\n' "$2" > "$context_path"
}
|
|
||||||
|
|
||||||
# Kill a tmux session gracefully (no-op if not found).
|
|
||||||
agent_kill_session() {
  local session="${1:-}"
  # Without a session name there is nothing to kill, and the marker paths
  # below would collapse to bogus names like /tmp/claude-idle-.ts — bail out.
  [ -n "$session" ] || return 0
  tmux kill-session -t "$session" 2>/dev/null || true
  # Remove the per-session marker files so a reused session name starts clean.
  rm -f "/tmp/claude-idle-${session}.ts" \
        "/tmp/phase-changed-${session}.marker" \
        "/tmp/claude-exited-${session}.ts" \
        "/tmp/claude-nudge-${session}.count"
}
|
|
||||||
|
|
||||||
# Read the current phase from a phase file, stripped of whitespace.
|
|
||||||
# Usage: read_phase [file] — defaults to $PHASE_FILE
|
|
||||||
read_phase() {
  local phase_path="${1:-${PHASE_FILE:-}}"
  # Emit the first line with all whitespace stripped; a missing or
  # unreadable file yields an empty string rather than an error.
  local first_line
  first_line=$({ cat "$phase_path" 2>/dev/null || true; } | head -1)
  printf '%s' "$first_line" | tr -d '[:space:]'
}
|
|
||||||
|
|
@ -34,6 +34,55 @@ _ops_api() {
|
||||||
printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}"
|
printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# _bp_wait_for_branch — Wait for Forgejo to index a branch with linear backoff
|
||||||
|
#
|
||||||
|
# Forgejo's branch indexer can take 5–15s to register a newly-pushed branch.
|
||||||
|
# This helper retries up to 10 times with linear backoff (2s, 4s, 6s, …)
|
||||||
|
# capped at 10s per wait, for a worst-case total of ~70s.
|
||||||
|
#
|
||||||
|
# Args:
|
||||||
|
# $1 - Full API URL for the repo (e.g. https://forge.example/api/v1/repos/owner/repo)
|
||||||
|
# $2 - Branch name
|
||||||
|
# $3 - Human-readable repo identifier for log messages
|
||||||
|
#
|
||||||
|
# Returns: 0 if branch found, 1 if not found after all retries
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
_bp_wait_for_branch() {
  local api_url="$1"
  local branch="$2"
  local repo_label="$3"

  local max_retries=10
  local base_wait=2
  local attempt

  for (( attempt = 1; attempt <= max_retries; attempt++ )); do
    # Probe the branch endpoint; any curl failure maps to status "0".
    local branch_status
    branch_status=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")

    if [ "$branch_status" = "200" ]; then
      _bp_log "Branch ${branch} exists on ${repo_label}"
      return 0
    fi

    # Back off before the next probe (2s, 4s, 6s, … capped at 10s),
    # but never sleep after the final attempt.
    if [ "$attempt" -lt "$max_retries" ]; then
      local wait_time=$(( base_wait * attempt ))
      if [ "$wait_time" -gt 10 ]; then
        wait_time=10
      fi
      _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_retries}), waiting ${wait_time}s..."
      sleep "$wait_time"
    fi
  done

  _bp_log "ERROR: Branch ${branch} does not exist on ${repo_label} after ${max_retries} attempts"
  return 1
}
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# setup_vault_branch_protection — Set up admin-only branch protection for main
|
# setup_vault_branch_protection — Set up admin-only branch protection for main
|
||||||
#
|
#
|
||||||
|
|
@ -51,14 +100,8 @@ setup_vault_branch_protection() {
|
||||||
|
|
||||||
_bp_log "Setting up branch protection for ${branch} on ${FORGE_OPS_REPO}"
|
_bp_log "Setting up branch protection for ${branch} on ${FORGE_OPS_REPO}"
|
||||||
|
|
||||||
# Check if branch exists
|
# Wait for Forgejo to index the branch (may take 5–15s after push)
|
||||||
local branch_exists
|
if ! _bp_wait_for_branch "$api_url" "$branch" "$FORGE_OPS_REPO"; then
|
||||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
|
|
||||||
|
|
||||||
if [ "$branch_exists" != "200" ]; then
|
|
||||||
_bp_log "ERROR: Branch ${branch} does not exist"
|
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -228,14 +271,8 @@ setup_profile_branch_protection() {
|
||||||
local api_url
|
local api_url
|
||||||
api_url="${FORGE_URL}/api/v1/repos/${repo}"
|
api_url="${FORGE_URL}/api/v1/repos/${repo}"
|
||||||
|
|
||||||
# Check if branch exists
|
# Wait for Forgejo to index the branch (may take 5–15s after push)
|
||||||
local branch_exists
|
if ! _bp_wait_for_branch "$api_url" "$branch" "$repo"; then
|
||||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
|
|
||||||
|
|
||||||
if [ "$branch_exists" != "200" ]; then
|
|
||||||
_bp_log "ERROR: Branch ${branch} does not exist on ${repo}"
|
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -379,7 +416,7 @@ remove_branch_protection() {
|
||||||
# - Allow review-bot to approve PRs
|
# - Allow review-bot to approve PRs
|
||||||
#
|
#
|
||||||
# Args:
|
# Args:
|
||||||
# $1 - Repo path in format 'owner/repo' (e.g., 'johba/disinto')
|
# $1 - Repo path in format 'owner/repo' (e.g., 'disinto-admin/disinto')
|
||||||
# $2 - Branch to protect (default: main)
|
# $2 - Branch to protect (default: main)
|
||||||
#
|
#
|
||||||
# Returns: 0 on success, 1 on failure
|
# Returns: 0 on success, 1 on failure
|
||||||
|
|
@ -398,14 +435,8 @@ setup_project_branch_protection() {
|
||||||
local api_url
|
local api_url
|
||||||
api_url="${FORGE_URL}/api/v1/repos/${repo}"
|
api_url="${FORGE_URL}/api/v1/repos/${repo}"
|
||||||
|
|
||||||
# Check if branch exists
|
# Wait for Forgejo to index the branch (may take 5–15s after push)
|
||||||
local branch_exists
|
if ! _bp_wait_for_branch "$api_url" "$branch" "$repo"; then
|
||||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
|
|
||||||
|
|
||||||
if [ "$branch_exists" != "200" ]; then
|
|
||||||
_bp_log "ERROR: Branch ${branch} does not exist on ${repo}"
|
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -536,7 +567,7 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
||||||
echo "Required environment variables:"
|
echo "Required environment variables:"
|
||||||
echo " FORGE_TOKEN Forgejo API token (admin user recommended)"
|
echo " FORGE_TOKEN Forgejo API token (admin user recommended)"
|
||||||
echo " FORGE_URL Forgejo instance URL (e.g., https://codeberg.org)"
|
echo " FORGE_URL Forgejo instance URL (e.g., https://codeberg.org)"
|
||||||
echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., johba/disinto-ops)"
|
echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., disinto-admin/disinto-ops)"
|
||||||
exit 0
|
exit 0
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
|
||||||
|
|
@ -7,27 +7,6 @@ set -euo pipefail
|
||||||
# ci_commit_status() / ci_pipeline_number() require: woodpecker_api(), forge_api() (from env.sh)
|
# ci_commit_status() / ci_pipeline_number() require: woodpecker_api(), forge_api() (from env.sh)
|
||||||
# classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh)
|
# classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh)
|
||||||
|
|
||||||
# ensure_blocked_label_id — look up (or create) the "blocked" label, print its ID.
|
|
||||||
# Caches the result in _BLOCKED_LABEL_ID to avoid repeated API calls.
|
|
||||||
# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api()
|
|
||||||
ensure_blocked_label_id() {
  # Resolve the ID only once; later calls serve the cached value.
  if [ -z "${_BLOCKED_LABEL_ID:-}" ]; then
    # First try to find an existing "blocked" label.
    _BLOCKED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
      | jq -r '.[] | select(.name == "blocked") | .id' 2>/dev/null || true)
    # Not found — create the label and capture the new ID.
    if [ -z "$_BLOCKED_LABEL_ID" ]; then
      _BLOCKED_LABEL_ID=$(curl -sf -X POST \
        -H "Authorization: token ${FORGE_TOKEN}" \
        -H "Content-Type: application/json" \
        "${FORGE_API}/labels" \
        -d '{"name":"blocked","color":"#e11d48"}' 2>/dev/null \
        | jq -r '.id // empty' 2>/dev/null || true)
    fi
  fi
  printf '%s' "$_BLOCKED_LABEL_ID"
}
|
|
||||||
|
|
||||||
# ensure_priority_label — look up (or create) the "priority" label, print its ID.
|
# ensure_priority_label — look up (or create) the "priority" label, print its ID.
|
||||||
# Caches the result in _PRIORITY_LABEL_ID to avoid repeated API calls.
|
# Caches the result in _PRIORITY_LABEL_ID to avoid repeated API calls.
|
||||||
# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api()
|
# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api()
|
||||||
|
|
|
||||||
456
lib/ci-setup.sh
Normal file
456
lib/ci-setup.sh
Normal file
|
|
@ -0,0 +1,456 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# ci-setup.sh — CI setup functions for Woodpecker and scheduling configuration
|
||||||
|
#
|
||||||
|
# Internal functions (called via _load_ci_context + _*_impl):
|
||||||
|
# _install_cron_impl() - Install crontab entries (bare-metal only; compose uses polling loop)
|
||||||
|
# _create_woodpecker_oauth_impl() - Create OAuth2 app on Forgejo for Woodpecker
|
||||||
|
# _generate_woodpecker_token_impl() - Auto-generate WOODPECKER_TOKEN via OAuth2 flow
|
||||||
|
# _activate_woodpecker_repo_impl() - Activate repo in Woodpecker
|
||||||
|
#
|
||||||
|
# Globals expected (asserted by _load_ci_context):
|
||||||
|
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
|
||||||
|
# FORGE_TOKEN - Forge API token
|
||||||
|
# FACTORY_ROOT - Root of the disinto factory
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# source "${FACTORY_ROOT}/lib/ci-setup.sh"
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Assert required globals are set before using this module.
# Exits the shell (status 1) listing every missing variable at once.
_load_ci_context() {
  local required=(FORGE_URL FORGE_TOKEN FACTORY_ROOT)
  local missing=() var
  for var in "${required[@]}"; do
    # ${!var:-} — indirect expansion: value of the variable named by $var.
    if [ -z "${!var:-}" ]; then
      missing+=("$var")
    fi
  done
  if [ "${#missing[@]}" -gt 0 ]; then
    echo "Error: ci-setup.sh requires these globals to be set: ${missing[*]}" >&2
    exit 1
  fi
}
|
||||||
|
|
||||||
|
# Generate and optionally install cron entries for bare-metal deployments.
# In compose mode, the agents container uses a polling loop (entrypoint.sh) instead.
# Usage: install_cron <name> <toml_path> <auto_yes> <bare>
#   name     - project name, used as the marker comment in the crontab
#   toml     - project TOML path (resolved to an absolute path for cron)
#   auto_yes - "false" to prompt interactively before installing
#   bare     - anything but "false" enables host-crontab installation
# Returns 0 on install/skip, 1 when crontab is missing or installation fails.
_install_cron_impl() {
  local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}"

  # In compose mode, skip host cron — the agents container uses a polling loop
  if [ "$bare" = false ]; then
    echo ""
    echo "Cron: skipped (agents container handles scheduling in compose mode)"
    return
  fi

  # Bare mode: crontab is required on the host
  if ! command -v crontab &>/dev/null; then
    echo "Warning: crontab not found (required for bare-metal scheduling)" >&2
    echo " Install: apt install cron / brew install cron" >&2
    return 1
  fi

  # Use absolute path for the TOML in cron entries
  local abs_toml
  abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")"

  local cron_block
  cron_block="# disinto: ${name}
2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1
4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1
0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1"

  echo ""
  echo "Cron entries to install:"
  echo "$cron_block"
  echo ""

  # Check if cron entries already exist.
  # FIX: use grep -qF -- so the marker is matched as a literal string — a
  # project name containing regex metacharacters (".", "+", ...) previously
  # caused false matches or pattern errors.
  local current_crontab
  current_crontab=$(crontab -l 2>/dev/null || true)
  if echo "$current_crontab" | grep -qF -- "# disinto: ${name}"; then
    echo "Cron: skipped (entries for ${name} already installed)"
    return
  fi

  # Interactive confirmation unless auto_yes or stdin is not a TTY.
  if [ "$auto_yes" = false ] && [ -t 0 ]; then
    local confirm  # FIX: declared local — previously leaked into caller scope
    read -rp "Install these cron entries? [y/N] " confirm
    if [[ ! "$confirm" =~ ^[Yy] ]]; then
      echo "Skipped cron install. Add manually with: crontab -e"
      return
    fi
  fi

  # Append to existing crontab (preserving whatever is already there)
  if { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab -; then
    echo "Cron entries installed for ${name}"
  else
    echo "Error: failed to install cron entries" >&2
    return 1
  fi
}
|
||||||
|
|
||||||
|
# Set up Woodpecker CI to use Forgejo as its forge backend.
# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo.
# Usage: create_woodpecker_oauth <forge_url> <repo_slug>
# Side effects: writes WOODPECKER_* / WP_FORGEJO_* vars into ${FACTORY_ROOT}/.env.
# Failures are soft (warn + return 0) so overall setup keeps going.
_create_woodpecker_oauth_impl() {
  local forge_url="$1"
  local _repo_slug="$2" # unused but required for signature compatibility

  echo ""
  echo "── Woodpecker OAuth2 setup ────────────────────────────"

  # Create OAuth2 application on Forgejo for Woodpecker
  local oauth2_name="woodpecker-ci"
  local redirect_uri="http://localhost:8000/authorize"
  local existing_app client_id client_secret

  # Check if OAuth2 app already exists (match by name, keep its client_id).
  # Trailing "|| true" tolerates curl/jq failure under set -euo pipefail.
  existing_app=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \
    | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true

  if [ -n "$existing_app" ]; then
    # Reuse path: client_secret stays unset — the API only returns the secret
    # at creation time, so WP_FORGEJO_SECRET in .env is NOT refreshed here.
    echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})"
    client_id="$existing_app"
  else
    local oauth2_resp
    oauth2_resp=$(curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/user/applications/oauth2" \
      -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \
      2>/dev/null) || oauth2_resp=""

    if [ -z "$oauth2_resp" ]; then
      echo "Warning: failed to create OAuth2 app on Forgejo" >&2
      return
    fi

    client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty')
    client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty')

    if [ -z "$client_id" ]; then
      echo "Warning: OAuth2 app creation returned no client_id" >&2
      return
    fi

    echo "OAuth2: ${oauth2_name} created (client_id=${client_id})"
  fi

  # Store Woodpecker forge config in .env
  # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references
  # WOODPECKER_HOST must be host-accessible URL to match OAuth2 redirect_uri
  local env_file="${FACTORY_ROOT}/.env"
  local wp_vars=(
    "WOODPECKER_FORGEJO=true"
    "WOODPECKER_FORGEJO_URL=${forge_url}"
    "WOODPECKER_HOST=http://localhost:8000"
  )
  # client_id / client_secret are conditionally appended: secret only exists
  # on the just-created path (see above).
  if [ -n "${client_id:-}" ]; then
    wp_vars+=("WP_FORGEJO_CLIENT=${client_id}")
  fi
  if [ -n "${client_secret:-}" ]; then
    wp_vars+=("WP_FORGEJO_SECRET=${client_secret}")
  fi

  # Upsert each VAR=value line: in-place replace when the key exists,
  # append otherwise.
  # NOTE(review): the sed replacement uses '|' as delimiter — a value
  # containing '|' or '&' would corrupt the substitution; confirm generated
  # client secrets can never contain those characters.
  for var_line in "${wp_vars[@]}"; do
    local var_name="${var_line%%=*}"
    if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then
      sed -i "s|^${var_name}=.*|${var_line}|" "$env_file"
    else
      printf '%s\n' "$var_line" >> "$env_file"
    fi
  done
  echo "Config: Woodpecker forge vars written to .env"
}
|
||||||
|
|
||||||
|
# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow.
# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created).
# Called after compose stack is up, before activate_woodpecker_repo.
# Usage: generate_woodpecker_token <forge_url>
# Returns 0 when a token is saved (or already present); 1 on any step failure.
# Side effects: writes WOODPECKER_TOKEN into ${FACTORY_ROOT}/.env and exports it.
_generate_woodpecker_token_impl() {
  local forge_url="$1"
  local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}"
  local env_file="${FACTORY_ROOT}/.env"
  local admin_user="disinto-admin"
  local admin_pass="${_FORGE_ADMIN_PASS:-}"

  # Skip if already set
  if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then
    echo "Config: WOODPECKER_TOKEN already set in .env"
    return 0
  fi

  echo ""
  echo "── Woodpecker token generation ────────────────────────"

  if [ -z "$admin_pass" ]; then
    echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2
    echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2
    return 1
  fi

  # Wait for Woodpecker to become ready (30 tries x 2s ≈ 60s max)
  echo -n "Waiting for Woodpecker"
  local retries=0
  while ! curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do
    retries=$((retries + 1))
    if [ "$retries" -gt 30 ]; then
      echo ""
      echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2
      return 1
    fi
    echo -n "."
    sleep 2
  done
  echo " ready"

  # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token
  # cookie_jar carries the Forgejo (then Woodpecker) session across curls;
  # auth_body_file captures the consent-page HTML for form scraping.
  local cookie_jar auth_body_file
  cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX)
  auth_body_file=$(mktemp /tmp/wp-body-XXXXXX)

  # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent)
  # Scrape the CSRF token out of the login page HTML (matches either a
  # content="..." or value="..." attribute on the _csrf element).
  local csrf
  csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \
    | grep -o 'name="_csrf"[^>]*' | head -1 \
    | grep -oE '(content|value)="[^"]*"' | head -1 \
    | cut -d'"' -f2) || csrf=""

  if [ -z "$csrf" ]; then
    echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2
    rm -f "$cookie_jar" "$auth_body_file"
    return 1
  fi

  # POST credentials; success is detected later by whether the authorize
  # request below yields a code, so the login itself is best-effort.
  curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \
    -o /dev/null \
    "${forge_url}/user/login" \
    --data-urlencode "_csrf=${csrf}" \
    --data-urlencode "user_name=${admin_user}" \
    --data-urlencode "password=${admin_pass}" \
    2>/dev/null || true

  # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param)
  local wp_redir
  wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \
    "${wp_server}/authorize" 2>/dev/null) || wp_redir=""

  if [ -z "$wp_redir" ]; then
    echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2
    rm -f "$cookie_jar" "$auth_body_file"
    return 1
  fi

  # Rewrite internal Docker network URLs to host-accessible URLs.
  # Handle both plain and URL-encoded forms of the internal hostnames.
  local forge_url_enc wp_server_enc
  forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g')
  wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g')
  wp_redir=$(printf '%s' "$wp_redir" \
    | sed "s|http://forgejo:3000|${forge_url}|g" \
    | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \
    | sed "s|http://woodpecker:8000|${wp_server}|g" \
    | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g")

  # Step 3: Hit Forgejo OAuth authorize endpoint with session
  # First time: shows consent page. Already approved: redirects with code.
  # -D - dumps response headers to stdout; the body goes to auth_body_file.
  local auth_headers redirect_loc auth_code
  auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \
    -D - -o "$auth_body_file" \
    "$wp_redir" 2>/dev/null) || auth_headers=""

  redirect_loc=$(printf '%s' "$auth_headers" \
    | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}')

  if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then
    # Auto-approved: extract code from redirect
    auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/')
  else
    # Consent page: extract CSRF and all form fields, POST grant approval
    local consent_csrf form_client_id form_state form_redirect_uri
    consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \
      | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \
      | cut -d'"' -f2) || consent_csrf=""
    form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \
      | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id=""
    form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \
      | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state=""
    form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \
      | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri=""

    if [ -n "$consent_csrf" ]; then
      local grant_headers
      grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \
        -D - -o /dev/null -X POST \
        "${forge_url}/login/oauth/grant" \
        --data-urlencode "_csrf=${consent_csrf}" \
        --data-urlencode "client_id=${form_client_id}" \
        --data-urlencode "state=${form_state}" \
        --data-urlencode "scope=" \
        --data-urlencode "nonce=" \
        --data-urlencode "redirect_uri=${form_redirect_uri}" \
        --data-urlencode "granted=true" \
        2>/dev/null) || grant_headers=""

      redirect_loc=$(printf '%s' "$grant_headers" \
        | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}')

      if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then
        auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/')
      fi
    fi
  fi

  rm -f "$auth_body_file"

  if [ -z "${auth_code:-}" ]; then
    echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2
    rm -f "$cookie_jar"
    return 1
  fi

  # Step 4: Complete Woodpecker OAuth callback (exchanges code for session)
  local state
  state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p')

  local wp_headers wp_token
  wp_headers=$(curl -sf -c "$cookie_jar" \
    -D - -o /dev/null \
    "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \
    2>/dev/null) || wp_headers=""

  # Extract token from redirect URL (Woodpecker returns ?access_token=...)
  redirect_loc=$(printf '%s' "$wp_headers" \
    | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}')

  wp_token=""
  if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then
    wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/')
  fi

  # Fallback: check for user_sess cookie (last field of the Netscape-format
  # cookie-jar line is the cookie value)
  if [ -z "$wp_token" ]; then
    wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token=""
  fi

  rm -f "$cookie_jar"

  if [ -z "$wp_token" ]; then
    echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2
    return 1
  fi

  # Step 5: Create persistent personal access token via Woodpecker API
  # WP v3 requires CSRF header for POST operations with session tokens.
  local wp_csrf
  wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \
    "${wp_server}/web-config.js" 2>/dev/null \
    | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf=""

  # ${wp_csrf:+...} is deliberately unquoted so it expands to the two words
  # -H "X-CSRF-Token: ..." when set, and to nothing when empty.
  local pat_resp final_token
  pat_resp=$(curl -sf -X POST \
    -b "user_sess=${wp_token}" \
    ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \
    "${wp_server}/api/user/token" \
    2>/dev/null) || pat_resp=""

  final_token=""
  if [ -n "$pat_resp" ]; then
    # Accept either response shape: {"token": ...} or {"access_token": ...}
    final_token=$(printf '%s' "$pat_resp" \
      | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \
      2>/dev/null) || final_token=""
  fi

  # Use persistent token if available, otherwise use session token
  final_token="${final_token:-$wp_token}"

  # Save to .env (upsert — the early-return above makes the replace branch
  # unreachable in practice, kept for safety)
  if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then
    sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file"
  else
    printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file"
  fi
  export WOODPECKER_TOKEN="$final_token"
  echo "Config: WOODPECKER_TOKEN generated and saved to .env"
}
|
||||||
|
|
||||||
|
# Activate a repo in Woodpecker CI.
# Usage: activate_woodpecker_repo <forge_repo>
#   forge_repo - "owner/name" slug on the Forgejo side
# Requires: WOODPECKER_TOKEN, FORGE_TOKEN; optional WOODPECKER_SERVER, FORGE_URL.
# Side effects: sets _WP_REPO_ID (for later TOML generation) on success.
# All failures are soft — warn and return 0 so overall setup continues.
_activate_woodpecker_repo_impl() {
  local forge_repo="$1"
  local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}"

  # Wait for Woodpecker to become ready after stack start (10 tries x 2s)
  local retries=0
  while [ $retries -lt 10 ]; do
    if curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; then
      break
    fi
    retries=$((retries + 1))
    sleep 2
  done

  # Final reachability check after the wait loop above
  if ! curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then
    echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2
    return
  fi

  echo ""
  echo "── Woodpecker repo activation ─────────────────────────"

  local wp_token="${WOODPECKER_TOKEN:-}"
  if [ -z "$wp_token" ]; then
    echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2
    echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2
    return
  fi

  # Idempotency: look the repo up first; trailing "|| true" tolerates
  # curl/jq failure under set -euo pipefail (wp_repo_id ends up empty).
  local wp_repo_id
  wp_repo_id=$(curl -sf \
    -H "Authorization: Bearer ${wp_token}" \
    "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \
    | jq -r '.id // empty' 2>/dev/null) || true

  if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then
    echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})"
  else
    # Get Forgejo repo numeric ID for WP activation
    local forge_repo_id
    forge_repo_id=$(curl -sf \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \
      | jq -r '.id // empty' 2>/dev/null) || forge_repo_id=""

    # Activate by forge-side numeric id (0 as a last-resort placeholder when
    # the Forgejo lookup failed — the POST then fails and we warn below)
    local activate_resp
    activate_resp=$(curl -sf -X POST \
      -H "Authorization: Bearer ${wp_token}" \
      "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \
      2>/dev/null) || activate_resp=""

    wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true

    if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then
      echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})"

      # Set pipeline timeout to 5 minutes (default is 60)
      if curl -sf -X PATCH \
        -H "Authorization: Bearer ${wp_token}" \
        -H "Content-Type: application/json" \
        "${wp_server}/api/repos/${wp_repo_id}" \
        -d '{"timeout": 5}' >/dev/null 2>&1; then
        echo "Config: pipeline timeout set to 5 minutes"
      fi
    else
      echo "Warning: could not activate repo in Woodpecker" >&2
      echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2
    fi
  fi

  # Store repo ID for later TOML generation
  if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then
    _WP_REPO_ID="$wp_repo_id"
  fi
}
|
||||||
63
lib/env.sh
63
lib/env.sh
|
|
@ -13,22 +13,19 @@ FACTORY_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||||
if [ "${DISINTO_CONTAINER:-}" = "1" ]; then
|
if [ "${DISINTO_CONTAINER:-}" = "1" ]; then
|
||||||
DISINTO_DATA_DIR="${HOME}/data"
|
DISINTO_DATA_DIR="${HOME}/data"
|
||||||
DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs"
|
DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs"
|
||||||
mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener}
|
mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher}
|
||||||
else
|
else
|
||||||
DISINTO_LOG_DIR="${FACTORY_ROOT}"
|
DISINTO_LOG_DIR="${FACTORY_ROOT}"
|
||||||
fi
|
fi
|
||||||
export DISINTO_LOG_DIR
|
export DISINTO_LOG_DIR
|
||||||
|
|
||||||
# Load secrets: prefer .env.enc (SOPS-encrypted), fall back to plaintext .env.
|
# Load secrets: prefer .env.enc (SOPS-encrypted), fall back to plaintext .env.
|
||||||
# Always source .env — cron jobs inside the container do NOT inherit compose
|
# Inside containers (DISINTO_CONTAINER=1), compose environment is the source of truth.
|
||||||
# env vars (FORGE_TOKEN, etc.). Compose-injected vars (like FORGE_URL) are
|
# On bare metal, .env/.env.enc is sourced to provide default values.
|
||||||
# already set and won't be clobbered since env.sh uses ${VAR:-default} patterns
|
if [ "${DISINTO_CONTAINER:-}" != "1" ]; then
|
||||||
# for derived values. FORGE_URL from .env (localhost:3000) is overridden below
|
if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then
|
||||||
# by the compose-injected value when running via docker exec.
|
|
||||||
if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then
|
|
||||||
set -a
|
set -a
|
||||||
_saved_forge_url="${FORGE_URL:-}"
|
_saved_forge_url="${FORGE_URL:-}"
|
||||||
_saved_forge_token="${FORGE_TOKEN:-}"
|
|
||||||
# Use temp file + validate dotenv format before sourcing (avoids eval injection)
|
# Use temp file + validate dotenv format before sourcing (avoids eval injection)
|
||||||
# SOPS -d automatically verifies MAC/GCM authentication tag during decryption
|
# SOPS -d automatically verifies MAC/GCM authentication tag during decryption
|
||||||
_tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; }
|
_tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; }
|
||||||
|
|
@ -55,17 +52,22 @@ if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then
|
||||||
rm -f "$_tmpenv"
|
rm -f "$_tmpenv"
|
||||||
set +a
|
set +a
|
||||||
[ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url"
|
[ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url"
|
||||||
[ -n "$_saved_forge_token" ] && export FORGE_TOKEN="$_saved_forge_token"
|
elif [ -f "$FACTORY_ROOT/.env" ]; then
|
||||||
elif [ -f "$FACTORY_ROOT/.env" ]; then
|
|
||||||
# Preserve compose-injected FORGE_URL (localhost in .env != forgejo in Docker)
|
# Preserve compose-injected FORGE_URL (localhost in .env != forgejo in Docker)
|
||||||
_saved_forge_url="${FORGE_URL:-}"
|
_saved_forge_url="${FORGE_URL:-}"
|
||||||
_saved_forge_token="${FORGE_TOKEN:-}"
|
|
||||||
set -a
|
set -a
|
||||||
# shellcheck source=/dev/null
|
# shellcheck source=/dev/null
|
||||||
source "$FACTORY_ROOT/.env"
|
source "$FACTORY_ROOT/.env"
|
||||||
set +a
|
set +a
|
||||||
[ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url"
|
[ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url"
|
||||||
[ -n "$_saved_forge_token" ] && export FORGE_TOKEN="$_saved_forge_token"
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Allow per-container token override (#375): .env sets the default FORGE_TOKEN
|
||||||
|
# (dev-bot), then FORGE_TOKEN_OVERRIDE replaces it for containers that need a
|
||||||
|
# different Forgejo identity (e.g. dev-qwen).
|
||||||
|
if [ -n "${FORGE_TOKEN_OVERRIDE:-}" ]; then
|
||||||
|
export FORGE_TOKEN="$FORGE_TOKEN_OVERRIDE"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# PATH: foundry, node, system
|
# PATH: foundry, node, system
|
||||||
|
|
@ -77,16 +79,11 @@ if [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then
|
||||||
source "${FACTORY_ROOT}/lib/load-project.sh" "$PROJECT_TOML"
|
source "${FACTORY_ROOT}/lib/load-project.sh" "$PROJECT_TOML"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Forge token: new FORGE_TOKEN > legacy CODEBERG_TOKEN
|
# Forge token
|
||||||
if [ -z "${FORGE_TOKEN:-}" ]; then
|
export FORGE_TOKEN="${FORGE_TOKEN:-}"
|
||||||
FORGE_TOKEN="${CODEBERG_TOKEN:-}"
|
|
||||||
fi
|
|
||||||
export FORGE_TOKEN
|
|
||||||
export CODEBERG_TOKEN="${FORGE_TOKEN}" # backwards compat
|
|
||||||
|
|
||||||
# Review bot token: FORGE_REVIEW_TOKEN > legacy REVIEW_BOT_TOKEN
|
# Review bot token
|
||||||
export FORGE_REVIEW_TOKEN="${FORGE_REVIEW_TOKEN:-${REVIEW_BOT_TOKEN:-}}"
|
export FORGE_REVIEW_TOKEN="${FORGE_REVIEW_TOKEN:-${REVIEW_BOT_TOKEN:-}}"
|
||||||
export REVIEW_BOT_TOKEN="${FORGE_REVIEW_TOKEN}" # backwards compat
|
|
||||||
|
|
||||||
# Per-agent tokens (#747): each agent gets its own Forgejo identity.
|
# Per-agent tokens (#747): each agent gets its own Forgejo identity.
|
||||||
# Falls back to FORGE_TOKEN for backwards compat with single-token setups.
|
# Falls back to FORGE_TOKEN for backwards compat with single-token setups.
|
||||||
|
|
@ -97,18 +94,14 @@ export FORGE_SUPERVISOR_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}"
|
||||||
export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}"
|
export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}"
|
||||||
export FORGE_ARCHITECT_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}"
|
export FORGE_ARCHITECT_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}"
|
||||||
|
|
||||||
# Bot usernames filter: FORGE_BOT_USERNAMES > legacy CODEBERG_BOT_USERNAMES
|
# Bot usernames filter
|
||||||
export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-${CODEBERG_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}}"
|
export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}"
|
||||||
export CODEBERG_BOT_USERNAMES="${FORGE_BOT_USERNAMES}" # backwards compat
|
|
||||||
|
|
||||||
# Project config (FORGE_* preferred, CODEBERG_* fallback)
|
# Project config
|
||||||
export FORGE_REPO="${FORGE_REPO:-${CODEBERG_REPO:-}}"
|
export FORGE_REPO="${FORGE_REPO:-}"
|
||||||
export CODEBERG_REPO="${FORGE_REPO}" # backwards compat
|
|
||||||
export FORGE_URL="${FORGE_URL:-http://localhost:3000}"
|
export FORGE_URL="${FORGE_URL:-http://localhost:3000}"
|
||||||
export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}"
|
export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}"
|
||||||
export FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}"
|
export FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}"
|
||||||
export CODEBERG_API="${FORGE_API}" # backwards compat
|
|
||||||
export CODEBERG_WEB="${FORGE_WEB}" # backwards compat
|
|
||||||
# tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo)
|
# tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo)
|
||||||
if [ -z "${TEA_LOGIN:-}" ]; then
|
if [ -z "${TEA_LOGIN:-}" ]; then
|
||||||
case "${FORGE_URL}" in
|
case "${FORGE_URL}" in
|
||||||
|
|
@ -144,8 +137,12 @@ unset CLAWHUB_TOKEN 2>/dev/null || true
|
||||||
export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1
|
export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1
|
||||||
|
|
||||||
# Shared log helper
|
# Shared log helper
|
||||||
|
# Usage: log "message"
|
||||||
|
# Output: [2026-04-03T14:00:00Z] agent: message
|
||||||
|
# Where agent is set via LOG_AGENT variable (defaults to caller's context)
|
||||||
log() {
|
log() {
|
||||||
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*"
|
local agent="${LOG_AGENT:-agent}"
|
||||||
|
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*"
|
||||||
}
|
}
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -209,8 +206,6 @@ forge_api() {
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
"${FORGE_API}${path}" "$@"
|
"${FORGE_API}${path}" "$@"
|
||||||
}
|
}
|
||||||
# Backwards-compat alias
|
|
||||||
codeberg_api() { forge_api "$@"; }
|
|
||||||
|
|
||||||
# Paginate a Forge API GET endpoint and return all items as a merged JSON array.
|
# Paginate a Forge API GET endpoint and return all items as a merged JSON array.
|
||||||
# Usage: forge_api_all /path (no existing query params)
|
# Usage: forge_api_all /path (no existing query params)
|
||||||
|
|
@ -254,13 +249,13 @@ woodpecker_api() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
curl -sfL \
|
curl -sfL \
|
||||||
-H "Authorization: Bearer ${WOODPECKER_TOKEN}" \
|
-H "Authorization: Bearer ${WOODPECKER_TOKEN:-}" \
|
||||||
"${WOODPECKER_SERVER}/api${path}" "$@"
|
"${WOODPECKER_SERVER:-}/api${path}" "$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Woodpecker DB query helper
|
# Woodpecker DB query helper
|
||||||
wpdb() {
|
wpdb() {
|
||||||
PGPASSWORD="${WOODPECKER_DB_PASSWORD}" psql \
|
PGPASSWORD="${WOODPECKER_DB_PASSWORD:-}" psql \
|
||||||
-U "${WOODPECKER_DB_USER:-woodpecker}" \
|
-U "${WOODPECKER_DB_USER:-woodpecker}" \
|
||||||
-h "${WOODPECKER_DB_HOST:-127.0.0.1}" \
|
-h "${WOODPECKER_DB_HOST:-127.0.0.1}" \
|
||||||
-d "${WOODPECKER_DB_NAME:-woodpecker}" \
|
-d "${WOODPECKER_DB_NAME:-woodpecker}" \
|
||||||
|
|
|
||||||
|
|
@ -1,59 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
# file-action-issue.sh — File an action issue for a formula run
|
|
||||||
#
|
|
||||||
# Usage: source this file, then call file_action_issue.
|
|
||||||
# Requires: forge_api() from lib/env.sh, jq, lib/secret-scan.sh
|
|
||||||
#
|
|
||||||
# file_action_issue <formula_name> <title> <body>
|
|
||||||
# Sets FILED_ISSUE_NUM on success.
|
|
||||||
# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected
|
|
||||||
|
|
||||||
# Load secret scanner
|
|
||||||
# shellcheck source=secret-scan.sh
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh"
|
|
||||||
|
|
||||||
# file_action_issue <formula_name> <title> <body>
# File an "action" issue on the forge for a formula run.
# Sets FILED_ISSUE_NUM on success.
# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected
# Requires: forge_api()/forge_api_all() from lib/env.sh, jq, scan_for_secrets()
file_action_issue() {
  local formula_name="$1" title="$2" body="$3"
  FILED_ISSUE_NUM=""

  # Secret scan: reject issue bodies containing embedded secrets
  if ! scan_for_secrets "$body"; then
    echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2
    return 4
  fi

  # Dedup: skip if an open action issue for this formula already exists.
  # FIX: match the formula name as a literal substring (contains) rather than
  # a regex (test) — a name with regex metacharacters (e.g. "a.b") previously
  # produced false-positive matches or jq errors.
  local open_actions
  open_actions=$(forge_api_all "/issues?state=open&type=issues&labels=action" 2>/dev/null || true)
  if [ -n "$open_actions" ] && [ "$open_actions" != "null" ]; then
    local existing
    existing=$(printf '%s' "$open_actions" | \
      jq --arg f "$formula_name" '[.[] | select(.title | contains($f))] | length' 2>/dev/null || echo 0)
    if [ "${existing:-0}" -gt 0 ]; then
      return 1
    fi
  fi

  # Fetch 'action' label ID
  local action_label_id
  action_label_id=$(forge_api GET "/labels" 2>/dev/null | \
    jq -r '.[] | select(.name == "action") | .id' 2>/dev/null || true)
  if [ -z "$action_label_id" ]; then
    return 2
  fi

  # Create the issue (jq -nc builds safely-escaped JSON from the raw strings)
  local payload result
  payload=$(jq -nc \
    --arg title "$title" \
    --arg body "$body" \
    --argjson labels "[$action_label_id]" \
    '{title: $title, body: $body, labels: $labels}')

  result=$(forge_api POST "/issues" -d "$payload" 2>/dev/null || true)
  FILED_ISSUE_NUM=$(printf '%s' "$result" | jq -r '.number // empty' 2>/dev/null || true)

  if [ -z "$FILED_ISSUE_NUM" ]; then
    return 3
  fi
  return 0
}
|
|
||||||
93
lib/forge-push.sh
Normal file
93
lib/forge-push.sh
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# forge-push.sh — push_to_forge() function
|
||||||
|
#
|
||||||
|
# Handles pushing a local clone to the Forgejo remote and verifying the push.
|
||||||
|
#
|
||||||
|
# Globals expected:
|
||||||
|
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
|
||||||
|
# FORGE_TOKEN - API token for Forge operations (used for API verification)
|
||||||
|
# FACTORY_ROOT - Root of the disinto factory
|
||||||
|
# PRIMARY_BRANCH - Primary branch name (e.g. main)
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# source "${FACTORY_ROOT}/lib/forge-push.sh"
|
||||||
|
# push_to_forge <repo_root> <forge_url> <repo_slug>
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Assert required globals are set before using this module.
|
||||||
|
# Verify that every global this module depends on is set.
# Exits 1 with a message listing all missing names; returns 0 otherwise.
_assert_forge_push_globals() {
    local required_var
    local missing=()
    for required_var in FORGE_URL FORGE_TOKEN FACTORY_ROOT PRIMARY_BRANCH; do
        # ${!var} is indirect expansion: the value of the named global
        if [ -z "${!required_var:-}" ]; then
            missing+=("$required_var")
        fi
    done
    if [ "${#missing[@]}" -gt 0 ]; then
        echo "Error: forge-push.sh requires these globals to be set: ${missing[*]}" >&2
        exit 1
    fi
}
|
||||||
|
|
||||||
|
# Push local clone to the Forgejo remote.
|
||||||
|
# Push a local clone to the Forgejo remote and verify the push landed.
#
# Arguments:
#   $1 repo_root - path to the local git clone
#   $2 forge_url - Forge instance base URL (e.g. http://localhost:3000)
#   $3 repo_slug - owner/name slug of the target repo
#
# Returns 0 on success (or when the local repo has no commits to push),
# 1 when a push fails or Forgejo still reports the repo empty afterwards.
push_to_forge() {
    # Fail fast on missing module globals — the assert helper was defined
    # for exactly this contract but was previously never invoked.
    _assert_forge_push_globals

    local repo_root="$1" forge_url="$2" repo_slug="$3"

    # Use clean URL — credential helper supplies auth (#604).
    # Forgejo 11.x rejects API tokens for git HTTP push (#361); password auth works
    # via the credential helper configured in configure_git_creds().
    local remote_url="${forge_url}/${repo_slug}.git"
    local display_url="$remote_url"

    # Always set the remote URL to ensure credentials are current
    if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then
        git -C "$repo_root" remote set-url forgejo "$remote_url"
    else
        git -C "$repo_root" remote add forgejo "$remote_url"
    fi
    echo "Remote: forgejo -> ${display_url}"

    # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo)
    if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then
        echo "Push: skipped (local repo has no commits)"
        return 0
    fi

    # Push all branches and tags
    echo "Pushing: branches to forgejo"
    if ! git -C "$repo_root" push forgejo --all 2>&1; then
        echo "Error: failed to push branches to Forgejo" >&2
        return 1
    fi
    echo "Pushing: tags to forgejo"
    if ! git -C "$repo_root" push forgejo --tags 2>&1; then
        echo "Error: failed to push tags to Forgejo" >&2
        return 1
    fi

    # Verify the repo is no longer empty (Forgejo may need a moment to index pushed refs)
    local is_empty="true"
    local verify_attempt
    for verify_attempt in $(seq 1 5); do
        local repo_info
        repo_info=$(curl -sf --max-time 10 \
            -H "Authorization: token ${FORGE_TOKEN}" \
            "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info=""
        if [ -z "$repo_info" ]; then
            # API unreachable: don't fail the push over a verification hiccup
            is_empty="skipped"
            break
        fi
        is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"')
        if [ "$is_empty" != "true" ]; then
            echo "Verify: repo is not empty (push confirmed)"
            break
        fi
        if [ "$verify_attempt" -lt 5 ]; then
            sleep 2
        fi
    done
    # Only a confirmed-still-empty repo is an error; "skipped"/"unknown" pass.
    if [ "$is_empty" = "true" ]; then
        echo "Warning: Forgejo repo still reports empty after push" >&2
        return 1
    fi
}
|
||||||
772
lib/forge-setup.sh
Normal file
772
lib/forge-setup.sh
Normal file
|
|
@ -0,0 +1,772 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# forge-setup.sh — setup_forge() and helpers for Forgejo provisioning
|
||||||
|
#
|
||||||
|
# Handles admin user creation, bot user creation, token generation,
|
||||||
|
# password resets, repo creation, and collaborator setup.
|
||||||
|
#
|
||||||
|
# Globals expected (asserted by _load_init_context):
|
||||||
|
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
|
||||||
|
# FACTORY_ROOT - Root of the disinto factory
|
||||||
|
# PRIMARY_BRANCH - Primary branch name (e.g. main)
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# source "${FACTORY_ROOT}/lib/forge-setup.sh"
|
||||||
|
# setup_forge <forge_url> <repo_slug>
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Assert required globals are set before using this module.
|
||||||
|
# Verify that every global this module depends on is set.
# Exits 1 with a message listing all missing names; returns 0 otherwise.
_load_init_context() {
    local required_var
    local missing=()
    for required_var in FORGE_URL FACTORY_ROOT PRIMARY_BRANCH; do
        # ${!var} is indirect expansion: the value of the named global
        if [ -z "${!required_var:-}" ]; then
            missing+=("$required_var")
        fi
    done
    if [ "${#missing[@]}" -gt 0 ]; then
        echo "Error: forge-setup.sh requires these globals to be set: ${missing[*]}" >&2
        exit 1
    fi
}
|
||||||
|
|
||||||
|
# Execute a command in the Forgejo container (for admin operations)
|
||||||
|
# Run a command inside the Forgejo container as the 'git' user.
# DISINTO_BARE=true targets the standalone `docker run` container;
# any other value goes through docker compose.
_forgejo_exec() {
    case "${DISINTO_BARE:-false}" in
        true)
            docker exec -u git disinto-forgejo "$@"
            ;;
        *)
            docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@"
            ;;
    esac
}
|
||||||
|
|
||||||
|
# Check if a token already exists in .env (for idempotency)
|
||||||
|
# Returns 0 if token exists, 1 if it doesn't
|
||||||
|
# Idempotency check: does the env file already define this token variable?
# Returns 0 (grep match) if a "VAR=" line exists, 1 otherwise; a missing
# env file is treated as "not defined" (grep errors are silenced).
_token_exists_in_env() {
    local var_name="$1" env_path="$2"
    grep -q "^${var_name}=" "$env_path" 2>/dev/null
}
|
||||||
|
|
||||||
|
# Check if a password already exists in .env (for idempotency)
|
||||||
|
# Returns 0 if password exists, 1 if it doesn't
|
||||||
|
# Idempotency check: does the env file already define this password variable?
# Returns 0 (grep match) if a "VAR=" line exists, 1 otherwise; a missing
# env file is treated as "not defined" (grep errors are silenced).
_pass_exists_in_env() {
    local var_name="$1" env_path="$2"
    grep -q "^${var_name}=" "$env_path" 2>/dev/null
}
|
||||||
|
|
||||||
|
# Provision or connect to a local Forgejo instance.
|
||||||
|
# Creates admin + bot users, generates API tokens, stores in .env.
|
||||||
|
# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose.
|
||||||
|
# Usage: setup_forge [--rotate-tokens] <forge_url> <repo_slug>
|
||||||
|
setup_forge() {
|
||||||
|
local rotate_tokens=false
|
||||||
|
# Parse optional --rotate-tokens flag
|
||||||
|
if [ "$1" = "--rotate-tokens" ]; then
|
||||||
|
rotate_tokens=true
|
||||||
|
shift
|
||||||
|
fi
|
||||||
|
local forge_url="$1"
|
||||||
|
local repo_slug="$2"
|
||||||
|
local use_bare="${DISINTO_BARE:-false}"
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "── Forge setup ────────────────────────────────────────"
|
||||||
|
|
||||||
|
# Check if Forgejo is already running
|
||||||
|
if curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/version" >/dev/null 2>&1; then
|
||||||
|
echo "Forgejo: ${forge_url} (already running)"
|
||||||
|
else
|
||||||
|
echo "Forgejo not reachable at ${forge_url}"
|
||||||
|
echo "Starting Forgejo via Docker..."
|
||||||
|
|
||||||
|
if ! command -v docker &>/dev/null; then
|
||||||
|
echo "Error: docker not found — needed to provision Forgejo" >&2
|
||||||
|
echo " Install Docker or start Forgejo manually at ${forge_url}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract port from forge_url
|
||||||
|
local forge_port
|
||||||
|
forge_port=$(printf '%s' "$forge_url" | sed -E 's|.*:([0-9]+)/?$|\1|')
|
||||||
|
forge_port="${forge_port:-3000}"
|
||||||
|
|
||||||
|
if [ "$use_bare" = true ]; then
|
||||||
|
# Bare-metal mode: standalone docker run
|
||||||
|
mkdir -p "${FORGEJO_DATA_DIR}"
|
||||||
|
|
||||||
|
if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then
|
||||||
|
docker start disinto-forgejo >/dev/null 2>&1 || true
|
||||||
|
else
|
||||||
|
docker run -d \
|
||||||
|
--name disinto-forgejo \
|
||||||
|
--restart unless-stopped \
|
||||||
|
-p "${forge_port}:3000" \
|
||||||
|
-p 2222:22 \
|
||||||
|
-v "${FORGEJO_DATA_DIR}:/data" \
|
||||||
|
-e "FORGEJO__database__DB_TYPE=sqlite3" \
|
||||||
|
-e "FORGEJO__server__ROOT_URL=${forge_url}/" \
|
||||||
|
-e "FORGEJO__server__HTTP_PORT=3000" \
|
||||||
|
-e "FORGEJO__service__DISABLE_REGISTRATION=true" \
|
||||||
|
codeberg.org/forgejo/forgejo:11.0
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# Compose mode: start Forgejo via docker compose
|
||||||
|
docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Wait for Forgejo to become healthy
|
||||||
|
echo -n "Waiting for Forgejo to start"
|
||||||
|
local retries=0
|
||||||
|
while ! curl -sf --max-time 3 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/version" >/dev/null 2>&1; do
|
||||||
|
retries=$((retries + 1))
|
||||||
|
if [ "$retries" -gt 60 ]; then
|
||||||
|
echo ""
|
||||||
|
echo "Error: Forgejo did not become ready within 60s" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo -n "."
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
echo " ready"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Wait for Forgejo database to accept writes (API may be ready before DB is)
|
||||||
|
echo -n "Waiting for Forgejo database"
|
||||||
|
local db_ready=false
|
||||||
|
for _i in $(seq 1 30); do
|
||||||
|
if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then
|
||||||
|
db_ready=true
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
echo -n "."
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
echo ""
|
||||||
|
if [ "$db_ready" != true ]; then
|
||||||
|
echo "Error: Forgejo database not ready after 30s" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create admin user if it doesn't exist
|
||||||
|
local admin_user="disinto-admin"
|
||||||
|
local admin_pass
|
||||||
|
local env_file="${FACTORY_ROOT}/.env"
|
||||||
|
|
||||||
|
# Re-read persisted admin password if available (#158)
|
||||||
|
if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
|
||||||
|
admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-)
|
||||||
|
fi
|
||||||
|
# Generate a fresh password only when none was persisted
|
||||||
|
if [ -z "${admin_pass:-}" ]; then
|
||||||
|
admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
|
||||||
|
echo "Creating admin user: ${admin_user}"
|
||||||
|
local create_output
|
||||||
|
if ! create_output=$(_forgejo_exec forgejo admin user create \
|
||||||
|
--admin \
|
||||||
|
--username "${admin_user}" \
|
||||||
|
--password "${admin_pass}" \
|
||||||
|
--email "admin@disinto.local" \
|
||||||
|
--must-change-password=false 2>&1); then
|
||||||
|
echo "Error: failed to create admin user '${admin_user}':" >&2
|
||||||
|
echo " ${create_output}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# Forgejo 11.x ignores --must-change-password=false on create;
|
||||||
|
# explicitly clear the flag so basic-auth token creation works.
|
||||||
|
_forgejo_exec forgejo admin user change-password \
|
||||||
|
--username "${admin_user}" \
|
||||||
|
--password "${admin_pass}" \
|
||||||
|
--must-change-password=false
|
||||||
|
|
||||||
|
# Verify admin user was actually created
|
||||||
|
if ! curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
|
||||||
|
echo "Error: admin user '${admin_user}' not found after creation" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Persist admin password to .env for idempotent re-runs (#158)
|
||||||
|
if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
|
||||||
|
sed -i "s|^FORGE_ADMIN_PASS=.*|FORGE_ADMIN_PASS=${admin_pass}|" "$env_file"
|
||||||
|
else
|
||||||
|
printf 'FORGE_ADMIN_PASS=%s\n' "$admin_pass" >> "$env_file"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Admin user: ${admin_user} (already exists)"
|
||||||
|
# Only reset password if basic auth fails (#158, #267)
|
||||||
|
# Forgejo 11.x may ignore --must-change-password=false, blocking token creation
|
||||||
|
if ! curl -sf --max-time 5 -u "${admin_user}:${admin_pass}" \
|
||||||
|
"${forge_url}/api/v1/user" >/dev/null 2>&1; then
|
||||||
|
_forgejo_exec forgejo admin user change-password \
|
||||||
|
--username "${admin_user}" \
|
||||||
|
--password "${admin_pass}" \
|
||||||
|
--must-change-password=false
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
# Preserve password for Woodpecker OAuth2 token generation (#779)
|
||||||
|
_FORGE_ADMIN_PASS="$admin_pass"
|
||||||
|
|
||||||
|
# Create human user (disinto-admin) as site admin if it doesn't exist
|
||||||
|
local human_user="disinto-admin"
|
||||||
|
local human_pass
|
||||||
|
human_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
|
||||||
|
|
||||||
|
if ! curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
|
||||||
|
echo "Creating human user: ${human_user}"
|
||||||
|
local create_output
|
||||||
|
if ! create_output=$(_forgejo_exec forgejo admin user create \
|
||||||
|
--admin \
|
||||||
|
--username "${human_user}" \
|
||||||
|
--password "${human_pass}" \
|
||||||
|
--email "admin@disinto.local" \
|
||||||
|
--must-change-password=false 2>&1); then
|
||||||
|
echo "Error: failed to create human user '${human_user}':" >&2
|
||||||
|
echo " ${create_output}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# Forgejo 11.x ignores --must-change-password=false on create;
|
||||||
|
# explicitly clear the flag so basic-auth token creation works.
|
||||||
|
_forgejo_exec forgejo admin user change-password \
|
||||||
|
--username "${human_user}" \
|
||||||
|
--password "${human_pass}" \
|
||||||
|
--must-change-password=false
|
||||||
|
|
||||||
|
# Verify human user was actually created
|
||||||
|
if ! curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
|
||||||
|
echo "Error: human user '${human_user}' not found after creation" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " Human user '${human_user}' created as site admin"
|
||||||
|
else
|
||||||
|
echo "Human user: ${human_user} (already exists)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Delete existing admin token if present (token sha1 is only returned at creation time)
|
||||||
|
local existing_token_id
|
||||||
|
existing_token_id=$(curl -sf \
|
||||||
|
-u "${admin_user}:${admin_pass}" \
|
||||||
|
"${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \
|
||||||
|
| jq -r '.[] | select(.name == "disinto-admin-token") | .id') || existing_token_id=""
|
||||||
|
if [ -n "$existing_token_id" ]; then
|
||||||
|
curl -sf -X DELETE \
|
||||||
|
-u "${admin_user}:${admin_pass}" \
|
||||||
|
"${forge_url}/api/v1/users/${admin_user}/tokens/${existing_token_id}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create admin token (fresh, so sha1 is returned)
|
||||||
|
local admin_token
|
||||||
|
admin_token=$(curl -sf -X POST \
|
||||||
|
-u "${admin_user}:${admin_pass}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/users/${admin_user}/tokens" \
|
||||||
|
-d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \
|
||||||
|
| jq -r '.sha1 // empty') || admin_token=""
|
||||||
|
|
||||||
|
if [ -z "$admin_token" ]; then
|
||||||
|
echo "Error: failed to obtain admin API token" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Get or create human user token
|
||||||
|
local human_token=""
|
||||||
|
# Delete existing human token if present (token sha1 is only returned at creation time)
|
||||||
|
local existing_human_token_id
|
||||||
|
existing_human_token_id=$(curl -sf \
|
||||||
|
-u "${human_user}:${human_pass}" \
|
||||||
|
"${forge_url}/api/v1/users/${human_user}/tokens" 2>/dev/null \
|
||||||
|
| jq -r '.[] | select(.name == "disinto-human-token") | .id') || existing_human_token_id=""
|
||||||
|
if [ -n "$existing_human_token_id" ]; then
|
||||||
|
curl -sf -X DELETE \
|
||||||
|
-u "${human_user}:${human_pass}" \
|
||||||
|
"${forge_url}/api/v1/users/${human_user}/tokens/${existing_human_token_id}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create human token (fresh, so sha1 is returned)
|
||||||
|
human_token=$(curl -sf -X POST \
|
||||||
|
-u "${human_user}:${human_pass}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/users/${human_user}/tokens" \
|
||||||
|
-d '{"name":"disinto-human-token","scopes":["all"]}' 2>/dev/null \
|
||||||
|
| jq -r '.sha1 // empty') || human_token=""
|
||||||
|
|
||||||
|
if [ -n "$human_token" ]; then
|
||||||
|
# Store human token in .env
|
||||||
|
if grep -q '^HUMAN_TOKEN=' "$env_file" 2>/dev/null; then
|
||||||
|
sed -i "s|^HUMAN_TOKEN=.*|HUMAN_TOKEN=${human_token}|" "$env_file"
|
||||||
|
else
|
||||||
|
printf 'HUMAN_TOKEN=%s\n' "$human_token" >> "$env_file"
|
||||||
|
fi
|
||||||
|
export HUMAN_TOKEN="$human_token"
|
||||||
|
echo " Human token saved (HUMAN_TOKEN)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create bot users and tokens
|
||||||
|
# Each agent gets its own Forgejo account for identity and audit trail (#747).
|
||||||
|
# Map: bot-username -> env-var-name for the token
|
||||||
|
local -A bot_token_vars=(
|
||||||
|
[dev-bot]="FORGE_TOKEN"
|
||||||
|
[review-bot]="FORGE_REVIEW_TOKEN"
|
||||||
|
[planner-bot]="FORGE_PLANNER_TOKEN"
|
||||||
|
[gardener-bot]="FORGE_GARDENER_TOKEN"
|
||||||
|
[vault-bot]="FORGE_VAULT_TOKEN"
|
||||||
|
[supervisor-bot]="FORGE_SUPERVISOR_TOKEN"
|
||||||
|
[predictor-bot]="FORGE_PREDICTOR_TOKEN"
|
||||||
|
[architect-bot]="FORGE_ARCHITECT_TOKEN"
|
||||||
|
)
|
||||||
|
# Map: bot-username -> env-var-name for the password
|
||||||
|
# Forgejo 11.x API tokens don't work for git HTTP push (#361).
|
||||||
|
# Store passwords so agents can use password auth for git operations.
|
||||||
|
local -A bot_pass_vars=(
|
||||||
|
[dev-bot]="FORGE_PASS"
|
||||||
|
[review-bot]="FORGE_REVIEW_PASS"
|
||||||
|
[planner-bot]="FORGE_PLANNER_PASS"
|
||||||
|
[gardener-bot]="FORGE_GARDENER_PASS"
|
||||||
|
[vault-bot]="FORGE_VAULT_PASS"
|
||||||
|
[supervisor-bot]="FORGE_SUPERVISOR_PASS"
|
||||||
|
[predictor-bot]="FORGE_PREDICTOR_PASS"
|
||||||
|
[architect-bot]="FORGE_ARCHITECT_PASS"
|
||||||
|
)
|
||||||
|
# Llama bot users (local-model agents) — separate from main agents
|
||||||
|
# Each llama agent gets its own Forgejo user, token, and password
|
||||||
|
local -A llama_token_vars=(
|
||||||
|
[dev-qwen]="FORGE_TOKEN_LLAMA"
|
||||||
|
[dev-qwen-nightly]="FORGE_TOKEN_LLAMA_NIGHTLY"
|
||||||
|
)
|
||||||
|
local -A llama_pass_vars=(
|
||||||
|
[dev-qwen]="FORGE_PASS_LLAMA"
|
||||||
|
[dev-qwen-nightly]="FORGE_PASS_LLAMA_NIGHTLY"
|
||||||
|
)
|
||||||
|
|
||||||
|
local bot_user bot_pass token token_var pass_var
|
||||||
|
|
||||||
|
for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot; do
|
||||||
|
token_var="${bot_token_vars[$bot_user]}"
|
||||||
|
pass_var="${bot_pass_vars[$bot_user]}"
|
||||||
|
|
||||||
|
# Check if token already exists in .env
|
||||||
|
local token_exists=false
|
||||||
|
if _token_exists_in_env "$token_var" "$env_file"; then
|
||||||
|
token_exists=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if password already exists in .env
|
||||||
|
local pass_exists=false
|
||||||
|
if _pass_exists_in_env "$pass_var" "$env_file"; then
|
||||||
|
pass_exists=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if bot user exists on Forgejo
|
||||||
|
local user_exists=false
|
||||||
|
if curl -sf --max-time 5 \
|
||||||
|
-H "Authorization: token ${admin_token}" \
|
||||||
|
"${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then
|
||||||
|
user_exists=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Skip token/password regeneration if both exist in .env and not forcing rotation
|
||||||
|
if [ "$token_exists" = true ] && [ "$pass_exists" = true ] && [ "$rotate_tokens" = false ]; then
|
||||||
|
echo " ${bot_user} token and password preserved (use --rotate-tokens to force)"
|
||||||
|
# Still export the existing token for use within this run
|
||||||
|
local existing_token existing_pass
|
||||||
|
existing_token=$(grep "^${token_var}=" "$env_file" | head -1 | cut -d= -f2-)
|
||||||
|
existing_pass=$(grep "^${pass_var}=" "$env_file" | head -1 | cut -d= -f2-)
|
||||||
|
export "${token_var}=${existing_token}"
|
||||||
|
export "${pass_var}=${existing_pass}"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Generate new credentials if:
|
||||||
|
# - Token doesn't exist (first run)
|
||||||
|
# - Password doesn't exist (first run)
|
||||||
|
# - --rotate-tokens flag is set (explicit rotation)
|
||||||
|
if [ "$user_exists" = false ]; then
|
||||||
|
# User doesn't exist - create it
|
||||||
|
bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
|
||||||
|
echo "Creating bot user: ${bot_user}"
|
||||||
|
local create_output
|
||||||
|
if ! create_output=$(_forgejo_exec forgejo admin user create \
|
||||||
|
--username "${bot_user}" \
|
||||||
|
--password "${bot_pass}" \
|
||||||
|
--email "${bot_user}@disinto.local" \
|
||||||
|
--must-change-password=false 2>&1); then
|
||||||
|
echo "Error: failed to create bot user '${bot_user}':" >&2
|
||||||
|
echo " ${create_output}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# Forgejo 11.x ignores --must-change-password=false on create;
|
||||||
|
# explicitly clear the flag so basic-auth token creation works.
|
||||||
|
_forgejo_exec forgejo admin user change-password \
|
||||||
|
--username "${bot_user}" \
|
||||||
|
--password "${bot_pass}" \
|
||||||
|
--must-change-password=false
|
||||||
|
|
||||||
|
# Verify bot user was actually created
|
||||||
|
if ! curl -sf --max-time 5 \
|
||||||
|
-H "Authorization: token ${admin_token}" \
|
||||||
|
"${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then
|
||||||
|
echo "Error: bot user '${bot_user}' not found after creation" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " ${bot_user} user created"
|
||||||
|
else
|
||||||
|
# User exists - reset password if needed
|
||||||
|
echo " ${bot_user} user exists"
|
||||||
|
if [ "$rotate_tokens" = true ] || [ "$pass_exists" = false ]; then
|
||||||
|
bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
|
||||||
|
_forgejo_exec forgejo admin user change-password \
|
||||||
|
--username "${bot_user}" \
|
||||||
|
--password "${bot_pass}" \
|
||||||
|
--must-change-password=false || {
|
||||||
|
echo "Error: failed to reset password for existing bot user '${bot_user}'" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
echo " ${bot_user} password reset for token generation"
|
||||||
|
else
|
||||||
|
# Password exists, get it from .env
|
||||||
|
bot_pass=$(grep "^${pass_var}=" "$env_file" | head -1 | cut -d= -f2-)
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Generate token via API (basic auth as the bot user — Forgejo requires
|
||||||
|
# basic auth on POST /users/{username}/tokens, token auth is rejected)
|
||||||
|
# First, try to delete existing tokens to avoid name collision
|
||||||
|
# Use bot user's own Basic Auth (we just set the password above)
|
||||||
|
local existing_token_ids
|
||||||
|
existing_token_ids=$(curl -sf \
|
||||||
|
-u "${bot_user}:${bot_pass}" \
|
||||||
|
"${forge_url}/api/v1/users/${bot_user}/tokens" 2>/dev/null \
|
||||||
|
| jq -r '.[].id // empty' 2>/dev/null) || existing_token_ids=""
|
||||||
|
|
||||||
|
# Delete any existing tokens for this user
|
||||||
|
if [ -n "$existing_token_ids" ]; then
|
||||||
|
while IFS= read -r tid; do
|
||||||
|
[ -n "$tid" ] && curl -sf -X DELETE \
|
||||||
|
-u "${bot_user}:${bot_pass}" \
|
||||||
|
"${forge_url}/api/v1/users/${bot_user}/tokens/${tid}" >/dev/null 2>&1 || true
|
||||||
|
done <<< "$existing_token_ids"
|
||||||
|
fi
|
||||||
|
|
||||||
|
token=$(curl -sf -X POST \
|
||||||
|
-u "${bot_user}:${bot_pass}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/users/${bot_user}/tokens" \
|
||||||
|
-d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \
|
||||||
|
| jq -r '.sha1 // empty') || token=""
|
||||||
|
|
||||||
|
if [ -z "$token" ]; then
|
||||||
|
echo "Error: failed to create API token for '${bot_user}'" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Store token in .env under the per-agent variable name
|
||||||
|
if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then
|
||||||
|
sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file"
|
||||||
|
else
|
||||||
|
printf '%s=%s\n' "$token_var" "$token" >> "$env_file"
|
||||||
|
fi
|
||||||
|
export "${token_var}=${token}"
|
||||||
|
echo " ${bot_user} token generated and saved (${token_var})"
|
||||||
|
|
||||||
|
# Store password in .env for git HTTP push (#361)
|
||||||
|
# Forgejo 11.x API tokens don't work for git push; password auth does.
|
||||||
|
if grep -q "^${pass_var}=" "$env_file" 2>/dev/null; then
|
||||||
|
sed -i "s|^${pass_var}=.*|${pass_var}=${bot_pass}|" "$env_file"
|
||||||
|
else
|
||||||
|
printf '%s=%s\n' "$pass_var" "$bot_pass" >> "$env_file"
|
||||||
|
fi
|
||||||
|
export "${pass_var}=${bot_pass}"
|
||||||
|
echo " ${bot_user} password saved (${pass_var})"
|
||||||
|
|
||||||
|
# Backwards-compat aliases for dev-bot and review-bot
|
||||||
|
if [ "$bot_user" = "dev-bot" ]; then
|
||||||
|
export CODEBERG_TOKEN="$token"
|
||||||
|
elif [ "$bot_user" = "review-bot" ]; then
|
||||||
|
export REVIEW_BOT_TOKEN="$token"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Create llama bot users and tokens (local-model agents)
|
||||||
|
# These are separate from the main agents and get their own credentials
|
||||||
|
echo ""
|
||||||
|
echo "── Setting up llama bot users ────────────────────────────"
|
||||||
|
|
||||||
|
local llama_user llama_pass llama_token llama_token_var llama_pass_var
|
||||||
|
for llama_user in "${!llama_token_vars[@]}"; do
|
||||||
|
llama_token_var="${llama_token_vars[$llama_user]}"
|
||||||
|
llama_pass_var="${llama_pass_vars[$llama_user]}"
|
||||||
|
|
||||||
|
# Check if token already exists in .env
|
||||||
|
local token_exists=false
|
||||||
|
if _token_exists_in_env "$llama_token_var" "$env_file"; then
|
||||||
|
token_exists=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if password already exists in .env
|
||||||
|
local pass_exists=false
|
||||||
|
if _pass_exists_in_env "$llama_pass_var" "$env_file"; then
|
||||||
|
pass_exists=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if llama bot user exists on Forgejo
|
||||||
|
local llama_user_exists=false
|
||||||
|
if curl -sf --max-time 5 \
|
||||||
|
-H "Authorization: token ${admin_token}" \
|
||||||
|
"${forge_url}/api/v1/users/${llama_user}" >/dev/null 2>&1; then
|
||||||
|
llama_user_exists=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Skip token/password regeneration if both exist in .env and not forcing rotation
|
||||||
|
if [ "$token_exists" = true ] && [ "$pass_exists" = true ] && [ "$rotate_tokens" = false ]; then
|
||||||
|
echo " ${llama_user} token and password preserved (use --rotate-tokens to force)"
|
||||||
|
# Still export the existing token for use within this run
|
||||||
|
local existing_token existing_pass
|
||||||
|
existing_token=$(grep "^${llama_token_var}=" "$env_file" | head -1 | cut -d= -f2-)
|
||||||
|
existing_pass=$(grep "^${llama_pass_var}=" "$env_file" | head -1 | cut -d= -f2-)
|
||||||
|
export "${llama_token_var}=${existing_token}"
|
||||||
|
export "${llama_pass_var}=${existing_pass}"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Generate new credentials if:
|
||||||
|
# - Token doesn't exist (first run)
|
||||||
|
# - Password doesn't exist (first run)
|
||||||
|
# - --rotate-tokens flag is set (explicit rotation)
|
||||||
|
if [ "$llama_user_exists" = false ]; then
|
||||||
|
# User doesn't exist - create it
|
||||||
|
llama_pass="llama-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
|
||||||
|
echo "Creating llama bot user: ${llama_user}"
|
||||||
|
local create_output
|
||||||
|
if ! create_output=$(_forgejo_exec forgejo admin user create \
|
||||||
|
--username "${llama_user}" \
|
||||||
|
--password "${llama_pass}" \
|
||||||
|
--email "${llama_user}@disinto.local" \
|
||||||
|
--must-change-password=false 2>&1); then
|
||||||
|
echo "Error: failed to create llama bot user '${llama_user}':" >&2
|
||||||
|
echo " ${create_output}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# Forgejo 11.x ignores --must-change-password=false on create;
|
||||||
|
# explicitly clear the flag so basic-auth token creation works.
|
||||||
|
_forgejo_exec forgejo admin user change-password \
|
||||||
|
--username "${llama_user}" \
|
||||||
|
--password "${llama_pass}" \
|
||||||
|
--must-change-password=false
|
||||||
|
|
||||||
|
# Verify llama bot user was actually created
|
||||||
|
if ! curl -sf --max-time 5 \
|
||||||
|
-H "Authorization: token ${admin_token}" \
|
||||||
|
"${forge_url}/api/v1/users/${llama_user}" >/dev/null 2>&1; then
|
||||||
|
echo "Error: llama bot user '${llama_user}' not found after creation" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " ${llama_user} user created"
|
||||||
|
else
|
||||||
|
# User exists - reset password if needed
|
||||||
|
echo " ${llama_user} user exists"
|
||||||
|
if [ "$rotate_tokens" = true ] || [ "$pass_exists" = false ]; then
|
||||||
|
llama_pass="llama-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
|
||||||
|
_forgejo_exec forgejo admin user change-password \
|
||||||
|
--username "${llama_user}" \
|
||||||
|
--password "${llama_pass}" \
|
||||||
|
--must-change-password=false || {
|
||||||
|
echo "Error: failed to reset password for existing llama bot user '${llama_user}'" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
echo " ${llama_user} password reset for token generation"
|
||||||
|
else
|
||||||
|
# Password exists, get it from .env
|
||||||
|
llama_pass=$(grep "^${llama_pass_var}=" "$env_file" | head -1 | cut -d= -f2-)
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Generate token via API (basic auth as the llama user)
|
||||||
|
# First, delete any existing tokens to avoid name collision
|
||||||
|
local existing_llama_token_ids
|
||||||
|
existing_llama_token_ids=$(curl -sf \
|
||||||
|
-u "${llama_user}:${llama_pass}" \
|
||||||
|
"${forge_url}/api/v1/users/${llama_user}/tokens" 2>/dev/null \
|
||||||
|
| jq -r '.[].id // empty' 2>/dev/null) || existing_llama_token_ids=""
|
||||||
|
|
||||||
|
# Delete any existing tokens for this user
|
||||||
|
if [ -n "$existing_llama_token_ids" ]; then
|
||||||
|
while IFS= read -r tid; do
|
||||||
|
[ -n "$tid" ] && curl -sf -X DELETE \
|
||||||
|
-u "${llama_user}:${llama_pass}" \
|
||||||
|
"${forge_url}/api/v1/users/${llama_user}/tokens/${tid}" >/dev/null 2>&1 || true
|
||||||
|
done <<< "$existing_llama_token_ids"
|
||||||
|
fi
|
||||||
|
|
||||||
|
llama_token=$(curl -sf -X POST \
|
||||||
|
-u "${llama_user}:${llama_pass}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/users/${llama_user}/tokens" \
|
||||||
|
-d "{\"name\":\"disinto-${llama_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \
|
||||||
|
| jq -r '.sha1 // empty') || llama_token=""
|
||||||
|
|
||||||
|
if [ -z "$llama_token" ]; then
|
||||||
|
echo "Error: failed to create API token for '${llama_user}'" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Store token in .env under the llama-specific variable name
|
||||||
|
if grep -q "^${llama_token_var}=" "$env_file" 2>/dev/null; then
|
||||||
|
sed -i "s|^${llama_token_var}=.*|${llama_token_var}=${llama_token}|" "$env_file"
|
||||||
|
else
|
||||||
|
printf '%s=%s\n' "$llama_token_var" "$llama_token" >> "$env_file"
|
||||||
|
fi
|
||||||
|
export "${llama_token_var}=${llama_token}"
|
||||||
|
echo " ${llama_user} token generated and saved (${llama_token_var})"
|
||||||
|
|
||||||
|
# Store password in .env for git HTTP push (#361)
|
||||||
|
# Forgejo 11.x API tokens don't work for git push; password auth does.
|
||||||
|
if grep -q "^${llama_pass_var}=" "$env_file" 2>/dev/null; then
|
||||||
|
sed -i "s|^${llama_pass_var}=.*|${llama_pass_var}=${llama_pass}|" "$env_file"
|
||||||
|
else
|
||||||
|
printf '%s=%s\n' "$llama_pass_var" "$llama_pass" >> "$env_file"
|
||||||
|
fi
|
||||||
|
export "${llama_pass_var}=${llama_pass}"
|
||||||
|
echo " ${llama_user} password saved (${llama_pass_var})"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Create .profile repos for all bot users (if they don't already exist)
|
||||||
|
# This runs the same logic as hire-an-agent Step 2-3 for idempotent setup
|
||||||
|
echo ""
|
||||||
|
echo "── Setting up .profile repos ────────────────────────────"
|
||||||
|
|
||||||
|
local -a bot_users=(dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot)
|
||||||
|
# Add llama bot users to .profile repo creation
|
||||||
|
for llama_user in "${!llama_token_vars[@]}"; do
|
||||||
|
bot_users+=("$llama_user")
|
||||||
|
done
|
||||||
|
local bot_user
|
||||||
|
|
||||||
|
for bot_user in "${bot_users[@]}"; do
|
||||||
|
# Check if .profile repo already exists
|
||||||
|
if curl -sf --max-time 5 -H "Authorization: token ${admin_token}" "${forge_url}/api/v1/repos/${bot_user}/.profile" >/dev/null 2>&1; then
|
||||||
|
echo " ${bot_user}/.profile already exists"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Creating ${bot_user}/.profile repo..."
|
||||||
|
|
||||||
|
# Create the repo using the admin API to ensure it's created in the bot user's namespace
|
||||||
|
local create_output
|
||||||
|
create_output=$(curl -sf -X POST \
|
||||||
|
-u "${admin_user}:${admin_pass}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/admin/users/${bot_user}/repos" \
|
||||||
|
-d "{\"name\":\".profile\",\"description\":\"${bot_user}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true
|
||||||
|
|
||||||
|
if echo "$create_output" | grep -q '"id":\|[0-9]'; then
|
||||||
|
echo " Created ${bot_user}/.profile (via admin API)"
|
||||||
|
else
|
||||||
|
echo " Warning: failed to create ${bot_user}/.profile: ${create_output}" >&2
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Store FORGE_URL in .env if not already present
|
||||||
|
if ! grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then
|
||||||
|
printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create the repo on Forgejo if it doesn't exist
|
||||||
|
local org_name="${repo_slug%%/*}"
|
||||||
|
local repo_name="${repo_slug##*/}"
|
||||||
|
|
||||||
|
# Check if repo already exists
|
||||||
|
if ! curl -sf --max-time 5 \
|
||||||
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then
|
||||||
|
|
||||||
|
# Try creating org first (ignore if exists)
|
||||||
|
curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/orgs" \
|
||||||
|
-d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true
|
||||||
|
|
||||||
|
# Create repo under org
|
||||||
|
if ! curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/orgs/${org_name}/repos" \
|
||||||
|
-d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then
|
||||||
|
# Fallback: create under the human user namespace using admin endpoint
|
||||||
|
if [ -n "${admin_token:-}" ]; then
|
||||||
|
if ! curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${admin_token}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/admin/users/${org_name}/repos" \
|
||||||
|
-d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then
|
||||||
|
echo "Error: failed to create repo '${repo_slug}' on Forgejo (admin endpoint)" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
elif [ -n "${HUMAN_TOKEN:-}" ]; then
|
||||||
|
if ! curl -sf -X POST \
|
||||||
|
-H "Authorization: token ${HUMAN_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/user/repos" \
|
||||||
|
-d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then
|
||||||
|
echo "Error: failed to create repo '${repo_slug}' on Forgejo (user endpoint)" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Error: failed to create repo '${repo_slug}' — no admin or human token available" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Add all bot users as collaborators with appropriate permissions
|
||||||
|
# dev-bot: write (PR creation via lib/vault.sh)
|
||||||
|
# review-bot: read (PR review)
|
||||||
|
# planner-bot: write (prerequisites.md, memory)
|
||||||
|
# gardener-bot: write (backlog grooming)
|
||||||
|
# vault-bot: write (vault items)
|
||||||
|
# supervisor-bot: read (health monitoring)
|
||||||
|
# predictor-bot: read (pattern detection)
|
||||||
|
# architect-bot: write (sprint PRs)
|
||||||
|
local bot_perm
|
||||||
|
declare -A bot_permissions=(
|
||||||
|
[dev-bot]="write"
|
||||||
|
[review-bot]="read"
|
||||||
|
[planner-bot]="write"
|
||||||
|
[gardener-bot]="write"
|
||||||
|
[vault-bot]="write"
|
||||||
|
[supervisor-bot]="read"
|
||||||
|
[predictor-bot]="read"
|
||||||
|
[architect-bot]="write"
|
||||||
|
)
|
||||||
|
for bot_user in "${!bot_permissions[@]}"; do
|
||||||
|
bot_perm="${bot_permissions[$bot_user]}"
|
||||||
|
curl -sf -X PUT \
|
||||||
|
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \
|
||||||
|
-d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true
|
||||||
|
done
|
||||||
|
|
||||||
|
# Add llama bot users as write collaborators for local-model agents
|
||||||
|
for llama_user in "${!llama_token_vars[@]}"; do
|
||||||
|
curl -sf -X PUT \
|
||||||
|
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/repos/${repo_slug}/collaborators/${llama_user}" \
|
||||||
|
-d '{"permission":"write"}' >/dev/null 2>&1 || true
|
||||||
|
done
|
||||||
|
|
||||||
|
# Add disinto-admin as admin collaborator
|
||||||
|
curl -sf -X PUT \
|
||||||
|
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${forge_url}/api/v1/repos/${repo_slug}/collaborators/disinto-admin" \
|
||||||
|
-d '{"permission":"admin"}' >/dev/null 2>&1 || true
|
||||||
|
|
||||||
|
echo "Repo: ${repo_slug} created on Forgejo"
|
||||||
|
else
|
||||||
|
echo "Repo: ${repo_slug} (already exists on Forgejo)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Forge: ${forge_url} (ready)"
|
||||||
|
}
|
||||||
|
|
@ -1,60 +1,53 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# formula-session.sh — Shared helpers for formula-driven cron agents
|
# formula-session.sh — Shared helpers for formula-driven polling-loop agents
|
||||||
#
|
#
|
||||||
# Provides reusable functions for the common cron-wrapper + tmux-session
|
# Provides reusable utility functions for the common polling-loop wrapper pattern
|
||||||
# pattern used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh.
|
# used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh.
|
||||||
#
|
#
|
||||||
# Functions:
|
# Functions:
|
||||||
# acquire_cron_lock LOCK_FILE — PID lock with stale cleanup
|
# acquire_run_lock LOCK_FILE — PID lock with stale cleanup
|
||||||
# check_memory [MIN_MB] — skip if available RAM too low
|
|
||||||
# load_formula FORMULA_FILE — sets FORMULA_CONTENT
|
# load_formula FORMULA_FILE — sets FORMULA_CONTENT
|
||||||
# build_context_block FILE [FILE ...] — sets CONTEXT_BLOCK
|
# build_context_block FILE [FILE ...] — sets CONTEXT_BLOCK
|
||||||
# start_formula_session SESSION WORKDIR PHASE_FILE — create tmux + claude
|
# build_prompt_footer [EXTRA_API_LINES] — sets PROMPT_FOOTER (API ref + env)
|
||||||
# build_prompt_footer [EXTRA_API] — sets PROMPT_FOOTER (API ref + env + phase)
|
# build_sdk_prompt_footer [EXTRA_API] — omits phase protocol (SDK mode)
|
||||||
# run_formula_and_monitor AGENT [TIMEOUT] [CALLBACK] — session start, inject, monitor, log
|
# formula_worktree_setup WORKTREE — isolated worktree for formula execution
|
||||||
# formula_phase_callback PHASE — standard crash-recovery callback
|
|
||||||
# formula_prepare_profile_context — load lessons from .profile repo (pre-session)
|
# formula_prepare_profile_context — load lessons from .profile repo (pre-session)
|
||||||
|
# formula_lessons_block — return lessons block for prompt
|
||||||
|
# profile_write_journal ISSUE_NUM TITLE OUTCOME [FILES] — post-session journal
|
||||||
|
# profile_load_lessons — load lessons-learned.md into LESSONS_CONTEXT
|
||||||
|
# ensure_profile_repo [AGENT_IDENTITY] — clone/pull .profile repo
|
||||||
|
# _profile_has_repo — check if agent has .profile repo
|
||||||
|
# _count_undigested_journals — count journal entries to digest
|
||||||
|
# _profile_digest_journals — digest journals into lessons
|
||||||
|
# _profile_commit_and_push MESSAGE [FILES] — commit/push to .profile repo
|
||||||
|
# resolve_agent_identity — resolve agent user login from FORGE_TOKEN
|
||||||
|
# build_graph_section — run build-graph.py and set GRAPH_SECTION
|
||||||
|
# build_scratch_instruction SCRATCH_FILE — return context scratch instruction
|
||||||
|
# read_scratch_context SCRATCH_FILE — return scratch file content block
|
||||||
|
# ensure_ops_repo — clone/pull ops repo
|
||||||
|
# ops_commit_and_push MESSAGE [FILES] — commit/push to ops repo
|
||||||
|
# cleanup_stale_crashed_worktrees [HOURS] — thin wrapper around worktree_cleanup_stale
|
||||||
#
|
#
|
||||||
# Requires: lib/agent-session.sh sourced first (for create_agent_session,
|
# Requires: lib/env.sh, lib/worktree.sh sourced first for shared helpers.
|
||||||
# agent_kill_session, agent_inject_into_session).
|
|
||||||
# Globals used by formula_phase_callback: SESSION_NAME, PHASE_FILE,
|
|
||||||
# PROJECT_REPO_ROOT, PROMPT (set by the calling script).
|
|
||||||
|
|
||||||
# ── Cron guards ──────────────────────────────────────────────────────────
|
# ── Run guards ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
# acquire_cron_lock LOCK_FILE
|
# acquire_run_lock LOCK_FILE
|
||||||
# Acquires a PID lock. Exits 0 if another instance is running.
|
# Acquires a PID lock. Exits 0 if another instance is running.
|
||||||
# Sets an EXIT trap to clean up the lock file.
|
# Sets an EXIT trap to clean up the lock file.
|
||||||
acquire_cron_lock() {
|
acquire_run_lock() {
|
||||||
_CRON_LOCK_FILE="$1"
|
_RUN_LOCK_FILE="$1"
|
||||||
if [ -f "$_CRON_LOCK_FILE" ]; then
|
if [ -f "$_RUN_LOCK_FILE" ]; then
|
||||||
local lock_pid
|
local lock_pid
|
||||||
lock_pid=$(cat "$_CRON_LOCK_FILE" 2>/dev/null || true)
|
lock_pid=$(cat "$_RUN_LOCK_FILE" 2>/dev/null || true)
|
||||||
if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
|
if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
|
||||||
log "run: already running (PID $lock_pid)"
|
log "run: already running (PID $lock_pid)"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
rm -f "$_CRON_LOCK_FILE"
|
rm -f "$_RUN_LOCK_FILE"
|
||||||
fi
|
|
||||||
echo $$ > "$_CRON_LOCK_FILE"
|
|
||||||
trap 'rm -f "$_CRON_LOCK_FILE"' EXIT
|
|
||||||
}
|
|
||||||
|
|
||||||
# check_memory [MIN_MB]
|
|
||||||
# Exits 0 (skip) if available memory is below MIN_MB (default 2000).
|
|
||||||
check_memory() {
|
|
||||||
local min_mb="${1:-2000}"
|
|
||||||
# Graceful fallback if free command is not available (procps not installed)
|
|
||||||
if ! command -v free &>/dev/null; then
|
|
||||||
log "run: free not found, skipping memory check"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
local avail_mb
|
|
||||||
avail_mb=$(free -m | awk '/Mem:/{print $7}')
|
|
||||||
if [ "${avail_mb:-0}" -lt "$min_mb" ]; then
|
|
||||||
log "run: skipping — only ${avail_mb}MB available (need ${min_mb})"
|
|
||||||
exit 0
|
|
||||||
fi
|
fi
|
||||||
|
echo $$ > "$_RUN_LOCK_FILE"
|
||||||
|
trap 'rm -f "$_RUN_LOCK_FILE"' EXIT
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Agent identity resolution ────────────────────────────────────────────
|
# ── Agent identity resolution ────────────────────────────────────────────
|
||||||
|
|
@ -80,6 +73,24 @@ resolve_agent_identity() {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── Forge remote resolution ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
# resolve_forge_remote
|
||||||
|
# Resolves FORGE_REMOTE by matching FORGE_URL hostname against git remotes.
|
||||||
|
# Falls back to "origin" if no match found.
|
||||||
|
# Requires: FORGE_URL, git repo with remotes configured.
|
||||||
|
# Exports: FORGE_REMOTE (always set).
|
||||||
|
resolve_forge_remote() {
|
||||||
|
# Extract hostname from FORGE_URL (e.g., https://codeberg.org/user/repo -> codeberg.org)
|
||||||
|
_forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||; s|:.*||')
|
||||||
|
# Find git remote whose push URL matches the forge host
|
||||||
|
FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}')
|
||||||
|
# Fallback to origin if no match found
|
||||||
|
FORGE_REMOTE="${FORGE_REMOTE:-origin}"
|
||||||
|
export FORGE_REMOTE
|
||||||
|
log "forge remote: ${FORGE_REMOTE}"
|
||||||
|
}
|
||||||
|
|
||||||
# ── .profile repo management ──────────────────────────────────────────────
|
# ── .profile repo management ──────────────────────────────────────────────
|
||||||
|
|
||||||
# ensure_profile_repo [AGENT_IDENTITY]
|
# ensure_profile_repo [AGENT_IDENTITY]
|
||||||
|
|
@ -102,11 +113,9 @@ ensure_profile_repo() {
|
||||||
# Define cache directory: /home/agent/data/.profile/{agent-name}
|
# Define cache directory: /home/agent/data/.profile/{agent-name}
|
||||||
PROFILE_REPO_PATH="${HOME:-/home/agent}/data/.profile/${agent_identity}"
|
PROFILE_REPO_PATH="${HOME:-/home/agent}/data/.profile/${agent_identity}"
|
||||||
|
|
||||||
# Build clone URL from FORGE_URL and agent identity
|
# Build clone URL from FORGE_URL — credential helper supplies auth (#604)
|
||||||
local forge_url="${FORGE_URL:-http://localhost:3000}"
|
local forge_url="${FORGE_URL:-http://localhost:3000}"
|
||||||
local auth_url
|
local clone_url="${forge_url}/${agent_identity}/.profile.git"
|
||||||
auth_url=$(printf '%s' "$forge_url" | sed "s|://|://$(whoami):${FORGE_TOKEN}@|")
|
|
||||||
local clone_url="${auth_url}/${agent_identity}/.profile.git"
|
|
||||||
|
|
||||||
# Check if already cached and up-to-date
|
# Check if already cached and up-to-date
|
||||||
if [ -d "${PROFILE_REPO_PATH}/.git" ]; then
|
if [ -d "${PROFILE_REPO_PATH}/.git" ]; then
|
||||||
|
|
@ -139,7 +148,7 @@ ensure_profile_repo() {
|
||||||
# Checks if the agent has a .profile repo by querying Forgejo API.
|
# Checks if the agent has a .profile repo by querying Forgejo API.
|
||||||
# Returns 0 if repo exists, 1 otherwise.
|
# Returns 0 if repo exists, 1 otherwise.
|
||||||
_profile_has_repo() {
|
_profile_has_repo() {
|
||||||
local agent_identity="${1:-${AGENT_IDENTITY:-}}"
|
local agent_identity="${AGENT_IDENTITY:-}"
|
||||||
|
|
||||||
if [ -z "$agent_identity" ]; then
|
if [ -z "$agent_identity" ]; then
|
||||||
if ! resolve_agent_identity; then
|
if ! resolve_agent_identity; then
|
||||||
|
|
@ -175,8 +184,8 @@ _count_undigested_journals() {
|
||||||
# Runs a claude -p one-shot to digest undigested journals into lessons-learned.md
|
# Runs a claude -p one-shot to digest undigested journals into lessons-learned.md
|
||||||
# Returns 0 on success, 1 on failure.
|
# Returns 0 on success, 1 on failure.
|
||||||
_profile_digest_journals() {
|
_profile_digest_journals() {
|
||||||
local agent_identity="${1:-${AGENT_IDENTITY:-}}"
|
local agent_identity="${AGENT_IDENTITY:-}"
|
||||||
local model="${2:-${CLAUDE_MODEL:-opus}}"
|
local model="${CLAUDE_MODEL:-opus}"
|
||||||
|
|
||||||
if [ -z "$agent_identity" ]; then
|
if [ -z "$agent_identity" ]; then
|
||||||
if ! resolve_agent_identity; then
|
if ! resolve_agent_identity; then
|
||||||
|
|
@ -242,7 +251,6 @@ Write the complete, rewritten lessons-learned.md content below. No preamble, no
|
||||||
output=$(claude -p "$digest_prompt" \
|
output=$(claude -p "$digest_prompt" \
|
||||||
--output-format json \
|
--output-format json \
|
||||||
--dangerously-skip-permissions \
|
--dangerously-skip-permissions \
|
||||||
--max-tokens 1000 \
|
|
||||||
${model:+--model "$model"} \
|
${model:+--model "$model"} \
|
||||||
2>>"$LOGFILE" || echo '{"result":"error"}')
|
2>>"$LOGFILE" || echo '{"result":"error"}')
|
||||||
|
|
||||||
|
|
@ -437,7 +445,6 @@ Write the journal entry below. Use markdown format."
|
||||||
output=$(claude -p "$reflection_prompt" \
|
output=$(claude -p "$reflection_prompt" \
|
||||||
--output-format json \
|
--output-format json \
|
||||||
--dangerously-skip-permissions \
|
--dangerously-skip-permissions \
|
||||||
--max-tokens 500 \
|
|
||||||
${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} \
|
${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} \
|
||||||
2>>"$LOGFILE" || echo '{"result":"error"}')
|
2>>"$LOGFILE" || echo '{"result":"error"}')
|
||||||
|
|
||||||
|
|
@ -454,16 +461,15 @@ Write the journal entry below. Use markdown format."
|
||||||
local journal_dir="${PROFILE_REPO_PATH}/journal"
|
local journal_dir="${PROFILE_REPO_PATH}/journal"
|
||||||
mkdir -p "$journal_dir"
|
mkdir -p "$journal_dir"
|
||||||
|
|
||||||
# Write journal entry (append if exists)
|
# Write journal entry with timestamped filename for accumulation
|
||||||
local journal_file="${journal_dir}/issue-${issue_num}.md"
|
local ts
|
||||||
if [ -f "$journal_file" ]; then
|
ts=$(date -u +%Y%m%d-%H%M%S)
|
||||||
printf '\n---\n\n' >> "$journal_file"
|
local journal_file="${journal_dir}/issue-${issue_num}-${ts}.md"
|
||||||
fi
|
|
||||||
printf '%s\n' "$journal_content" >> "$journal_file"
|
printf '%s\n' "$journal_content" >> "$journal_file"
|
||||||
log "profile: wrote journal entry for issue #${issue_num}"
|
log "profile: wrote journal entry for issue #${issue_num} (${ts})"
|
||||||
|
|
||||||
# Commit and push to .profile repo
|
# Commit and push to .profile repo
|
||||||
_profile_commit_and_push "journal: issue #${issue_num} reflection" "journal/issue-${issue_num}.md"
|
_profile_commit_and_push "journal: issue #${issue_num} reflection (${ts})" "journal/issue-${issue_num}-${ts}.md"
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
@ -562,7 +568,7 @@ $(cat "$ctx_path")
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Ops repo helpers ─────────────────────────────────────────────────
|
# ── Ops repo helpers ────────────────────────────────────────────────────
|
||||||
|
|
||||||
# ensure_ops_repo
|
# ensure_ops_repo
|
||||||
# Clones or pulls the ops repo so agents can read/write operational data.
|
# Clones or pulls the ops repo so agents can read/write operational data.
|
||||||
|
|
@ -584,14 +590,8 @@ ensure_ops_repo() {
|
||||||
local ops_repo="${FORGE_OPS_REPO:-}"
|
local ops_repo="${FORGE_OPS_REPO:-}"
|
||||||
[ -n "$ops_repo" ] || return 0
|
[ -n "$ops_repo" ] || return 0
|
||||||
local forge_url="${FORGE_URL:-http://localhost:3000}"
|
local forge_url="${FORGE_URL:-http://localhost:3000}"
|
||||||
local clone_url
|
# Use clean URL — credential helper supplies auth (#604)
|
||||||
if [ -n "${FORGE_TOKEN:-}" ]; then
|
local clone_url="${forge_url}/${ops_repo}.git"
|
||||||
local auth_url
|
|
||||||
auth_url=$(printf '%s' "$forge_url" | sed "s|://|://$(whoami):${FORGE_TOKEN}@|")
|
|
||||||
clone_url="${auth_url}/${ops_repo}.git"
|
|
||||||
else
|
|
||||||
clone_url="${forge_url}/${ops_repo}.git"
|
|
||||||
fi
|
|
||||||
|
|
||||||
log "Cloning ops repo: ${ops_repo} -> ${ops_root}"
|
log "Cloning ops repo: ${ops_repo} -> ${ops_root}"
|
||||||
if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then
|
if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then
|
||||||
|
|
@ -625,90 +625,6 @@ ops_commit_and_push() {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Session management ───────────────────────────────────────────────────
|
|
||||||
|
|
||||||
# start_formula_session SESSION WORKDIR PHASE_FILE
|
|
||||||
# Kills stale session, resets phase file, creates a per-agent git worktree
|
|
||||||
# for session isolation, and creates a new tmux + claude session in it.
|
|
||||||
# Sets _FORMULA_SESSION_WORKDIR to the worktree path (or original workdir
|
|
||||||
# on fallback). Callers must clean up via remove_formula_worktree after
|
|
||||||
# the session ends.
|
|
||||||
# Returns 0 on success, 1 on failure.
|
|
||||||
start_formula_session() {
|
|
||||||
local session="$1" workdir="$2" phase_file="$3"
|
|
||||||
agent_kill_session "$session"
|
|
||||||
rm -f "$phase_file"
|
|
||||||
|
|
||||||
# Create per-agent git worktree for session isolation.
|
|
||||||
# Each agent gets its own CWD so Claude Code treats them as separate
|
|
||||||
# projects — no resume collisions between sequential formula runs.
|
|
||||||
_FORMULA_SESSION_WORKDIR="/tmp/disinto-${session}"
|
|
||||||
# Clean up any stale worktree from a previous run
|
|
||||||
git -C "$workdir" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true
|
|
||||||
if git -C "$workdir" worktree add "$_FORMULA_SESSION_WORKDIR" HEAD --detach 2>/dev/null; then
|
|
||||||
log "Created worktree: ${_FORMULA_SESSION_WORKDIR}"
|
|
||||||
else
|
|
||||||
log "WARNING: worktree creation failed — falling back to ${workdir}"
|
|
||||||
_FORMULA_SESSION_WORKDIR="$workdir"
|
|
||||||
fi
|
|
||||||
|
|
||||||
log "Creating tmux session: ${session}"
|
|
||||||
if ! create_agent_session "$session" "$_FORMULA_SESSION_WORKDIR" "$phase_file"; then
|
|
||||||
log "ERROR: failed to create tmux session ${session}"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# remove_formula_worktree
|
|
||||||
# Removes the worktree created by start_formula_session if it differs from
|
|
||||||
# PROJECT_REPO_ROOT. Safe to call multiple times. No-op if no worktree was created.
|
|
||||||
remove_formula_worktree() {
|
|
||||||
if [ -n "${_FORMULA_SESSION_WORKDIR:-}" ] \
|
|
||||||
&& [ "$_FORMULA_SESSION_WORKDIR" != "${PROJECT_REPO_ROOT:-}" ]; then
|
|
||||||
git -C "$PROJECT_REPO_ROOT" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true
|
|
||||||
log "Removed worktree: ${_FORMULA_SESSION_WORKDIR}"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# formula_phase_callback PHASE
|
|
||||||
# Standard crash-recovery phase callback for formula sessions.
|
|
||||||
# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT.
|
|
||||||
# Uses _FORMULA_CRASH_COUNT (auto-initialized) for single-retry limit.
|
|
||||||
# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller
|
|
||||||
formula_phase_callback() {
|
|
||||||
local phase="$1"
|
|
||||||
log "phase: ${phase}"
|
|
||||||
case "$phase" in
|
|
||||||
PHASE:crashed)
|
|
||||||
if [ "${_FORMULA_CRASH_COUNT:-0}" -gt 0 ]; then
|
|
||||||
log "ERROR: session crashed again after recovery — giving up"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
_FORMULA_CRASH_COUNT=$(( ${_FORMULA_CRASH_COUNT:-0} + 1 ))
|
|
||||||
log "WARNING: tmux session died unexpectedly — attempting recovery"
|
|
||||||
if create_agent_session "${_MONITOR_SESSION:-$SESSION_NAME}" "${_FORMULA_SESSION_WORKDIR:-$PROJECT_REPO_ROOT}" "$PHASE_FILE" 2>/dev/null; then
|
|
||||||
agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" "$PROMPT"
|
|
||||||
log "Recovery session started"
|
|
||||||
else
|
|
||||||
log "ERROR: could not restart session after crash"
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged)
|
|
||||||
agent_kill_session "${_MONITOR_SESSION:-$SESSION_NAME}"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
}
|
|
||||||
|
|
||||||
# ── Stale crashed worktree cleanup ─────────────────────────────────────────
|
|
||||||
|
|
||||||
# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS]
|
|
||||||
# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh.
|
|
||||||
# Kept for backwards compatibility with existing callers.
|
|
||||||
# Requires: lib/worktree.sh sourced.
|
|
||||||
cleanup_stale_crashed_worktrees() {
|
|
||||||
worktree_cleanup_stale "${1:-24}"
|
|
||||||
}
|
|
||||||
|
|
||||||
# ── Scratch file helpers (compaction survival) ────────────────────────────
|
# ── Scratch file helpers (compaction survival) ────────────────────────────
|
||||||
|
|
||||||
# build_scratch_instruction SCRATCH_FILE
|
# build_scratch_instruction SCRATCH_FILE
|
||||||
|
|
@ -784,25 +700,26 @@ build_sdk_prompt_footer() {
|
||||||
# Creates an isolated worktree for synchronous formula execution.
|
# Creates an isolated worktree for synchronous formula execution.
|
||||||
# Fetches primary branch, cleans stale worktree, creates new one, and
|
# Fetches primary branch, cleans stale worktree, creates new one, and
|
||||||
# sets an EXIT trap for cleanup.
|
# sets an EXIT trap for cleanup.
|
||||||
# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH.
|
# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH, FORGE_REMOTE.
|
||||||
|
# Ensure resolve_forge_remote() is called before this function.
|
||||||
formula_worktree_setup() {
|
formula_worktree_setup() {
|
||||||
local worktree="$1"
|
local worktree="$1"
|
||||||
cd "$PROJECT_REPO_ROOT" || return
|
cd "$PROJECT_REPO_ROOT" || return
|
||||||
git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true
|
git fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||||
worktree_cleanup "$worktree"
|
worktree_cleanup "$worktree"
|
||||||
git worktree add "$worktree" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null
|
git worktree add "$worktree" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" --detach 2>/dev/null
|
||||||
# shellcheck disable=SC2064 # expand worktree now, not at trap time
|
# shellcheck disable=SC2064 # expand worktree now, not at trap time
|
||||||
trap "worktree_cleanup '$worktree'" EXIT
|
trap "worktree_cleanup '$worktree'" EXIT
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Prompt + monitor helpers ──────────────────────────────────────────────
|
# ── Prompt helpers ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
# build_prompt_footer [EXTRA_API_LINES]
|
# build_prompt_footer [EXTRA_API_LINES]
|
||||||
# Assembles the common forge API reference + environment + phase protocol
|
# Assembles the common forge API reference + environment block for formula prompts.
|
||||||
# block for formula prompts. Sets PROMPT_FOOTER.
|
# Sets PROMPT_FOOTER.
|
||||||
# Pass additional API endpoint lines (pre-formatted, newline-prefixed) via $1.
|
# Pass additional API endpoint lines (pre-formatted, newline-prefixed) via $1.
|
||||||
# Requires globals: FORGE_API, FACTORY_ROOT, PROJECT_REPO_ROOT,
|
# Requires globals: FORGE_API, FACTORY_ROOT, PROJECT_REPO_ROOT,
|
||||||
# PRIMARY_BRANCH, PHASE_FILE.
|
# PRIMARY_BRANCH.
|
||||||
build_prompt_footer() {
|
build_prompt_footer() {
|
||||||
local extra_api="${1:-}"
|
local extra_api="${1:-}"
|
||||||
# shellcheck disable=SC2034 # consumed by the calling script's PROMPT
|
# shellcheck disable=SC2034 # consumed by the calling script's PROMPT
|
||||||
|
|
@ -818,66 +735,15 @@ NEVER echo or include the actual token value in output — always reference \${F
|
||||||
FACTORY_ROOT=${FACTORY_ROOT}
|
FACTORY_ROOT=${FACTORY_ROOT}
|
||||||
PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT}
|
PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT}
|
||||||
OPS_REPO_ROOT=${OPS_REPO_ROOT}
|
OPS_REPO_ROOT=${OPS_REPO_ROOT}
|
||||||
PRIMARY_BRANCH=${PRIMARY_BRANCH}
|
PRIMARY_BRANCH=${PRIMARY_BRANCH}"
|
||||||
PHASE_FILE=${PHASE_FILE}
|
|
||||||
|
|
||||||
## Phase protocol (REQUIRED)
|
|
||||||
When all work is done:
|
|
||||||
echo 'PHASE:done' > '${PHASE_FILE}'
|
|
||||||
On unrecoverable error:
|
|
||||||
printf 'PHASE:failed\nReason: %s\n' 'describe error' > '${PHASE_FILE}'"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# run_formula_and_monitor AGENT_NAME [TIMEOUT]
|
# ── Stale crashed worktree cleanup ────────────────────────────────────────
|
||||||
# Starts the formula session, injects PROMPT, monitors phase, and logs result.
|
|
||||||
# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT,
|
|
||||||
# FORGE_REPO, CLAUDE_MODEL (exported).
|
|
||||||
# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller
|
|
||||||
run_formula_and_monitor() {
|
|
||||||
local agent_name="$1"
|
|
||||||
local timeout="${2:-7200}"
|
|
||||||
local callback="${3:-formula_phase_callback}"
|
|
||||||
|
|
||||||
if ! start_formula_session "$SESSION_NAME" "$PROJECT_REPO_ROOT" "$PHASE_FILE"; then
|
# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS]
|
||||||
exit 1
|
# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh.
|
||||||
fi
|
# Kept for backwards compatibility with existing callers.
|
||||||
|
# Requires: lib/worktree.sh sourced.
|
||||||
# Write phase protocol to context file for compaction survival
|
cleanup_stale_crashed_worktrees() {
|
||||||
if [ -n "${PROMPT_FOOTER:-}" ]; then
|
worktree_cleanup_stale "${1:-24}"
|
||||||
write_compact_context "$PHASE_FILE" "$PROMPT_FOOTER"
|
|
||||||
fi
|
|
||||||
|
|
||||||
agent_inject_into_session "$SESSION_NAME" "$PROMPT"
|
|
||||||
log "Prompt sent to tmux session"
|
|
||||||
|
|
||||||
log "Monitoring phase file: ${PHASE_FILE}"
|
|
||||||
_FORMULA_CRASH_COUNT=0
|
|
||||||
|
|
||||||
monitor_phase_loop "$PHASE_FILE" "$timeout" "$callback"
|
|
||||||
|
|
||||||
FINAL_PHASE=$(read_phase "$PHASE_FILE")
|
|
||||||
log "Final phase: ${FINAL_PHASE:-none}"
|
|
||||||
|
|
||||||
if [ "$FINAL_PHASE" != "PHASE:done" ]; then
|
|
||||||
case "${_MONITOR_LOOP_EXIT:-}" in
|
|
||||||
idle_prompt)
|
|
||||||
log "${agent_name}: Claude returned to prompt without writing phase signal"
|
|
||||||
;;
|
|
||||||
idle_timeout)
|
|
||||||
log "${agent_name}: timed out with no phase signal"
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
log "${agent_name} finished without PHASE:done (phase: ${FINAL_PHASE:-none}, exit: ${_MONITOR_LOOP_EXIT:-})"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Preserve worktree on crash for debugging; clean up on success
|
|
||||||
if [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ]; then
|
|
||||||
worktree_preserve "${_FORMULA_SESSION_WORKDIR:-}" "crashed (agent=${agent_name})"
|
|
||||||
else
|
|
||||||
remove_formula_worktree
|
|
||||||
fi
|
|
||||||
|
|
||||||
log "--- ${agent_name^} run done ---"
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
672
lib/generators.sh
Normal file
672
lib/generators.sh
Normal file
|
|
@ -0,0 +1,672 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# generators — template generation functions for disinto init
|
||||||
|
#
|
||||||
|
# Generates docker-compose.yml, Dockerfile, Caddyfile, staging index, and
|
||||||
|
# deployment pipeline configs.
|
||||||
|
#
|
||||||
|
# Globals expected (must be set before sourcing):
|
||||||
|
# FACTORY_ROOT - Root of the disinto factory
|
||||||
|
# PROJECT_NAME - Project name for the project repo (defaults to 'project')
|
||||||
|
# PRIMARY_BRANCH - Primary branch name (defaults to 'main')
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# source "${FACTORY_ROOT}/lib/generators.sh"
|
||||||
|
# generate_compose "$forge_port"
|
||||||
|
# generate_caddyfile
|
||||||
|
# generate_staging_index
|
||||||
|
# generate_deploy_pipelines "$repo_root" "$project_name"
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail

# Assert required globals are set
# FACTORY_ROOT has no sane default — fail fast with a clear message if unset.
: "${FACTORY_ROOT:?FACTORY_ROOT must be set}"
# PROJECT_NAME defaults to 'project' if not set (env.sh may have set it from FORGE_REPO)
PROJECT_NAME="${PROJECT_NAME:-project}"
# PRIMARY_BRANCH defaults to main (env.sh may have set it to 'master')
PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}"
|
||||||
|
|
||||||
|
# Helper: extract woodpecker_repo_id from a project TOML file
|
||||||
|
# Returns "0" if the key is not found or the file doesn't exist
|
||||||
|
_get_woodpecker_repo_id() {
    # $1 — path to a project TOML file.
    # Prints the [ci].woodpecker_repo_id value, or "0" on any failure
    # (missing file, unparsable TOML, absent key), so callers can rely on
    # always receiving a value.
    local toml_file="$1"
    if [ -f "$toml_file" ]; then
        # tomllib requires Python 3.11+; stderr is discarded and the
        # `|| echo "0"` fallback covers a missing/failing python3 entirely.
        python3 -c "
import sys, tomllib
try:
    with open(sys.argv[1], 'rb') as f:
        cfg = tomllib.load(f)
    ci = cfg.get('ci', {})
    wp_id = ci.get('woodpecker_repo_id', '0')
    print(wp_id)
except Exception:
    print('0')
" "$toml_file" 2>/dev/null || echo "0"
    else
        echo "0"
    fi
}
|
||||||
|
|
||||||
|
# Find all project TOML files and extract the highest woodpecker_repo_id
|
||||||
|
# (used for the main agents service which doesn't have a per-project TOML)
|
||||||
|
_get_primary_woodpecker_repo_id() {
    # Scan every projects/*.toml and print the highest non-zero
    # woodpecker_repo_id found; prints "0" when none exists.
    local scan_dir="${FACTORY_ROOT}/projects"
    local best="0"
    local candidate

    for toml in "${scan_dir}"/*.toml; do
        # Unexpanded glob (no TOML files) leaves the literal pattern behind.
        [ -f "$toml" ] || continue

        candidate=$(_get_woodpecker_repo_id "$toml")
        if [ -z "$candidate" ] || [ "$candidate" = "0" ]; then
            continue
        fi

        # Numeric comparison; non-numeric values make `-gt` fail, which the
        # 2>/dev/null redirect silences (the candidate is then ignored).
        if [ "$candidate" -gt "$best" ] 2>/dev/null; then
            best="$candidate"
        fi
    done

    echo "$best"
}
|
||||||
|
|
||||||
|
# Parse project TOML for local-model agents and emit compose services.
|
||||||
|
# Writes service definitions to stdout; caller handles insertion into compose file.
|
||||||
|
_generate_local_model_services() {
    # $1 — path to the docker-compose.yml to augment in place.
    # Reads [agents.*] tables from every projects/*.toml, emits one compose
    # service per local-model agent, and splices them (plus their data
    # volumes) into the compose file just before its `volumes:` section.
    local compose_file="$1"
    local projects_dir="${FACTORY_ROOT}/projects"
    local temp_file
    temp_file=$(mktemp)
    local has_services=false
    local all_vols=""

    # Find all project TOML files and extract [agents.*] sections
    for toml in "${projects_dir}"/*.toml; do
        [ -f "$toml" ] || continue

        # Get woodpecker_repo_id for this project
        local wp_repo_id
        wp_repo_id=$(_get_woodpecker_repo_id "$toml")

        # Parse [agents.*] sections using Python - output YAML-compatible format.
        # The python emits KEY=value lines per agent, terminated by a "---"
        # record separator; this loop accumulates fields until it sees "---",
        # then flushes one service definition.
        while IFS='=' read -r key value; do
            case "$key" in
                NAME) service_name="$value" ;;
                BASE_URL) base_url="$value" ;;
                MODEL) model="$value" ;;
                ROLES) roles="$value" ;;
                API_KEY) api_key="$value" ;;
                FORGE_USER) forge_user="$value" ;;
                COMPACT_PCT) compact_pct="$value" ;;
                POLL_INTERVAL) poll_interval_val="$value" ;;
                ---)
                    if [ -n "$service_name" ] && [ -n "$base_url" ]; then
                        # Unquoted heredoc: ${service_name} etc. expand now;
                        # \${...} survive as literal compose-time variables.
                        cat >> "$temp_file" <<EOF

  agents-${service_name}:
    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    container_name: disinto-agents-${service_name}
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
      - agents-${service_name}-data:/home/agent/data
      - project-repos:/home/agent/repos
      - \${HOME}/.claude:/home/agent/.claude
      - \${HOME}/.claude.json:/home/agent/.claude.json:ro
      - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro
      - \${HOME}/.ssh:/home/agent/.ssh:ro
    environment:
      FORGE_URL: http://forgejo:3000
      # Use llama-specific credentials if available, otherwise fall back to main FORGE_TOKEN
      FORGE_TOKEN: \${FORGE_TOKEN_LLAMA:-\${FORGE_TOKEN:-}}
      FORGE_PASS: \${FORGE_PASS_LLAMA:-\${FORGE_PASS:-}}
      FORGE_REVIEW_TOKEN: \${FORGE_REVIEW_TOKEN:-}
      FORGE_BOT_USERNAMES: \${FORGE_BOT_USERNAMES:-}
      AGENT_ROLES: "${roles}"
      CLAUDE_TIMEOUT: \${CLAUDE_TIMEOUT:-7200}
      ANTHROPIC_BASE_URL: "${base_url}"
      ANTHROPIC_API_KEY: "${api_key}"
      CLAUDE_MODEL: "${model}"
      CLAUDE_CONFIG_DIR: /home/agent/.claude-${service_name}
      CLAUDE_CREDENTIALS_DIR: /home/agent/.claude-${service_name}/credentials
      CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "${compact_pct}"
      CLAUDE_CODE_ATTRIBUTION_HEADER: "0"
      CLAUDE_CODE_ENABLE_TELEMETRY: "0"
      DISINTO_CONTAINER: "1"
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      WOODPECKER_DATA_DIR: /woodpecker-data
      WOODPECKER_REPO_ID: "${wp_repo_id}"
      FORGE_BOT_USER_${service_name^^}: "${forge_user}"
      POLL_INTERVAL: "${poll_interval_val}"
    depends_on:
      - forgejo
      - woodpecker
    networks:
      - disinto-net
    profiles: ["agents-${service_name}"]

EOF
                        has_services=true
                    fi
                    # Collect volume name for later
                    local vol_name="  agents-${service_name}-data:"
                    if [ -n "$all_vols" ]; then
                        all_vols="${all_vols}
${vol_name}"
                    else
                        all_vols="${vol_name}"
                    fi
                    # Reset record fields before the next agent block.
                    service_name="" base_url="" model="" roles="" api_key="" forge_user="" compact_pct="" poll_interval_val=""
                    ;;
            esac
        done < <(python3 -c '
import sys, tomllib, json, re

with open(sys.argv[1], "rb") as f:
    cfg = tomllib.load(f)

agents = cfg.get("agents", {})
for name, config in agents.items():
    if not isinstance(config, dict):
        continue

    base_url = config.get("base_url", "")
    model = config.get("model", "")
    if not base_url or not model:
        continue

    roles = config.get("roles", ["dev"])
    roles_str = " ".join(roles) if isinstance(roles, list) else roles
    api_key = config.get("api_key", "sk-no-key-required")
    forge_user = config.get("forge_user", f"{name}-bot")
    compact_pct = config.get("compact_pct", 60)
    poll_interval = config.get("poll_interval", 60)

    safe_name = name.lower()
    safe_name = re.sub(r"[^a-z0-9]", "-", safe_name)

    # Output as simple key=value lines
    print(f"NAME={safe_name}")
    print(f"BASE_URL={base_url}")
    print(f"MODEL={model}")
    print(f"ROLES={roles_str}")
    print(f"API_KEY={api_key}")
    print(f"COMPACT_PCT={compact_pct}")
    print(f"FORGE_USER={forge_user}")
    print(f"POLL_INTERVAL={poll_interval}")
    print("---")
' "$toml" 2>/dev/null)
    done

    if [ "$has_services" = true ]; then
        # Insert the services before the volumes section
        local temp_compose
        temp_compose=$(mktemp)
        # Get everything before volumes:
        sed -n '1,/^volumes:/p' "$compose_file" | sed '$d' > "$temp_compose"
        # Add the services
        cat "$temp_file" >> "$temp_compose"
        # Add the volumes section and everything after
        sed -n '/^volumes:/,$p' "$compose_file" >> "$temp_compose"

        # Add local-model volumes to the volumes section
        if [ -n "$all_vols" ]; then
            # Find the volumes section and add the new volumes
            # (sed branch/label program: append after the last indented entry)
            sed -i "/^volumes:/{n;:a;n;/^[a-z]/!{s/$/\n$all_vols/;b};ba}" "$temp_compose"
        fi

        mv "$temp_compose" "$compose_file"
    fi

    rm -f "$temp_file"
}
|
||||||
|
|
||||||
|
# Generate docker-compose.yml in the factory root.
|
||||||
|
# **CANONICAL SOURCE**: This generator is the single source of truth for docker-compose.yml.
|
||||||
|
# The tracked docker-compose.yml file has been removed. Operators must run 'bin/disinto init'
|
||||||
|
# to materialize a working stack on a fresh checkout.
|
||||||
|
_generate_compose_impl() {
    # $1 — host port for Forgejo (default 3000).
    # Writes ${FACTORY_ROOT}/docker-compose.yml from the quoted heredoc
    # template below, then patches in generation-time values (project name,
    # woodpecker repo id, forgejo port, claude binary path) with sed.
    # No-op if the compose file already exists.
    local forge_port="${1:-3000}"
    local compose_file="${FACTORY_ROOT}/docker-compose.yml"

    # Check if compose file already exists
    if [ -f "$compose_file" ]; then
        echo "Compose: ${compose_file} (already exists, skipping)"
        return 0
    fi

    # Extract primary woodpecker_repo_id from project TOML files
    local wp_repo_id
    wp_repo_id=$(_get_primary_woodpecker_repo_id)

    # Quoted heredoc ('COMPOSEEOF'): no shell expansion here — ${...}
    # references are left for Docker Compose / later sed patches.
    cat > "$compose_file" <<'COMPOSEEOF'
# docker-compose.yml — generated by disinto init
# Brings up Forgejo, Woodpecker, and the agent runtime.

services:
  forgejo:
    image: codeberg.org/forgejo/forgejo:11.0
    container_name: disinto-forgejo
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
      - forgejo-data:/data
    environment:
      FORGEJO__database__DB_TYPE: sqlite3
      FORGEJO__server__ROOT_URL: http://forgejo:3000/
      FORGEJO__server__HTTP_PORT: "3000"
      FORGEJO__security__INSTALL_LOCK: "true"
      FORGEJO__service__DISABLE_REGISTRATION: "true"
      FORGEJO__webhook__ALLOWED_HOST_LIST: "private"
    networks:
      - disinto-net

  woodpecker:
    image: woodpeckerci/woodpecker-server:v3
    container_name: disinto-woodpecker
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    ports:
      - "8000:8000"
      - "9000:9000"
    volumes:
      - woodpecker-data:/var/lib/woodpecker
    environment:
      WOODPECKER_FORGEJO: "true"
      WOODPECKER_FORGEJO_URL: http://forgejo:3000
      WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-}
      WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-}
      WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000}
      WOODPECKER_OPEN: "true"
      WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
      WOODPECKER_DATABASE_DRIVER: sqlite3
      WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite
      WOODPECKER_ENVIRONMENT: "FORGE_TOKEN:${FORGE_TOKEN}"
    depends_on:
      - forgejo
    networks:
      - disinto-net

  woodpecker-agent:
    image: woodpeckerci/woodpecker-agent:v3
    container_name: disinto-woodpecker-agent
    restart: unless-stopped
    network_mode: host
    privileged: true
    security_opt:
      - apparmor=unconfined
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      WOODPECKER_SERVER: localhost:9000
      WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
      WOODPECKER_GRPC_SECURE: "false"
      WOODPECKER_HEALTHCHECK_ADDR: ":3333"
      WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net
      WOODPECKER_MAX_WORKFLOWS: 1
    depends_on:
      - woodpecker

  agents:
    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    container_name: disinto-agents
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
      - agent-data:/home/agent/data
      - project-repos:/home/agent/repos
      - ${HOME}/.claude:/home/agent/.claude
      - ${HOME}/.claude.json:/home/agent/.claude.json:ro
      - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro
      - ${HOME}/.ssh:/home/agent/.ssh:ro
      - ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro
      - woodpecker-data:/woodpecker-data:ro
    environment:
      FORGE_URL: http://forgejo:3000
      FORGE_TOKEN: ${FORGE_TOKEN:-}
      FORGE_REVIEW_TOKEN: ${FORGE_REVIEW_TOKEN:-}
      FORGE_PLANNER_TOKEN: ${FORGE_PLANNER_TOKEN:-}
      FORGE_GARDENER_TOKEN: ${FORGE_GARDENER_TOKEN:-}
      FORGE_VAULT_TOKEN: ${FORGE_VAULT_TOKEN:-}
      FORGE_SUPERVISOR_TOKEN: ${FORGE_SUPERVISOR_TOKEN:-}
      FORGE_PREDICTOR_TOKEN: ${FORGE_PREDICTOR_TOKEN:-}
      FORGE_ARCHITECT_TOKEN: ${FORGE_ARCHITECT_TOKEN:-}
      FORGE_BOT_USERNAMES: ${FORGE_BOT_USERNAMES:-}
      WOODPECKER_TOKEN: ${WOODPECKER_TOKEN:-}
      CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200}
      CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
      FORGE_PASS: ${FORGE_PASS:-}
      FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-}
      FACTORY_REPO: ${FORGE_REPO:-disinto-admin/disinto}
      DISINTO_CONTAINER: "1"
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      WOODPECKER_DATA_DIR: /woodpecker-data
      WOODPECKER_REPO_ID: "PLACEHOLDER_WP_REPO_ID"
      # IMPORTANT: agents get explicit environment variables (forge tokens, CI tokens, config).
      # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in
      # .env.vault.enc and are NEVER injected here — only the runner
      # container receives them at fire time (AD-006, #745).
    depends_on:
      - forgejo
      - woodpecker
    networks:
      - disinto-net

  runner:
    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    profiles: ["vault"]
    security_opt:
      - apparmor=unconfined
    volumes:
      - agent-data:/home/agent/data
    environment:
      FORGE_URL: http://forgejo:3000
      DISINTO_CONTAINER: "1"
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      # Vault redesign in progress (PR-based approval, see #73-#77)
      # This container is being replaced — entrypoint will be updated in follow-up
    networks:
      - disinto-net

  # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging
  # Serves on ports 80/443, routes based on path
  edge:
    build: ./docker/edge
    container_name: disinto-edge
    security_opt:
      - apparmor=unconfined
    ports:
      - "80:80"
      - "443:443"
    environment:
      - DISINTO_VERSION=${DISINTO_VERSION:-main}
      - FORGE_URL=http://forgejo:3000
      - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
      - FORGE_OPS_REPO=${FORGE_OPS_REPO:-disinto-admin/disinto-ops}
      - FORGE_TOKEN=${FORGE_TOKEN:-}
      - FORGE_PASS=${FORGE_PASS:-}
      - FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin}
      - FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-}
      - OPS_REPO_ROOT=/opt/disinto-ops
      - PROJECT_REPO_ROOT=/opt/disinto
      - PRIMARY_BRANCH=main
    volumes:
      - ./docker/Caddyfile:/etc/caddy/Caddyfile
      - caddy_data:/data
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      - forgejo
      - woodpecker
      - staging
    networks:
      - disinto-net

  # Staging container — static file server for staging artifacts
  # Edge proxy routes to this container for default requests
  staging:
    image: caddy:alpine
    command: ["caddy", "file-server", "--root", "/srv/site"]
    security_opt:
      - apparmor=unconfined
    volumes:
      - ./docker:/srv/site:ro
    networks:
      - disinto-net

  # Staging deployment slot — activated by Woodpecker staging pipeline (#755).
  # Profile-gated: only starts when explicitly targeted by deploy commands.
  # Customize image/ports/volumes for your project after init.
  staging-deploy:
    image: alpine:3
    profiles: ["staging"]
    security_opt:
      - apparmor=unconfined
    environment:
      DEPLOY_ENV: staging
    networks:
      - disinto-net
    command: ["echo", "staging slot — replace with project image"]

volumes:
  forgejo-data:
  woodpecker-data:
  agent-data:
  project-repos:
  caddy_data:

networks:
  disinto-net:
    driver: bridge
COMPOSEEOF

    # Patch PROJECT_REPO_ROOT — interpolate PROJECT_NAME at generation time
    # (Docker Compose cannot resolve it; it's a shell variable, not a .env var)
    sed -i "s|\${PROJECT_NAME:-project}|${PROJECT_NAME}|g" "$compose_file"

    # Patch WOODPECKER_REPO_ID — interpolate at generation time
    # (Docker Compose cannot resolve it; it's a shell variable, not a .env var)
    if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then
        sed -i "s|PLACEHOLDER_WP_REPO_ID|${wp_repo_id}|g" "$compose_file"
    else
        # Default to empty if no repo_id found (agents will handle gracefully)
        sed -i "s|PLACEHOLDER_WP_REPO_ID||g" "$compose_file"
    fi

    # Patch the forgejo port mapping into the file if non-default
    if [ "$forge_port" != "3000" ]; then
        # Add port mapping to forgejo service so it's reachable from host during init
        sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\    ports:\\n      - \"${forge_port}:3000\"" "$compose_file"
    else
        sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\    ports:\\n      - \"3000:3000\"" "$compose_file"
    fi

    # Append local-model agent services if any are configured
    # (must run before CLAUDE_BIN_PLACEHOLDER substitution so the placeholder
    # in local-model services is also resolved)
    _generate_local_model_services "$compose_file"

    # Patch the Claude CLI binary path — resolve from host PATH at init time.
    local claude_bin
    claude_bin="$(command -v claude 2>/dev/null || true)"
    if [ -n "$claude_bin" ]; then
        # Resolve symlinks to get the real binary path
        claude_bin="$(readlink -f "$claude_bin")"
        sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|g" "$compose_file"
    else
        echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2
        sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|g" "$compose_file"
    fi

    echo "Created: ${compose_file}"
}
|
||||||
|
|
||||||
|
# Generate docker/agents/ files if they don't already exist.
|
||||||
|
_generate_agent_docker_impl() {
    # Ensure docker/agents/ exists and warn (without failing) when the
    # repo-tracked Dockerfile or entrypoint.sh is missing.
    local agents_dir="${FACTORY_ROOT}/docker/agents"
    mkdir -p "$agents_dir"

    local expected
    for expected in Dockerfile entrypoint.sh; do
        if [ ! -f "${agents_dir}/${expected}" ]; then
            echo "Warning: docker/agents/${expected} not found — expected in repo" >&2
        fi
    done
}
|
||||||
|
|
||||||
|
# Generate docker/Caddyfile template for edge proxy.
|
||||||
|
_generate_caddyfile_impl() {
    # Writes ${FACTORY_ROOT}/docker/Caddyfile for the edge proxy.
    # Idempotent: an existing Caddyfile is never overwritten, so operator
    # customizations survive re-running init.
    local docker_dir="${FACTORY_ROOT}/docker"
    local caddyfile="${docker_dir}/Caddyfile"

    if [ -f "$caddyfile" ]; then
        echo "Caddyfile: ${caddyfile} (already exists, skipping)"
        return
    fi

    # Quoted heredoc: contents are written verbatim (no shell expansion).
    cat > "$caddyfile" <<'CADDYFILEEOF'
# Caddyfile — edge proxy configuration
# IP-only binding at bootstrap; domain + TLS added later via vault resource request

:80 {
    # Reverse proxy to Forgejo
    handle /forgejo/* {
        reverse_proxy forgejo:3000
    }

    # Reverse proxy to Woodpecker CI
    handle /ci/* {
        reverse_proxy woodpecker:8000
    }

    # Default: proxy to staging container
    handle {
        reverse_proxy staging:80
    }
}
CADDYFILEEOF

    echo "Created: ${caddyfile}"
}
|
||||||
|
|
||||||
|
# Generate docker/index.html default page.
|
||||||
|
_generate_staging_index_impl() {
    # Writes the placeholder docker/index.html served by the staging
    # container until a CI pipeline replaces it with real artifacts.
    # Idempotent: an existing index.html is left untouched.
    local docker_dir="${FACTORY_ROOT}/docker"
    local index_file="${docker_dir}/index.html"

    if [ -f "$index_file" ]; then
        echo "Staging: ${index_file} (already exists, skipping)"
        return
    fi

    # Quoted heredoc: HTML is written verbatim (no shell expansion).
    cat > "$index_file" <<'INDEXEOF'
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Nothing shipped yet</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
            display: flex;
            align-items: center;
            justify-content: center;
            min-height: 100vh;
            margin: 0;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
        }
        .container {
            text-align: center;
            padding: 2rem;
        }
        h1 {
            font-size: 3rem;
            margin: 0 0 1rem 0;
        }
        p {
            font-size: 1.25rem;
            opacity: 0.9;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Nothing shipped yet</h1>
        <p>CI pipelines will update this page with your staging artifacts.</p>
    </div>
</body>
</html>
INDEXEOF

    echo "Created: ${index_file}"
}
|
||||||
|
|
||||||
|
# Generate template .woodpecker/ deployment pipeline configs in a project repo.
|
||||||
|
# Creates staging.yml and production.yml alongside the project's existing CI config.
|
||||||
|
# These pipelines trigger on Woodpecker's deployment event with environment filters.
|
||||||
|
_generate_deploy_pipelines_impl() {
    # $1 — project repo root; $2 — project name (currently only referenced to
    # satisfy ShellCheck, see below).
    # Creates .woodpecker/staging.yml and .woodpecker/production.yml template
    # pipelines, skipping any file that already exists.
    local repo_root="$1"
    local project_name="$2"
    # No-op expansion so ShellCheck (SC2034) sees project_name used; the
    # quoted heredocs below do NOT expand it — it is reserved for future use.
    : "${project_name// /}"
    local wp_dir="${repo_root}/.woodpecker"

    mkdir -p "$wp_dir"

    # Skip if deploy pipelines already exist
    if [ -f "${wp_dir}/staging.yml" ] && [ -f "${wp_dir}/production.yml" ]; then
        echo "Deploy: .woodpecker/{staging,production}.yml (already exist)"
        return
    fi

    if [ ! -f "${wp_dir}/staging.yml" ]; then
        # Quoted heredoc: ${CI_*} variables are written literally for
        # Woodpecker to expand at pipeline runtime.
        cat > "${wp_dir}/staging.yml" <<'STAGINGEOF'
# .woodpecker/staging.yml — Staging deployment pipeline
# Triggered by runner via Woodpecker promote API.
# Human approves promotion in vault → runner calls promote → this runs.

when:
  event: deployment
  environment: staging

steps:
  - name: deploy-staging
    image: docker:27
    commands:
      - echo "Deploying to staging environment..."
      - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}"
      # Pull the image built by CI and deploy to staging
      # Customize these commands for your project:
      # - docker compose -f docker-compose.yml --profile staging up -d
      - echo "Staging deployment complete"

  - name: verify-staging
    image: alpine:3
    commands:
      - echo "Verifying staging deployment..."
      # Add health checks, smoke tests, or integration tests here:
      # - curl -sf http://staging:8080/health || exit 1
      - echo "Staging verification complete"
STAGINGEOF
        echo "Created: ${wp_dir}/staging.yml"
    fi

    if [ ! -f "${wp_dir}/production.yml" ]; then
        cat > "${wp_dir}/production.yml" <<'PRODUCTIONEOF'
# .woodpecker/production.yml — Production deployment pipeline
# Triggered by runner via Woodpecker promote API.
# Human approves promotion in vault → runner calls promote → this runs.

when:
  event: deployment
  environment: production

steps:
  - name: deploy-production
    image: docker:27
    commands:
      - echo "Deploying to production environment..."
      - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging"
      # Pull the verified image and deploy to production
      # Customize these commands for your project:
      # - docker compose -f docker-compose.yml up -d
      - echo "Production deployment complete"

  - name: verify-production
    image: alpine:3
    commands:
      - echo "Verifying production deployment..."
      # Add production health checks here:
      # - curl -sf http://production:8080/health || exit 1
      - echo "Production verification complete"
PRODUCTIONEOF
        echo "Created: ${wp_dir}/production.yml"
    fi
}
|
||||||
120
lib/git-creds.sh
Normal file
120
lib/git-creds.sh
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# git-creds.sh — Shared git credential helper configuration
|
||||||
|
#
|
||||||
|
# Configures a static credential helper for Forgejo password-based HTTP auth.
|
||||||
|
# Forgejo 11.x rejects API tokens for git push (#361); password auth works.
|
||||||
|
# This ensures all git operations (clone, fetch, push) use password auth
|
||||||
|
# without needing tokens embedded in remote URLs (#604).
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# source "${FACTORY_ROOT}/lib/git-creds.sh"
|
||||||
|
# configure_git_creds [HOME_DIR] [RUN_AS_CMD]
|
||||||
|
# repair_baked_cred_urls DIR [DIR ...]
|
||||||
|
#
|
||||||
|
# Globals expected:
|
||||||
|
# FORGE_PASS — bot password for git HTTP auth
|
||||||
|
# FORGE_URL — Forge instance URL (e.g. http://forgejo:3000)
|
||||||
|
# FORGE_TOKEN — API token (used to resolve bot username)
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# configure_git_creds [HOME_DIR] [RUN_AS_CMD]
|
||||||
|
# HOME_DIR — home directory for the git user (default: $HOME or /home/agent)
|
||||||
|
# RUN_AS_CMD — command prefix to run as another user (e.g. "gosu agent")
|
||||||
|
#
|
||||||
|
# Writes a credential helper script and configures git to use it globally.
|
||||||
|
configure_git_creds() {
    # $1 — home directory for the git user (default: $HOME or /home/agent)
    # $2 — optional command prefix to run git config as another user
    #      (e.g. "gosu agent"); the last word is treated as the username.
    # Writes a static git credential helper that answers "get" requests with
    # FORGE_PASS for the Forgejo host, and registers it globally.
    # Silently no-ops when FORGE_PASS/FORGE_URL are unset.
    local home_dir="${1:-${HOME:-/home/agent}}"
    local run_as="${2:-}"

    if [ -z "${FORGE_PASS:-}" ] || [ -z "${FORGE_URL:-}" ]; then
        return 0
    fi

    # Split FORGE_URL into protocol and host for the credential protocol reply.
    local forge_host forge_proto
    forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||')
    forge_proto=$(printf '%s' "$FORGE_URL" | sed 's|://.*||')

    # Determine the bot username from FORGE_TOKEN identity (or default to dev-bot)
    local bot_user=""
    if [ -n "${FORGE_TOKEN:-}" ]; then
        bot_user=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
            "${FORGE_URL}/api/v1/user" 2>/dev/null | jq -r '.login // empty') || bot_user=""
    fi
    bot_user="${bot_user:-dev-bot}"

    local helper_path="${home_dir}/.git-credentials-helper"

    # Write a static credential helper script (git credential protocol)
    cat > "$helper_path" <<CREDEOF
#!/bin/sh
# Auto-generated git credential helper for Forgejo password auth (#361, #604)
# Only respond to "get" action; ignore "store" and "erase".
[ "\$1" = "get" ] || exit 0
# Read and discard stdin (git sends protocol/host info)
cat >/dev/null
echo "protocol=${forge_proto}"
echo "host=${forge_host}"
echo "username=${bot_user}"
echo "password=${FORGE_PASS}"
CREDEOF
    # Owner-only (was 755): the helper embeds FORGE_PASS in plaintext, so it
    # must not be readable by other users on the host. The chown below keeps
    # it executable by the intended user.
    chmod 700 "$helper_path"

    # Set ownership and configure git if running as a different user
    if [ -n "$run_as" ]; then
        local target_user
        target_user=$(echo "$run_as" | awk '{print $NF}')
        chown "${target_user}:${target_user}" "$helper_path" 2>/dev/null || true
        $run_as bash -c "git config --global credential.helper '${helper_path}'"
    else
        git config --global credential.helper "$helper_path"
    fi

    # Set safe.directory to work around dubious ownership after container restart
    if [ -n "$run_as" ]; then
        $run_as bash -c "git config --global --add safe.directory '*'"
    else
        git config --global --add safe.directory '*'
    fi
}
|
||||||
|
|
||||||
|
# repair_baked_cred_urls DIR [DIR ...]
|
||||||
|
# Scans git repos under each DIR and rewrites remote URLs that contain
|
||||||
|
# embedded credentials (user:pass@host) to clean URLs.
|
||||||
|
# Logs each repair so operators can see the migration happened.
|
||||||
|
#
|
||||||
|
# Set _GIT_CREDS_LOG_FN to a custom log function name (default: echo).
|
||||||
|
repair_baked_cred_urls() {
    # repair_baked_cred_urls DIR [DIR ...]
    # For every git repo at DIR or one level below it, rewrite a remote
    # origin URL that embeds credentials (http(s)://user:pass@host/...) to
    # the clean credential-free form, logging each repair via
    # _GIT_CREDS_LOG_FN (default: echo).
    local log_fn="${_GIT_CREDS_LOG_FN:-echo}"
    # Declare loop/work variables local so they don't leak into the caller.
    local dir sub repo url clean_url

    for dir in "$@"; do
        [ -d "$dir" ] || continue

        # Find git repos: either dir itself or immediate subdirectories
        local -a repos=()
        if [ -d "${dir}/.git" ]; then
            repos+=("$dir")
        else
            for sub in "$dir"/*/; do
                [ -d "${sub}.git" ] && repos+=("${sub%/}")
            done
        fi

        # Guard: expanding "${repos[@]}" when empty is an unbound-variable
        # error under `set -u` on bash < 4.4.
        if [ "${#repos[@]}" -eq 0 ]; then
            continue
        fi

        for repo in "${repos[@]}"; do
            url=$(git -C "$repo" config --get remote.origin.url 2>/dev/null || true)
            [ -n "$url" ] || continue

            # Check if URL contains embedded credentials: http(s)://user:pass@host
            if printf '%s' "$url" | grep -qE '^https?://[^/]+@'; then
                # Strip credentials: http(s)://user:pass@host/path -> http(s)://host/path
                clean_url=$(printf '%s' "$url" | sed -E 's|(https?://)[^@]+@|\1|')
                git -C "$repo" remote set-url origin "$clean_url"
                $log_fn "Repaired baked credentials in ${repo} (remote origin -> ${clean_url})"
            fi
        done
    done
}
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# guard.sh — Active-state guard for cron entry points
|
# guard.sh — Active-state guard for polling-loop entry points
|
||||||
#
|
#
|
||||||
# Each agent checks for a state file before running. If the file
|
# Each agent checks for a state file before running. If the file
|
||||||
# doesn't exist, the agent logs a skip and exits cleanly.
|
# doesn't exist, the agent logs a skip and exits cleanly.
|
||||||
|
|
|
||||||
503
lib/hire-agent.sh
Normal file
503
lib/hire-agent.sh
Normal file
|
|
@ -0,0 +1,503 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# hire-agent — disinto_hire_an_agent() function
|
||||||
|
#
|
||||||
|
# Handles user creation, .profile repo setup, formula copying, branch protection,
|
||||||
|
# and state marker creation for hiring a new agent.
|
||||||
|
#
|
||||||
|
# Globals expected:
|
||||||
|
# FORGE_URL - Forge instance URL
|
||||||
|
# FORGE_TOKEN - Admin token for Forge operations
|
||||||
|
# FACTORY_ROOT - Root of the disinto factory
|
||||||
|
# PROJECT_NAME - Project name for email/domain generation
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# source "${FACTORY_ROOT}/lib/hire-agent.sh"
|
||||||
|
# disinto_hire_an_agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--model <name>] [--poll-interval <seconds>]
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# disinto_hire_an_agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--model <name>] [--poll-interval <seconds>]
#
# Orchestrates the full "hire" workflow against a Forgejo instance:
#   1    create (or reuse) the agent's user account
#   1.5  mint an API token and persist it to .env as FORGE_<ROLE>_TOKEN
#   2    create the <agent>/.profile repo via the admin API
#   3    clone it and seed journal/, knowledge/, formula.toml, README.md
#   4    apply branch protection via lib/branch-protection.sh
#   5    drop a state marker under ${FACTORY_ROOT}/state
#   6    (only with --local-model) register an [agents.*] section in the
#        project TOML and regenerate docker-compose.yml
#
# Globals read: FORGE_URL, FORGE_TOKEN, FACTORY_ROOT, PROJECT_NAME.
# Exits (not returns) non-zero on unrecoverable errors, so this is meant
# to run from a dedicated script context (bin/disinto).
disinto_hire_an_agent() {
    local agent_name="${1:-}"
    local role="${2:-}"
    local formula_path=""
    local local_model=""
    local model_name=""
    local poll_interval=""

    if [ -z "$agent_name" ] || [ -z "$role" ]; then
        echo "Error: agent-name and role required" >&2
        echo "Usage: disinto hire-an-agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--model <name>] [--poll-interval <seconds>]" >&2
        exit 1
    fi
    shift 2

    # Parse flags
    while [ $# -gt 0 ]; do
        case "$1" in
            --formula)
                formula_path="$2"
                shift 2
                ;;
            --local-model)
                local_model="$2"
                shift 2
                ;;
            --model)
                model_name="$2"
                shift 2
                ;;
            --poll-interval)
                poll_interval="$2"
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done

    # Default formula path — try both naming conventions
    if [ -z "$formula_path" ]; then
        formula_path="${FACTORY_ROOT}/formulas/${role}.toml"
        if [ ! -f "$formula_path" ]; then
            formula_path="${FACTORY_ROOT}/formulas/run-${role}.toml"
        fi
    fi

    # Validate formula exists
    if [ ! -f "$formula_path" ]; then
        echo "Error: formula not found at ${formula_path}" >&2
        exit 1
    fi

    echo "── Hiring agent: ${agent_name} (${role}) ───────────────────────"
    echo "Formula: ${formula_path}"
    if [ -n "$local_model" ]; then
        echo "Local model: ${local_model}"
        echo "Model name: ${model_name:-local-model}"
        echo "Poll interval: ${poll_interval:-60}s"
    fi

    # Ensure FORGE_TOKEN is set
    if [ -z "${FORGE_TOKEN:-}" ]; then
        echo "Error: FORGE_TOKEN not set" >&2
        exit 1
    fi

    # Get Forge URL
    local forge_url="${FORGE_URL:-http://localhost:3000}"
    echo "Forge: ${forge_url}"

    # Step 1: Create user via API (skip if exists)
    echo ""
    echo "Step 1: Creating user '${agent_name}' (if not exists)..."

    local user_pass=""
    local admin_pass=""

    # Read admin password from .env for standalone runs (#184)
    local env_file="${FACTORY_ROOT}/.env"
    if [ -f "$env_file" ] && grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
        admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-)
    fi

    # Get admin token early (needed for both user creation and password reset).
    # NOTE(review): the temp token created here is never revoked afterwards —
    # confirm whether stale "temp-token-*" entries accumulating on the admin
    # account is acceptable.
    local admin_user="disinto-admin"
    admin_pass="${admin_pass:-admin}"
    local admin_token=""
    local admin_token_name
    admin_token_name="temp-token-$(date +%s)"
    admin_token=$(curl -sf -X POST \
        -u "${admin_user}:${admin_pass}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/users/${admin_user}/tokens" \
        -d "{\"name\":\"${admin_token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \
        | jq -r '.sha1 // empty') || admin_token=""
    if [ -z "$admin_token" ]; then
        # Token might already exist — try listing
        admin_token=$(curl -sf \
            -u "${admin_user}:${admin_pass}" \
            "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \
            | jq -r '.[0].sha1 // empty') || admin_token=""
    fi
    if [ -z "$admin_token" ]; then
        echo "Error: failed to obtain admin API token" >&2
        echo " Cannot proceed without admin privileges" >&2
        exit 1
    fi

    if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then
        echo " User '${agent_name}' already exists"
        # Reset user password so we can get a token (#184)
        user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
        # Use Forgejo CLI to reset password (API PATCH ignores must_change_password in Forgejo 11.x)
        if _forgejo_exec forgejo admin user change-password \
            --username "${agent_name}" \
            --password "${user_pass}" \
            --must-change-password=false >/dev/null 2>&1; then
            echo " Reset password for existing user '${agent_name}'"
        else
            # NOTE(review): on this path user_pass no longer matches the real
            # password, so the basic-auth calls in Steps 1.5/3 will fail —
            # confirm whether a hard error would be preferable here.
            echo " Warning: could not reset password for existing user" >&2
        fi
    else
        # Create user using basic auth (admin token fallback would poison subsequent calls)
        # Create the user
        user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
        if curl -sf -X POST \
            -u "${admin_user}:${admin_pass}" \
            -H "Content-Type: application/json" \
            "${forge_url}/api/v1/admin/users" \
            -d "{\"username\":\"${agent_name}\",\"password\":\"${user_pass}\",\"email\":\"${agent_name}@${PROJECT_NAME:-disinto}.local\",\"full_name\":\"${agent_name}\",\"active\":true,\"admin\":false,\"must_change_password\":false}" >/dev/null 2>&1; then
            echo " Created user '${agent_name}'"
        else
            echo " Warning: failed to create user via admin API" >&2
            # Try alternative: user might already exist
            if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then
                echo " User '${agent_name}' exists (confirmed)"
            else
                echo " Error: failed to create user '${agent_name}'" >&2
                exit 1
            fi
        fi
    fi

    # Step 1.5: Generate Forge token for the new/existing user
    echo ""
    echo "Step 1.5: Generating Forge token for '${agent_name}'..."

    # Convert role to uppercase token variable name (e.g., architect -> FORGE_ARCHITECT_TOKEN)
    local role_upper
    role_upper=$(echo "$role" | tr '[:lower:]' '[:upper:]')
    local token_var="FORGE_${role_upper}_TOKEN"

    # Generate token using the user's password (basic auth)
    local agent_token=""
    agent_token=$(curl -sf -X POST \
        -u "${agent_name}:${user_pass}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/users/${agent_name}/tokens" \
        -d "{\"name\":\"disinto-${agent_name}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \
        | jq -r '.sha1 // empty') || agent_token=""

    if [ -z "$agent_token" ]; then
        # Token name collision — create with timestamp suffix
        agent_token=$(curl -sf -X POST \
            -u "${agent_name}:${user_pass}" \
            -H "Content-Type: application/json" \
            "${forge_url}/api/v1/users/${agent_name}/tokens" \
            -d "{\"name\":\"disinto-${agent_name}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \
            | jq -r '.sha1 // empty') || agent_token=""
    fi

    if [ -z "$agent_token" ]; then
        echo " Warning: failed to create API token for '${agent_name}'" >&2
    else
        # Store token in .env under the role-specific variable name
        if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then
            # Use sed with alternative delimiter and proper escaping for special chars in token
            # (Forgejo tokens are hex, so '|' never appears in practice).
            local escaped_token
            escaped_token=$(printf '%s\n' "$agent_token" | sed 's/[&/\]/\\&/g')
            sed -i "s|^${token_var}=.*|${token_var}=${escaped_token}|" "$env_file"
            echo " ${agent_name} token updated (${token_var})"
        else
            printf '%s=%s\n' "$token_var" "$agent_token" >> "$env_file"
            echo " ${agent_name} token saved (${token_var})"
        fi
        export "${token_var}=${agent_token}"
    fi

    # Step 2: Create .profile repo on Forgejo
    echo ""
    echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..."

    if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${agent_name}/.profile" >/dev/null 2>&1; then
        echo " Repo '${agent_name}/.profile' already exists"
    else
        # Create the repo using the admin API to ensure it's created in the agent's namespace.
        # Using POST /api/v1/user/repos with a user token would create the repo under the
        # authenticated user, which could be wrong if the token belongs to a different user.
        # The admin API POST /api/v1/admin/users/{username}/repos explicitly creates in the
        # specified user's namespace.
        local create_output
        create_output=$(curl -sf -X POST \
            -u "${admin_user}:${admin_pass}" \
            -H "Content-Type: application/json" \
            "${forge_url}/api/v1/admin/users/${agent_name}/repos" \
            -d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true

        # Success means the API returned the created repo object. Match the
        # "id" JSON key specifically: the previous pattern ('"id":\|[0-9]')
        # also matched any digit, so error bodies containing numbers were
        # misreported as success.
        if echo "$create_output" | grep -q '"id":'; then
            echo " Created repo '${agent_name}/.profile' (via admin API)"
        else
            echo " Error: failed to create repo '${agent_name}/.profile'" >&2
            echo " Response: ${create_output}" >&2
            exit 1
        fi
    fi

    # Step 3: Clone repo and create initial commit
    echo ""
    echo "Step 3: Cloning repo and creating initial commit..."

    local clone_dir="/tmp/.profile-clone-${agent_name}"
    rm -rf "$clone_dir"
    mkdir -p "$clone_dir"

    # Build authenticated clone URL using basic auth (user_pass is always set in Step 1)
    if [ -z "${user_pass:-}" ]; then
        echo " Error: no user password available for cloning" >&2
        exit 1
    fi

    local auth_url
    auth_url=$(printf '%s' "$forge_url" | sed "s|://|://${agent_name}:${user_pass}@|")
    auth_url="${auth_url}/${agent_name}/.profile.git"

    # Display unauthenticated URL (auth token only in actual git clone command)
    echo " Cloning: ${forge_url}/${agent_name}/.profile.git"

    # Try authenticated clone first (required for private repos)
    if ! git clone --quiet "$auth_url" "$clone_dir" 2>/dev/null; then
        echo " Error: failed to clone repo with authentication" >&2
        echo " Note: Ensure the user has a valid API token with repository access" >&2
        rm -rf "$clone_dir"
        exit 1
    fi

    # Configure git
    git -C "$clone_dir" config user.name "disinto-admin"
    git -C "$clone_dir" config user.email "disinto-admin@localhost"

    # Create directory structure
    echo " Creating directory structure..."
    mkdir -p "${clone_dir}/journal"
    mkdir -p "${clone_dir}/knowledge"
    touch "${clone_dir}/journal/.gitkeep"
    touch "${clone_dir}/knowledge/.gitkeep"

    # Copy formula
    echo " Copying formula..."
    cp "$formula_path" "${clone_dir}/formula.toml"

    # Create README
    if [ ! -f "${clone_dir}/README.md" ]; then
        cat > "${clone_dir}/README.md" <<EOF
# ${agent_name}'s .profile

Agent profile repository for ${agent_name}.

## Structure

\`\`\`
${agent_name}/.profile/
├── formula.toml # Agent's role formula
├── journal/ # Issue-by-issue log files (journal branch)
│ └── .gitkeep
├── knowledge/ # Shared knowledge and best practices
│ └── .gitkeep
└── README.md
\`\`\`

## Branches

- \`main\` — Admin-only merge for formula changes (requires 1 approval)
- \`journal\` — Agent branch for direct journal entries
- Agent can push directly to this branch
- Formula changes must go through PR to \`main\`

## Branch protection

- \`main\`: Protected — requires 1 admin approval for merges
- \`journal\`: Unprotected — agent can push directly
EOF
    fi

    # Commit and push
    echo " Committing and pushing..."
    git -C "$clone_dir" add -A
    if ! git -C "$clone_dir" diff --cached --quiet 2>/dev/null; then
        git -C "$clone_dir" commit -m "chore: initial .profile setup" -q
        # Default branch name depends on server config — try main then master.
        git -C "$clone_dir" push origin main >/dev/null 2>&1 || \
            git -C "$clone_dir" push origin master >/dev/null 2>&1 || true
        echo " Committed: initial .profile setup"
    else
        echo " No changes to commit"
    fi

    rm -rf "$clone_dir"

    # Step 4: Set up branch protection
    echo ""
    echo "Step 4: Setting up branch protection..."

    # Source branch-protection.sh helper
    local bp_script="${FACTORY_ROOT}/lib/branch-protection.sh"
    if [ -f "$bp_script" ]; then
        # Source required environment
        if [ -f "${FACTORY_ROOT}/lib/env.sh" ]; then
            source "${FACTORY_ROOT}/lib/env.sh"
        fi

        # Set up branch protection for .profile repo
        if source "$bp_script" 2>/dev/null && setup_profile_branch_protection "${agent_name}/.profile" "main"; then
            echo " Branch protection configured for main branch"
            echo " - Requires 1 approval before merge"
            echo " - Admin-only merge enforcement"
            echo " - Journal branch created for direct agent pushes"
        else
            echo " Warning: could not configure branch protection (Forgejo API may not be available)"
            echo " Note: Branch protection can be set up manually later"
        fi
    else
        echo " Warning: branch-protection.sh not found at ${bp_script}"
    fi

    # Step 5: Create state marker
    echo ""
    echo "Step 5: Creating state marker..."

    local state_dir="${FACTORY_ROOT}/state"
    mkdir -p "$state_dir"
    local state_file="${state_dir}/.${role}-active"

    if [ ! -f "$state_file" ]; then
        touch "$state_file"
        echo " Created: ${state_file}"
    else
        echo " State marker already exists: ${state_file}"
    fi

    # Step 6: Set up local model agent (if --local-model specified)
    if [ -n "$local_model" ]; then
        echo ""
        echo "Step 6: Configuring local model agent..."

        # Validate model endpoint is reachable
        echo " Validating model endpoint: ${local_model}"
        if ! curl -sf --max-time 10 "${local_model}/health" >/dev/null 2>&1; then
            # Try /v1/chat/completions as fallback endpoint check
            # NOTE(review): a GET against /v1/chat/completions typically
            # returns 405 on OpenAI-compatible servers, so this fallback may
            # report "unreachable" for a healthy endpoint — warning-only.
            if ! curl -sf --max-time 10 "${local_model}/v1/chat/completions" >/dev/null 2>&1; then
                echo " Warning: model endpoint may not be reachable at ${local_model}"
                echo " Continuing with configuration..."
            fi
        else
            echo " Model endpoint is reachable"
        fi

        # Find project TOML
        local project_name="${PROJECT_NAME:-}"
        local toml_file=""
        if [ -n "$project_name" ]; then
            toml_file="${FACTORY_ROOT}/projects/${project_name}.toml"
        fi
        # Fallback: find the first .toml in projects/
        if [ -z "$toml_file" ] || [ ! -f "$toml_file" ]; then
            for f in "${FACTORY_ROOT}/projects/"*.toml; do
                if [ -f "$f" ]; then
                    toml_file="$f"
                    break
                fi
            done
        fi

        if [ -z "$toml_file" ] || [ ! -f "$toml_file" ]; then
            echo " Error: no project TOML found in ${FACTORY_ROOT}/projects/" >&2
            echo " Run 'disinto init' first to create a project config" >&2
            exit 1
        fi

        echo " Project TOML: ${toml_file}"

        # Derive a safe section name from the agent name (lowercase, alphanumeric+hyphens)
        local section_name
        section_name=$(echo "$agent_name" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]/-/g')

        # Default model name if not provided
        local model="${model_name:-local-model}"

        # Write [agents.<name>] section to the project TOML
        local interval="${poll_interval:-60}"
        echo " Writing [agents.${section_name}] to ${toml_file}..."
        python3 -c '
import sys, re, pathlib

toml_path = sys.argv[1]
section_name = sys.argv[2]
base_url = sys.argv[3]
model = sys.argv[4]
agent_name = sys.argv[5]
role = sys.argv[6]
poll_interval = sys.argv[7]

p = pathlib.Path(toml_path)
text = p.read_text()

# Build the new section
new_section = f"""
[agents.{section_name}]
base_url = "{base_url}"
model = "{model}"
api_key = "sk-no-key-required"
roles = ["{role}"]
forge_user = "{agent_name}"
compact_pct = 60
poll_interval = {poll_interval}
"""

# Check if section already exists and replace it
pattern = rf"\[agents\.{re.escape(section_name)}\][^\[]*"
if re.search(pattern, text):
    text = re.sub(pattern, new_section.strip() + "\n", text)
else:
    # Remove commented-out example [agents.llama] block if present
    text = re.sub(
        r"\n# Local-model agents \(optional\).*?(?=\n# \[mirrors\]|\n\[mirrors\]|\Z)",
        "",
        text,
        flags=re.DOTALL,
    )
    # Append before [mirrors] if it exists, otherwise at end
    mirrors_match = re.search(r"\n(# )?\[mirrors\]", text)
    if mirrors_match:
        text = text[:mirrors_match.start()] + "\n" + new_section + text[mirrors_match.start():]
    else:
        text = text.rstrip() + "\n" + new_section

p.write_text(text)
' "$toml_file" "$section_name" "$local_model" "$model" "$agent_name" "$role" "$interval"

        echo " Agent config written to TOML"

        # Regenerate docker-compose.yml to include the new agent container
        local compose_file="${FACTORY_ROOT}/docker-compose.yml"
        if [ -f "$compose_file" ]; then
            echo " Regenerating docker-compose.yml..."
            rm -f "$compose_file"
            # generate_compose is defined in the calling script (bin/disinto) via generators.sh
            # Use _generate_compose_impl directly since generators.sh is already sourced
            local forge_port="3000"
            if [ -n "${FORGE_URL:-}" ]; then
                forge_port=$(printf '%s' "$FORGE_URL" | sed -E 's|.*:([0-9]+)/?$|\1|')
                forge_port="${forge_port:-3000}"
            fi
            _generate_compose_impl "$forge_port"
            echo " Compose regenerated with agents-${section_name} service"
        fi

        local service_name="agents-${section_name}"
        echo ""
        echo " Service name: ${service_name}"
        echo " Model endpoint: ${local_model}"
        echo " Model: ${model}"
        echo ""
        echo " To start the agent, run:"
        echo " docker compose --profile ${service_name} up -d ${service_name}"
    fi

    echo ""
    echo "Done! Agent '${agent_name}' hired for role '${role}'."
    echo " User: ${forge_url}/${agent_name}"
    echo " Repo: ${forge_url}/${agent_name}/.profile"
    echo " Formula: ${role}.toml"
}
|
||||||
|
|
@ -43,7 +43,6 @@ _ilc_log() {
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Label ID caching — lookup once per name, cache in globals.
|
# Label ID caching — lookup once per name, cache in globals.
|
||||||
# Pattern follows ci-helpers.sh (ensure_blocked_label_id).
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
declare -A _ILC_LABEL_IDS
|
declare -A _ILC_LABEL_IDS
|
||||||
_ILC_LABEL_IDS["backlog"]=""
|
_ILC_LABEL_IDS["backlog"]=""
|
||||||
|
|
@ -103,7 +102,9 @@ issue_claim() {
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Assign to self (Forgejo rejects if already assigned differently)
|
# Assign to self BEFORE adding in-progress label (issue #471).
|
||||||
|
# This ordering ensures the assignee is set by the time other pollers
|
||||||
|
# see the in-progress label, reducing the stale-detection race window.
|
||||||
curl -sf -X PATCH \
|
curl -sf -X PATCH \
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@
|
||||||
# PROJECT_CONTAINERS, CHECK_PRS, CHECK_DEV_AGENT,
|
# PROJECT_CONTAINERS, CHECK_PRS, CHECK_DEV_AGENT,
|
||||||
# CHECK_PIPELINE_STALL, CI_STALE_MINUTES,
|
# CHECK_PIPELINE_STALL, CI_STALE_MINUTES,
|
||||||
# MIRROR_NAMES, MIRROR_URLS, MIRROR_<NAME> (per configured mirror)
|
# MIRROR_NAMES, MIRROR_URLS, MIRROR_<NAME> (per configured mirror)
|
||||||
# (plus backwards-compat aliases: CODEBERG_REPO, CODEBERG_API, CODEBERG_WEB)
|
|
||||||
#
|
#
|
||||||
# If no argument given, does nothing (allows poll scripts to work with
|
# If no argument given, does nothing (allows poll scripts to work with
|
||||||
# plain .env fallback for backwards compatibility).
|
# plain .env fallback for backwards compatibility).
|
||||||
|
|
@ -83,7 +82,7 @@ if mirrors:
|
||||||
# Export parsed variables.
|
# Export parsed variables.
|
||||||
# Inside the agents container (DISINTO_CONTAINER=1), compose already sets the
|
# Inside the agents container (DISINTO_CONTAINER=1), compose already sets the
|
||||||
# correct FORGE_URL (http://forgejo:3000) and path vars for the container
|
# correct FORGE_URL (http://forgejo:3000) and path vars for the container
|
||||||
# environment. The TOML carries host-perspective values (localhost, /home/johba/…)
|
# environment. The TOML carries host-perspective values (localhost, /home/admin/…)
|
||||||
# that would break container API calls and path resolution. Skip overriding
|
# that would break container API calls and path resolution. Skip overriding
|
||||||
# any env var that is already set when running inside the container.
|
# any env var that is already set when running inside the container.
|
||||||
while IFS='=' read -r _key _val; do
|
while IFS='=' read -r _key _val; do
|
||||||
|
|
@ -100,11 +99,9 @@ export FORGE_URL="${FORGE_URL:-http://localhost:3000}"
|
||||||
if [ -n "$FORGE_REPO" ]; then
|
if [ -n "$FORGE_REPO" ]; then
|
||||||
export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
|
export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
|
||||||
export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}"
|
export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}"
|
||||||
|
# Extract repo owner (first path segment of owner/repo)
|
||||||
|
export FORGE_REPO_OWNER="${FORGE_REPO%%/*}"
|
||||||
fi
|
fi
|
||||||
# Backwards-compat aliases
|
|
||||||
export CODEBERG_REPO="${FORGE_REPO}"
|
|
||||||
export CODEBERG_API="${FORGE_API:-}"
|
|
||||||
export CODEBERG_WEB="${FORGE_WEB:-}"
|
|
||||||
|
|
||||||
# Derive PROJECT_REPO_ROOT if not explicitly set
|
# Derive PROJECT_REPO_ROOT if not explicitly set
|
||||||
if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
|
if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
|
||||||
|
|
@ -116,9 +113,55 @@ if [ -z "${OPS_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
|
||||||
export OPS_REPO_ROOT="/home/${USER}/${PROJECT_NAME}-ops"
|
export OPS_REPO_ROOT="/home/${USER}/${PROJECT_NAME}-ops"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Inside the container, always derive repo paths from PROJECT_NAME — the TOML
|
||||||
|
# carries host-perspective paths that do not exist in the container filesystem.
|
||||||
|
if [ "${DISINTO_CONTAINER:-}" = "1" ] && [ -n "${PROJECT_NAME:-}" ]; then
|
||||||
|
export PROJECT_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}"
|
||||||
|
export OPS_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}-ops"
|
||||||
|
fi
|
||||||
|
|
||||||
# Derive FORGE_OPS_REPO if not explicitly set
|
# Derive FORGE_OPS_REPO if not explicitly set
|
||||||
if [ -z "${FORGE_OPS_REPO:-}" ] && [ -n "${FORGE_REPO:-}" ]; then
|
if [ -z "${FORGE_OPS_REPO:-}" ] && [ -n "${FORGE_REPO:-}" ]; then
|
||||||
export FORGE_OPS_REPO="${FORGE_REPO}-ops"
|
export FORGE_OPS_REPO="${FORGE_REPO}-ops"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Parse [agents.*] sections for local-model agents
|
||||||
|
# Exports AGENT_<NAME>_BASE_URL, AGENT_<NAME>_MODEL, AGENT_<NAME>_API_KEY,
|
||||||
|
# AGENT_<NAME>_ROLES, AGENT_<NAME>_FORGE_USER, AGENT_<NAME>_COMPACT_PCT
|
||||||
|
if command -v python3 &>/dev/null; then
|
||||||
|
_AGENT_VARS=$(python3 -c "
|
||||||
|
import sys, tomllib
|
||||||
|
|
||||||
|
with open(sys.argv[1], 'rb') as f:
|
||||||
|
cfg = tomllib.load(f)
|
||||||
|
|
||||||
|
agents = cfg.get('agents', {})
|
||||||
|
for name, config in agents.items():
|
||||||
|
if not isinstance(config, dict):
|
||||||
|
continue
|
||||||
|
# Emit variables in uppercase with the agent name
|
||||||
|
if 'base_url' in config:
|
||||||
|
print(f'AGENT_{name.upper()}_BASE_URL={config[\"base_url\"]}')
|
||||||
|
if 'model' in config:
|
||||||
|
print(f'AGENT_{name.upper()}_MODEL={config[\"model\"]}')
|
||||||
|
if 'api_key' in config:
|
||||||
|
print(f'AGENT_{name.upper()}_API_KEY={config[\"api_key\"]}')
|
||||||
|
if 'roles' in config:
|
||||||
|
roles = ' '.join(config['roles']) if isinstance(config['roles'], list) else config['roles']
|
||||||
|
print(f'AGENT_{name.upper()}_ROLES={roles}')
|
||||||
|
if 'forge_user' in config:
|
||||||
|
print(f'AGENT_{name.upper()}_FORGE_USER={config[\"forge_user\"]}')
|
||||||
|
if 'compact_pct' in config:
|
||||||
|
print(f'AGENT_{name.upper()}_COMPACT_PCT={config[\"compact_pct\"]}')
|
||||||
|
" "$_PROJECT_TOML" 2>/dev/null) || true
|
||||||
|
|
||||||
|
if [ -n "$_AGENT_VARS" ]; then
|
||||||
|
while IFS='=' read -r _key _val; do
|
||||||
|
[ -z "$_key" ] && continue
|
||||||
|
export "$_key=$_val"
|
||||||
|
done <<< "$_AGENT_VARS"
|
||||||
|
fi
|
||||||
|
unset _AGENT_VARS
|
||||||
|
fi
|
||||||
|
|
||||||
unset _PROJECT_TOML _PROJECT_VARS _key _val
|
unset _PROJECT_TOML _PROJECT_VARS _key _val
|
||||||
|
|
|
||||||
409
lib/ops-setup.sh
Normal file
409
lib/ops-setup.sh
Normal file
|
|
@ -0,0 +1,409 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# ops-setup.sh — Setup ops repository (disinto-ops)
|
||||||
|
#
|
||||||
|
# Source from bin/disinto:
|
||||||
|
# source "$(dirname "$0")/../lib/ops-setup.sh"
|
||||||
|
#
|
||||||
|
# Required globals: FORGE_URL, FORGE_TOKEN, FACTORY_ROOT
|
||||||
|
# Optional: HUMAN_TOKEN (falls back to FORGE_TOKEN for admin operations)
|
||||||
|
#
|
||||||
|
# Functions:
|
||||||
|
# setup_ops_repo <forge_url> <ops_slug> <ops_root> [primary_branch] [admin_token]
|
||||||
|
# - Create ops repo on Forgejo if it doesn't exist
|
||||||
|
# - Configure bot collaborators with appropriate permissions
|
||||||
|
# - Clone or initialize ops repo locally
|
||||||
|
# - Seed directory structure (vault, knowledge, evidence)
|
||||||
|
# - Export _ACTUAL_OPS_SLUG for caller to use
|
||||||
|
# migrate_ops_repo <ops_root> [primary_branch]
|
||||||
|
# - Seed missing directories/files on existing ops repos (idempotent)
|
||||||
|
# - Creates .gitkeep files and template content for canonical structure
|
||||||
|
#
|
||||||
|
# Globals modified:
|
||||||
|
# _ACTUAL_OPS_SLUG - resolved ops repo slug after setup_ops_repo completes
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# setup_ops_repo — ensure the ops repo exists on Forgejo and locally.
#
# Args:
#   $1 forge_url       Forge base URL (e.g. http://localhost:3000)
#   $2 ops_slug        Desired "<namespace>/<name>" slug
#   $3 ops_root        Local checkout path
#   $4 primary_branch  Default branch (default: main)
#   $5 admin_token     Token for admin operations (default: HUMAN_TOKEN, then FORGE_TOKEN)
#
# Side effects: creates the repo/org remotely, configures collaborators,
# clones or inits the local checkout, seeds the canonical directory layout,
# and sets the global _ACTUAL_OPS_SLUG to the resolved slug.
# Returns 1 if the repo cannot be found or created remotely.
setup_ops_repo() {

    local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}"
    local admin_token="${5:-${HUMAN_TOKEN:-${FORGE_TOKEN}}}"
    local org_name="${ops_slug%%/*}"
    local ops_name="${ops_slug##*/}"

    echo ""
    echo "── Ops repo setup ─────────────────────────────────────"

    # Determine the actual ops repo location by searching across possible namespaces.
    # This handles cases where the repo was created under a different namespace
    # due to past bugs (e.g., dev-bot/disinto-ops instead of disinto-admin/disinto-ops).
    local actual_ops_slug=""
    local -a possible_namespaces=( "$org_name" "dev-bot" "disinto-admin" )
    local http_code
    local ns slug  # scope loop variables to the function (previously leaked)

    for ns in "${possible_namespaces[@]}"; do
        slug="${ns}/${ops_name}"
        if curl -sf --max-time 5 \
            -H "Authorization: token ${FORGE_TOKEN}" \
            "${forge_url}/api/v1/repos/${slug}" >/dev/null 2>&1; then
            actual_ops_slug="$slug"
            echo "Ops repo: ${slug} (found at ${slug})"
            break
        fi
    done

    # If not found, try to create it in the configured namespace
    if [ -z "$actual_ops_slug" ]; then
        echo "Creating ops repo in namespace: ${org_name}"

        # Determine if target namespace is a user or an org.
        # Fetch the user record ONCE and inspect it, instead of probing the
        # same endpoint twice (previous code issued a duplicate request).
        local ns_type="" ns_json=""
        if ns_json=$(curl -sf -H "Authorization: token ${admin_token}" \
            "${forge_url}/api/v1/users/${org_name}" 2>/dev/null); then
            # User endpoint exists - check if it's an org
            if printf '%s' "$ns_json" | grep -q '"is_org":true'; then
                ns_type="org"
            else
                ns_type="user"
            fi
        elif curl -sf -H "Authorization: token ${admin_token}" \
            "${forge_url}/api/v1/orgs/${org_name}" >/dev/null 2>&1; then
            # Org endpoint exists
            ns_type="org"
        fi

        local create_endpoint="" via_msg=""
        if [ "$ns_type" = "org" ]; then
            # Org namespace — use org API
            create_endpoint="/api/v1/orgs/${org_name}/repos"
            # Create org if it doesn't exist (best effort; 409 when it does)
            curl -sf -X POST \
                -H "Authorization: token ${admin_token}" \
                -H "Content-Type: application/json" \
                "${forge_url}/api/v1/orgs" \
                -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true
        else
            # User namespace — use admin API (requires admin token)
            create_endpoint="/api/v1/admin/users/${org_name}/repos"
            via_msg=" (via admin API)"
        fi

        # Issue the create request ONCE, capturing the HTTP status directly.
        # (Previous code re-POSTed on failure just to learn the status code,
        # which could send the create request twice.)
        local create_payload
        create_payload="{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}"
        http_code=$(curl -s -o /dev/null -w "%{http_code}" \
            -X POST \
            -H "Authorization: token ${admin_token}" \
            -H "Content-Type: application/json" \
            "${forge_url}${create_endpoint}" \
            -d "$create_payload" 2>/dev/null || echo "0")
        if [ "$http_code" = "201" ] || [ "$http_code" = "200" ]; then
            actual_ops_slug="${org_name}/${ops_name}"
            echo "Ops repo: ${actual_ops_slug} created on Forgejo${via_msg}"
        else
            echo "Error: failed to create ops repo '${org_name}/${ops_name}' (HTTP ${http_code})" >&2
            return 1
        fi
    fi

    # Configure collaborators on the ops repo
    local bot_user bot_perm
    declare -A bot_permissions=(
        [dev-bot]="write"
        [review-bot]="read"
        [planner-bot]="write"
        [gardener-bot]="write"
        [vault-bot]="write"
        [supervisor-bot]="read"
        [predictor-bot]="read"
        [architect-bot]="write"
    )

    # Add all bot users as collaborators with appropriate permissions
    # vault branch protection (#77) requires:
    # - Admin-only merge to main (enforced by admin_enforced: true)
    # - Bots can push branches and create PRs, but cannot merge
    for bot_user in "${!bot_permissions[@]}"; do
        bot_perm="${bot_permissions[$bot_user]}"
        if curl -sf -X PUT \
            -H "Authorization: token ${admin_token}" \
            -H "Content-Type: application/json" \
            "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/${bot_user}" \
            -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1; then
            echo " + ${bot_user} = ${bot_perm} collaborator"
        else
            echo " ! ${bot_user} = ${bot_perm} (already set or failed)"
        fi
    done

    # Add disinto-admin as admin collaborator
    if curl -sf -X PUT \
        -H "Authorization: token ${admin_token}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/disinto-admin" \
        -d '{"permission":"admin"}' >/dev/null 2>&1; then
        echo " + disinto-admin = admin collaborator"
    else
        echo " ! disinto-admin = admin (already set or failed)"
    fi

    # Clone ops repo locally if not present — use clean URL, credential helper
    # supplies auth (#604).
    if [ ! -d "${ops_root}/.git" ]; then
        local clone_url="${forge_url}/${actual_ops_slug}.git"
        echo "Cloning: ops repo -> ${ops_root}"
        if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then
            echo "Ops repo: ${actual_ops_slug} cloned successfully"
        else
            # Clone failed (e.g. empty repo / network) — fall back to init
            echo "Initializing: ops repo at ${ops_root}"
            mkdir -p "$ops_root"
            git -C "$ops_root" init --initial-branch="${primary_branch}" -q
            # Set remote to the actual ops repo location
            git -C "$ops_root" remote add origin "${forge_url}/${actual_ops_slug}.git"
            echo "Ops repo: ${actual_ops_slug} initialized locally"
        fi
    else
        echo "Ops repo: ${ops_root} (already exists locally)"
        # Verify remote is correct
        local current_remote
        current_remote=$(git -C "$ops_root" remote get-url origin 2>/dev/null || true)
        local expected_remote="${forge_url}/${actual_ops_slug}.git"
        if [ -n "$current_remote" ] && [ "$current_remote" != "$expected_remote" ]; then
            echo " Fixing: remote URL from ${current_remote} to ${expected_remote}"
            git -C "$ops_root" remote set-url origin "$expected_remote"
        fi
    fi

    # Seed directory structure
    local seeded=false
    mkdir -p "${ops_root}/vault/pending"
    mkdir -p "${ops_root}/vault/approved"
    mkdir -p "${ops_root}/vault/fired"
    mkdir -p "${ops_root}/vault/rejected"
    mkdir -p "${ops_root}/knowledge"
    mkdir -p "${ops_root}/evidence/engagement"
    mkdir -p "${ops_root}/evidence/red-team"
    mkdir -p "${ops_root}/evidence/holdout"
    mkdir -p "${ops_root}/evidence/evolution"
    mkdir -p "${ops_root}/evidence/user-test"
    mkdir -p "${ops_root}/sprints"
    [ -f "${ops_root}/sprints/.gitkeep" ] || { touch "${ops_root}/sprints/.gitkeep"; seeded=true; }
    [ -f "${ops_root}/evidence/red-team/.gitkeep" ] || { touch "${ops_root}/evidence/red-team/.gitkeep"; seeded=true; }
    [ -f "${ops_root}/evidence/holdout/.gitkeep" ] || { touch "${ops_root}/evidence/holdout/.gitkeep"; seeded=true; }
    [ -f "${ops_root}/evidence/evolution/.gitkeep" ] || { touch "${ops_root}/evidence/evolution/.gitkeep"; seeded=true; }
    [ -f "${ops_root}/evidence/user-test/.gitkeep" ] || { touch "${ops_root}/evidence/user-test/.gitkeep"; seeded=true; }

    if [ ! -f "${ops_root}/README.md" ]; then
        cat > "${ops_root}/README.md" <<OPSEOF
# ${ops_name}

Operational data for the ${ops_name%-ops} project.

## Structure

\`\`\`
${ops_name}/
├── vault/
│   ├── pending/    # vault items awaiting approval
│   ├── approved/   # approved vault items
│   ├── fired/      # executed vault items
│   └── rejected/   # rejected vault items
├── sprints/        # sprint specs written by architect agent
├── knowledge/      # shared agent knowledge and best practices
├── evidence/       # engagement data, experiment results
├── portfolio.md    # addressables + observables
├── prerequisites.md # dependency graph
└── RESOURCES.md    # accounts, tokens (refs), infra inventory
\`\`\`

> **Note:** Journal directories (journal/planner/ and journal/supervisor/) have been removed from the ops repo. Agent journals are now stored in each agent's .profile repo on Forgejo.

## Branch protection

- \`main\`: 2 reviewers required for vault items
- Journal/evidence commits may use lighter rules
OPSEOF
        seeded=true
    fi

    # Copy vault policy.toml template if not already present
    if [ ! -f "${ops_root}/vault/policy.toml" ]; then
        local policy_src="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/vault/policy.toml"
        if [ -f "$policy_src" ]; then
            cp "$policy_src" "${ops_root}/vault/policy.toml"
            echo " + Copied vault/policy.toml template"
            seeded=true
        fi
    fi

    # Create stub files if they don't exist
    [ -f "${ops_root}/portfolio.md" ] || { echo "# Portfolio" > "${ops_root}/portfolio.md"; seeded=true; }
    [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; }
    [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; }

    # Commit and push seed content
    if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then
        # Auto-configure repo-local git identity if missing (#778)
        if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then
            git -C "$ops_root" config user.name "disinto-admin"
        fi
        if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then
            git -C "$ops_root" config user.email "disinto-admin@localhost"
        fi

        git -C "$ops_root" add -A
        if ! git -C "$ops_root" diff --cached --quiet 2>/dev/null; then
            git -C "$ops_root" commit -m "chore: seed ops repo structure" -q
            # Push if remote exists
            if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then
                if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then
                    echo "Seeded: ops repo with initial structure"
                else
                    echo "Warning: failed to push seed content to ops repo" >&2
                fi
            fi
        fi
    fi

    # Export resolved slug for the caller to write back to the project TOML
    _ACTUAL_OPS_SLUG="${actual_ops_slug}"
}
|
||||||
|
|
||||||
|
# migrate_ops_repo — Seed missing ops repo directories and files on existing deployments
|
||||||
|
#
|
||||||
|
# This function is idempotent — safe to run on every container start.
|
||||||
|
# It checks for missing directories/files and creates them with .gitkeep files
|
||||||
|
# or template content as appropriate.
|
||||||
|
#
|
||||||
|
# Called from entrypoint.sh after setup_ops_repo() to bring pre-#407 deployments
|
||||||
|
# up to date with the canonical ops repo structure.
|
||||||
|
# migrate_ops_repo — idempotently seed missing ops-repo directories/files.
#
# Args:
#   $1 ops_root        Local ops repo path; when empty, falls back to
#                      $OPS_REPO_ROOT or re-sources load-project.sh.
#   $2 primary_branch  Branch to push to (default: main)
#
# Returns 0 when skipped or nothing to do; 1 on cd/commit failure.
migrate_ops_repo() {
    local ops_root="${1:-}"
    local primary_branch="${2:-main}"

    # Validate ops_root argument
    if [ -z "$ops_root" ]; then
        # Try to determine ops_root from environment or project config
        if [ -n "${OPS_REPO_ROOT:-}" ]; then
            ops_root="${OPS_REPO_ROOT}"
        elif [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then
            # load-project.sh exports OPS_REPO_ROOT among other vars
            source "$(dirname "$0")/load-project.sh" "$PROJECT_TOML"
            ops_root="${OPS_REPO_ROOT:-}"
        fi
    fi

    # Skip if we still don't have an ops root
    if [ -z "$ops_root" ]; then
        echo "migrate_ops_repo: skipping — no ops repo root determined"
        return 0
    fi

    # Verify it's a git repo
    if [ ! -d "${ops_root}/.git" ]; then
        echo "migrate_ops_repo: skipping — ${ops_root} is not a git repo"
        return 0
    fi

    echo ""
    echo "── Ops repo migration ───────────────────────────────────"
    echo "Checking ${ops_root} for missing directories and files..."

    # Change to ops_root directory to ensure all git operations use the correct repo
    # This prevents "fatal: not in a git directory" errors from stray git commands
    local orig_dir
    orig_dir=$(pwd)
    cd "$ops_root" || {
        echo "Error: failed to change to ${ops_root}" >&2
        return 1
    }

    local migrated=false

    # Canonical ops repo structure (post #407)
    # Directories to ensure exist with .gitkeep files
    local -a dir_keepfiles=(
        "vault/pending/.gitkeep"
        "vault/approved/.gitkeep"
        "vault/fired/.gitkeep"
        "vault/rejected/.gitkeep"
        "knowledge/.gitkeep"
        "evidence/engagement/.gitkeep"
        "evidence/red-team/.gitkeep"
        "evidence/holdout/.gitkeep"
        "evidence/evolution/.gitkeep"
        "evidence/user-test/.gitkeep"
        "sprints/.gitkeep"
    )

    # Create missing directories and .gitkeep files
    for keepfile in "${dir_keepfiles[@]}"; do
        if [ ! -f "$keepfile" ]; then
            mkdir -p "$(dirname "$keepfile")"
            touch "$keepfile"
            echo " + Created: ${keepfile}"
            migrated=true
        fi
    done

    # Template files to create if missing (starter content)
    local -a template_files=(
        "portfolio.md"
        "prerequisites.md"
        "RESOURCES.md"
    )

    for tfile in "${template_files[@]}"; do
        if [ ! -f "$tfile" ]; then
            local title
            # Derive a title from the filename: strip .md, underscores to
            # spaces, capitalize words. NOTE(review): sed '\u' is a GNU
            # extension — confirm the container uses GNU sed, not BSD.
            title=$(basename "$tfile" | sed 's/\.md$//; s/_/ /g' | sed 's/\b\(.\)/\u\1/g')
            {
                echo "# ${title}"
                echo ""
                echo "## Overview"
                echo ""
                echo "<!-- Add content here -->"
            } > "$tfile"
            echo " + Created: ${tfile}"
            migrated=true
        fi
    done

    # Commit and push changes if any were made
    if [ "$migrated" = true ]; then
        # Auto-configure repo-local git identity if missing
        if [ -z "$(git config user.name 2>/dev/null)" ]; then
            git config user.name "disinto-admin"
        fi
        if [ -z "$(git config user.email 2>/dev/null)" ]; then
            git config user.email "disinto-admin@localhost"
        fi

        git add -A
        # diff --cached --quiet exits non-zero when there ARE staged changes
        if ! git diff --cached --quiet 2>/dev/null; then
            if ! git commit -m "chore: migrate ops repo structure to canonical layout" -q; then
                echo "Error: failed to commit migration changes" >&2
                # Restore caller's cwd before bailing out
                cd "$orig_dir"
                return 1
            fi
            # Push if remote exists
            if git remote get-url origin >/dev/null 2>&1; then
                if ! git push origin "${primary_branch}" -q 2>/dev/null; then
                    # Push failure is non-fatal: commit stays local
                    echo "Warning: failed to push migration to ops repo" >&2
                else
                    echo "Migrated: ops repo structure updated and pushed"
                fi
            fi
        fi
    else
        echo " (all directories and files already present)"
    fi

    # Return to original directory
    cd "$orig_dir"
}
|
||||||
|
|
@ -357,11 +357,18 @@ pr_close() {
|
||||||
local pr_num="$1"
|
local pr_num="$1"
|
||||||
|
|
||||||
_prl_log "closing PR #${pr_num}"
|
_prl_log "closing PR #${pr_num}"
|
||||||
curl -sf -X PATCH \
|
local resp http_code
|
||||||
|
resp=$(curl -sf -w "\n%{http_code}" -X PATCH \
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
"${FORGE_API}/pulls/${pr_num}" \
|
"${FORGE_API}/pulls/${pr_num}" \
|
||||||
-d '{"state":"closed"}' >/dev/null 2>&1 || true
|
-d '{"state":"closed"}' 2>/dev/null) || true
|
||||||
|
http_code=$(printf '%s\n' "$resp" | tail -1)
|
||||||
|
if [ "$http_code" != "200" ] && [ "$http_code" != "204" ]; then
|
||||||
|
_prl_log "pr_close FAILED: HTTP ${http_code} for PR #${pr_num}"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
_prl_log "PR #${pr_num} closed"
|
||||||
}
|
}
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -398,11 +405,18 @@ pr_walk_to_merge() {
|
||||||
if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then
|
if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then
|
||||||
ci_retry_count=$((ci_retry_count + 1))
|
ci_retry_count=$((ci_retry_count + 1))
|
||||||
_prl_log "infra failure — retriggering CI (retry ${ci_retry_count})"
|
_prl_log "infra failure — retriggering CI (retry ${ci_retry_count})"
|
||||||
|
local rebase_output rebase_rc
|
||||||
( cd "$worktree" && \
|
( cd "$worktree" && \
|
||||||
git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \
|
git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \
|
||||||
git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \
|
git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \
|
||||||
git rebase "${remote}/${PRIMARY_BRANCH}" && \
|
git rebase "${remote}/${PRIMARY_BRANCH}" && \
|
||||||
git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true
|
git push --force-with-lease "$remote" HEAD ) > /tmp/rebase-output-$$ 2>&1
|
||||||
|
rebase_rc=$?
|
||||||
|
rebase_output=$(cat /tmp/rebase-output-$$)
|
||||||
|
rm -f /tmp/rebase-output-$$
|
||||||
|
if [ "$rebase_rc" -ne 0 ]; then
|
||||||
|
_prl_log "rebase/push failed (exit code $rebase_rc): $(echo "$rebase_output" | tail -5)"
|
||||||
|
fi
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -474,11 +488,7 @@ Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push:
|
||||||
_PR_WALK_EXIT_REASON="merged"
|
_PR_WALK_EXIT_REASON="merged"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
if [ "$rc" -eq 2 ]; then
|
# Merge failed (conflict or HTTP 405) — ask agent to rebase
|
||||||
_PR_WALK_EXIT_REASON="merge_blocked"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
# Merge failed (conflict) — ask agent to rebase
|
|
||||||
_prl_log "merge failed — invoking agent to rebase"
|
_prl_log "merge failed — invoking agent to rebase"
|
||||||
agent_run --resume "$session_id" --worktree "$worktree" \
|
agent_run --resume "$session_id" --worktree "$worktree" \
|
||||||
"PR #${pr_num} approved but merge failed: ${_PR_MERGE_ERROR:-unknown}
|
"PR #${pr_num} approved but merge failed: ${_PR_MERGE_ERROR:-unknown}
|
||||||
|
|
@ -524,8 +534,7 @@ Commit, rebase on ${PRIMARY_BRANCH}, and push:
|
||||||
# build_phase_protocol_prompt — Generate push/commit instructions for Claude.
|
# build_phase_protocol_prompt — Generate push/commit instructions for Claude.
|
||||||
#
|
#
|
||||||
# For the synchronous agent_run architecture: tells Claude how to commit and
|
# For the synchronous agent_run architecture: tells Claude how to commit and
|
||||||
# push (no phase files). For the tmux session architecture, use the
|
# push (no phase files).
|
||||||
# build_phase_protocol_prompt in dev/phase-handler.sh instead.
|
|
||||||
#
|
#
|
||||||
# Args: branch [remote]
|
# Args: branch [remote]
|
||||||
# Stdout: instruction text
|
# Stdout: instruction text
|
||||||
|
|
|
||||||
210
lib/profile.sh
210
lib/profile.sh
|
|
@ -1,210 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
# profile.sh — Helpers for agent .profile repo management
|
|
||||||
#
|
|
||||||
# Source after lib/env.sh and lib/formula-session.sh:
|
|
||||||
# source "$(dirname "$0")/../lib/env.sh"
|
|
||||||
# source "$(dirname "$0")/lib/formula-session.sh"
|
|
||||||
# source "$(dirname "$0")/lib/profile.sh"
|
|
||||||
#
|
|
||||||
# Required globals: FORGE_TOKEN, FORGE_URL, AGENT_IDENTITY, PROFILE_REPO_PATH
|
|
||||||
#
|
|
||||||
# Functions:
|
|
||||||
# profile_propose_formula NEW_FORMULA CONTENT REASON — create PR to update formula.toml
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
# Internal log helper
|
|
||||||
# _profile_log — emit a "profile:"-prefixed log line.
# Delegates to a project-wide log() function when one is defined;
# otherwise prints a UTC-timestamped line to stderr.
_profile_log() {
    if ! declare -f log >/dev/null 2>&1; then
        # No log() helper in scope — fall back to stderr with timestamp
        printf '[%s] profile: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2
        return
    fi
    log "profile: $*"
}
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
|
||||||
# profile_propose_formula — Propose a formula change via PR
|
|
||||||
#
|
|
||||||
# Creates a branch, writes updated formula.toml, opens a PR, and returns PR number.
|
|
||||||
# Branch is protected (requires admin approval per #87).
|
|
||||||
#
|
|
||||||
# Args:
|
|
||||||
# $1 - NEW_FORMULA_CONTENT: The complete new formula.toml content
|
|
||||||
# $2 - REASON: Human-readable explanation of what changed and why
|
|
||||||
#
|
|
||||||
# Returns:
|
|
||||||
# 0 on success, prints PR number to stdout
|
|
||||||
# 1 on failure
|
|
||||||
#
|
|
||||||
# Example:
|
|
||||||
# source "$(dirname "$0")/../lib/env.sh"
|
|
||||||
# source "$(dirname "$0")/lib/formula-session.sh"
|
|
||||||
# source "$(dirname "$0")/lib/profile.sh"
|
|
||||||
# AGENT_IDENTITY="dev-bot"
|
|
||||||
# ensure_profile_repo "$AGENT_IDENTITY"
|
|
||||||
# profile_propose_formula "$new_formula" "Added new prompt pattern for code review"
|
|
||||||
# -----------------------------------------------------------------------------
|
|
||||||
# profile_propose_formula — propose a formula.toml change via a Forgejo PR.
#
# Args:
#   $1 new_formula  Complete new formula.toml content
#   $2 reason       Human-readable explanation (also used for branch name/PR)
#
# Requires globals: AGENT_IDENTITY, PROFILE_REPO_PATH, FORGE_TOKEN, FORGE_URL.
# On success prints the PR number to stdout and returns 0; returns 1 otherwise.
# All git work runs in a subshell so the caller's cwd is never changed.
profile_propose_formula() {
    local new_formula="$1"
    local reason="$2"

    # --- Precondition checks: fail fast with a clear log on missing globals ---
    if [ -z "${AGENT_IDENTITY:-}" ]; then
        _profile_log "ERROR: AGENT_IDENTITY not set"
        return 1
    fi

    if [ -z "${PROFILE_REPO_PATH:-}" ]; then
        _profile_log "ERROR: PROFILE_REPO_PATH not set — ensure_profile_repo not called"
        return 1
    fi

    if [ -z "${FORGE_TOKEN:-}" ]; then
        _profile_log "ERROR: FORGE_TOKEN not set"
        return 1
    fi

    if [ -z "${FORGE_URL:-}" ]; then
        _profile_log "ERROR: FORGE_URL not set"
        return 1
    fi

    # Generate short description from reason for branch name:
    # lowercase, drop non-alphanumerics, collapse/trim spaces, cap at 40
    # chars, then spaces -> hyphens.
    local short_desc
    short_desc=$(printf '%s' "$reason" | \
        tr '[:upper:]' '[:lower:]' | \
        sed 's/[^a-z0-9 ]//g' | \
        sed 's/ */ /g' | \
        sed 's/^ *//;s/ *$//' | \
        cut -c1-40 | \
        tr ' ' '-')

    if [ -z "$short_desc" ]; then
        # Reason contained no usable characters — fall back to a generic name
        short_desc="formula-update"
    fi

    local branch_name="formula/${short_desc}"
    local formula_path="${PROFILE_REPO_PATH}/formula.toml"

    _profile_log "Proposing formula change: ${branch_name}"
    _profile_log "Reason: ${reason}"

    # Ensure we're on main branch and up-to-date
    _profile_log "Fetching .profile repo"
    (
        cd "$PROFILE_REPO_PATH" || return 1

        # Best-effort fetch; repo may use main or master as its trunk
        git fetch origin main --quiet 2>/dev/null || \
            git fetch origin master --quiet 2>/dev/null || true

        # Reset to main/master
        if git checkout main --quiet 2>/dev/null; then
            git pull --ff-only origin main --quiet 2>/dev/null || true
        elif git checkout master --quiet 2>/dev/null; then
            git pull --ff-only origin master --quiet 2>/dev/null || true
        else
            _profile_log "ERROR: Failed to checkout main/master branch"
            return 1
        fi

        # Create and checkout new branch (reuse it if it already exists)
        git checkout -b "$branch_name" 2>/dev/null || {
            _profile_log "Branch ${branch_name} may already exist"
            git checkout "$branch_name" 2>/dev/null || return 1
        }

        # Write formula.toml (full replacement of the file's content)
        printf '%s' "$new_formula" > "$formula_path"

        # Commit the change
        git config user.name "${AGENT_IDENTITY}" || true
        git config user.email "${AGENT_IDENTITY}@users.noreply.codeberg.org" || true

        git add "$formula_path"
        git commit -m "formula: ${reason}" --no-verify || {
            # Commit failed — usually because the formula is unchanged;
            # continue as long as the branch has at least one commit.
            _profile_log "No changes to commit (formula unchanged)"
            # Check if branch has any commits
            if git rev-parse HEAD >/dev/null 2>&1; then
                : # branch has commits, continue
            else
                _profile_log "ERROR: Failed to create commit"
                return 1
            fi
        }

        # Push branch
        local remote="${FORGE_REMOTE:-origin}"
        git push --set-upstream "$remote" "$branch_name" --quiet 2>/dev/null || {
            _profile_log "ERROR: Failed to push branch"
            return 1
        }

        _profile_log "Branch pushed: ${branch_name}"

        # Create PR
        local forge_url="${FORGE_URL%/}"
        local api_url="${forge_url}/api/v1/repos/${AGENT_IDENTITY}/.profile"
        local primary_branch="main"

        # Check if main or master is the primary branch
        # NOTE(review): with curl -f a missing branch yields no "200" output,
        # so the grep fails and we fall back to master — confirm intended.
        if ! curl -sf -o /dev/null -w "%{http_code}" \
            -H "Authorization: token ${FORGE_TOKEN}" \
            "${api_url}/git/branches/main" 2>/dev/null | grep -q "200"; then
            primary_branch="master"
        fi

        local pr_title="formula: ${reason}"
        local pr_body="# Formula Update

**Reason:** ${reason}

---
*This PR was auto-generated by ${AGENT_IDENTITY}.*
"

        local pr_response http_code
        local pr_json
        # jq -n builds safely-escaped JSON from the shell strings
        pr_json=$(jq -n \
            --arg t "$pr_title" \
            --arg b "$pr_body" \
            --arg h "$branch_name" \
            --arg base "$primary_branch" \
            '{title:$t, body:$b, head:$h, base:$base}') || {
            _profile_log "ERROR: Failed to build PR JSON"
            return 1
        }

        # -w appends the HTTP status on its own trailing line
        pr_response=$(curl -s -w "\n%{http_code}" -X POST \
            -H "Authorization: token ${FORGE_TOKEN}" \
            -H "Content-Type: application/json" \
            "${api_url}/pulls" \
            -d "$pr_json" || true)

        # Split trailing status line from the JSON body
        http_code=$(printf '%s\n' "$pr_response" | tail -1)
        pr_response=$(printf '%s\n' "$pr_response" | sed '$d')

        if [ "$http_code" = "201" ] || [ "$http_code" = "200" ]; then
            local pr_num
            pr_num=$(printf '%s' "$pr_response" | jq -r '.number')
            _profile_log "PR created: #${pr_num}"
            printf '%s' "$pr_num"
            return 0
        else
            # Check if PR already exists (409 conflict)
            if [ "$http_code" = "409" ]; then
                local existing_pr
                existing_pr=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
                    "${api_url}/pulls?state=open&head=${AGENT_IDENTITY}:formula/${short_desc}" 2>/dev/null | \
                    jq -r '.[0].number // empty') || true
                if [ -n "$existing_pr" ]; then
                    _profile_log "PR already exists: #${existing_pr}"
                    printf '%s' "$existing_pr"
                    return 0
                fi
            fi
            _profile_log "ERROR: Failed to create PR (HTTP ${http_code})"
            return 1
        fi
    )

    # Propagate the subshell's exit status to the caller
    return $?
}
|
|
||||||
179
lib/release.sh
Normal file
179
lib/release.sh
Normal file
|
|
@ -0,0 +1,179 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# release.sh — disinto_release() function
|
||||||
|
#
|
||||||
|
# Handles vault TOML creation, branch setup on ops repo, PR creation,
|
||||||
|
# and auto-merge request for a versioned release.
|
||||||
|
#
|
||||||
|
# Globals expected:
|
||||||
|
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
|
||||||
|
# FORGE_TOKEN - API token for Forge operations
|
||||||
|
# FORGE_OPS_REPO - Ops repo slug (e.g. disinto-admin/myproject-ops)
|
||||||
|
# FACTORY_ROOT - Root of the disinto factory
|
||||||
|
# PRIMARY_BRANCH - Primary branch name (e.g. main)
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# source "${FACTORY_ROOT}/lib/release.sh"
|
||||||
|
# disinto_release <version>
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Source vault.sh for _vault_log helper
|
||||||
|
source "${FACTORY_ROOT}/lib/vault.sh"
|
||||||
|
|
||||||
|
# Verify every global this module depends on is present.
# Collects all missing names first so the error lists them in one shot,
# then exits non-zero if anything is absent.
_assert_release_globals() {
    local missing=()
    local var
    for var in FORGE_URL FORGE_TOKEN FORGE_OPS_REPO FACTORY_ROOT PRIMARY_BRANCH; do
        # ${!var:-} is bash indirect expansion: the value of the variable
        # whose name is stored in $var (empty if unset).
        if [ -z "${!var:-}" ]; then
            missing+=("$var")
        fi
    done
    if [ "${#missing[@]}" -gt 0 ]; then
        echo "Error: release.sh requires these globals to be set: ${missing[*]}" >&2
        exit 1
    fi
}
|
||||||
|
|
||||||
|
# disinto_release <version>
# Create a versioned release: writes a vault TOML into the ops repo,
# commits it on a release/<ver> branch, opens a PR against the primary
# branch, and requests auto-merge. Exits non-zero on any hard failure.
disinto_release() {
    _assert_release_globals

    local version="${1:-}"
    local formula_path="${FACTORY_ROOT}/formulas/release.toml"

    if [ -z "$version" ]; then
        echo "Error: version required" >&2
        echo "Usage: disinto release <version>" >&2
        echo "Example: disinto release v1.2.0" >&2
        exit 1
    fi

    # Validate version format (must start with 'v' followed by semver)
    if ! echo "$version" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then
        echo "Error: version must be in format v1.2.3 (semver with 'v' prefix)" >&2
        exit 1
    fi

    # Load project config to get FORGE_OPS_REPO
    if [ -z "${PROJECT_NAME:-}" ]; then
        # PROJECT_NAME is unset - detect project TOML from projects/ directory
        local found_toml
        found_toml=$(find "${FACTORY_ROOT}/projects" -maxdepth 1 -name "*.toml" ! -name "*.example" 2>/dev/null | head -1)
        if [ -n "$found_toml" ]; then
            source "${FACTORY_ROOT}/lib/load-project.sh" "$found_toml"
        fi
    else
        local project_toml="${FACTORY_ROOT}/projects/${PROJECT_NAME}.toml"
        if [ -f "$project_toml" ]; then
            source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml"
        fi
    fi

    # Check formula exists
    if [ ! -f "$formula_path" ]; then
        echo "Error: release formula not found at ${formula_path}" >&2
        exit 1
    fi

    # Get the ops repo root
    local ops_root="${FACTORY_ROOT}/../disinto-ops"
    if [ ! -d "${ops_root}/.git" ]; then
        echo "Error: ops repo not found at ${ops_root}" >&2
        echo "  Run 'disinto init' to set up the ops repo first" >&2
        exit 1
    fi

    # Generate a unique ID for the vault item (dots stripped: v1.2.0 -> release-v120)
    local id="release-${version//./}"
    local vault_toml="${ops_root}/vault/actions/${id}.toml"

    # Create vault TOML with the specific version
    cat > "$vault_toml" <<EOF
# vault/actions/${id}.toml
# Release vault item for ${version}
# Auto-generated by disinto release

id = "${id}"
formula = "release"
context = "Release ${version}"
secrets = ["GITHUB_TOKEN", "CODEBERG_TOKEN"]
mounts = ["ssh"]
EOF

    echo "Created vault item: ${vault_toml}"

    # Create a PR to submit the vault item to the ops repo
    local branch_name="release/${version//./}"
    local pr_title="release: ${version}"
    local pr_body="Release ${version}

This PR creates a vault item for the release of version ${version}.

## Changes
- Added vault item: ${id}.toml

## Next Steps
1. Review this PR
2. Approve and merge
3. The vault runner will execute the release formula
"

    # Create branch from clean primary branch. The subshell keeps the cd
    # from leaking; `|| exit 1` propagates push failure even if the caller
    # sourced us without `set -e`.
    (
        cd "$ops_root"
        git checkout "$PRIMARY_BRANCH"
        git pull origin "$PRIMARY_BRANCH"
        git checkout -B "$branch_name" "$PRIMARY_BRANCH"

        # Add and commit only the vault TOML file
        git add "vault/actions/${id}.toml"
        git commit -m "$pr_title" -m "$pr_body" 2>/dev/null || true

        # Push branch
        git push -u origin "$branch_name" 2>/dev/null || {
            echo "Error: failed to push branch" >&2
            exit 1
        }
    ) || exit 1

    # Build the PR payload with jq so embedded newlines, quotes, and
    # backslashes in the body are escaped correctly (the previous
    # hand-rolled sed newline-escaping broke on quotes/backslashes).
    local pr_payload
    pr_payload=$(jq -n \
        --arg t "$pr_title" \
        --arg h "$branch_name" \
        --arg base "$PRIMARY_BRANCH" \
        --arg b "$pr_body" \
        '{title:$t, head:$h, base:$base, body:$b}') || {
        echo "Error: failed to build PR JSON" >&2
        exit 1
    }

    # Create PR
    local pr_response
    pr_response=$(curl -sf -X POST \
        -H "Authorization: token ${FORGE_TOKEN}" \
        -H "Content-Type: application/json" \
        "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls" \
        -d "$pr_payload" 2>/dev/null) || {
        # curl -f suppresses the body on HTTP errors, so there is no
        # response text worth printing here.
        echo "Error: failed to create PR" >&2
        exit 1
    }

    local pr_number
    pr_number=$(echo "$pr_response" | jq -r '.number')

    local pr_url="${FORGE_URL}/${FORGE_OPS_REPO}/pulls/${pr_number}"

    # Enable auto-merge on the PR — Forgejo will auto-merge after approval
    _vault_log "Enabling auto-merge for PR #${pr_number}"
    curl -sf -X POST \
        -H "Authorization: token ${FORGE_TOKEN}" \
        -H "Content-Type: application/json" \
        "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/merge" \
        -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || {
        echo "Warning: failed to enable auto-merge (may already be enabled or not supported)" >&2
    }

    echo ""
    echo "Release PR created: ${pr_url}"
    echo ""
    echo "Next steps:"
    echo "  1. Review the PR"
    echo "  2. Approve the PR (auto-merge will trigger after approval)"
    echo "  3. The vault runner will execute the release formula"
    echo ""
    echo "After merge, the release will:"
    echo "  1. Tag Forgejo main with ${version}"
    echo "  2. Push tag to mirrors (Codeberg, GitHub)"
    echo "  3. Build and tag the agents Docker image"
    echo "  4. Restart agent containers"
}
|
||||||
197
lib/stack-lock.sh
Normal file
197
lib/stack-lock.sh
Normal file
|
|
@ -0,0 +1,197 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# stack-lock.sh — File-based lock protocol for singleton project stack access
|
||||||
|
#
|
||||||
|
# Prevents CI pipelines and the reproduce-agent from stepping on each other
|
||||||
|
# when sharing a single project stack (e.g. harb docker compose).
|
||||||
|
#
|
||||||
|
# Lock file: /home/agent/data/locks/<project>-stack.lock
|
||||||
|
# Contents: {"holder": "reproduce-agent-42", "since": "...", "heartbeat": "..."}
|
||||||
|
#
|
||||||
|
# Protocol:
|
||||||
|
# 1. stack_lock_check — inspect current lock state
|
||||||
|
# 2. stack_lock_acquire — wait until lock is free, then claim it
|
||||||
|
# 3. stack_lock_release — delete lock file when done
|
||||||
|
#
|
||||||
|
# Heartbeat: callers must update the heartbeat every 2 minutes while holding
|
||||||
|
# the lock by calling stack_lock_heartbeat. A heartbeat older than 10 minutes
|
||||||
|
# is considered stale — the next acquire will break it.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# source "$(dirname "$0")/../lib/stack-lock.sh"
|
||||||
|
# stack_lock_acquire "ci-pipeline-$BUILD_NUMBER" "myproject"
|
||||||
|
# trap 'stack_lock_release "myproject"' EXIT
|
||||||
|
# # ... do work ...
|
||||||
|
# stack_lock_release "myproject"
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
STACK_LOCK_DIR="${HOME}/data/locks"
|
||||||
|
STACK_LOCK_POLL_INTERVAL=30 # seconds between retry polls
|
||||||
|
STACK_LOCK_STALE_SECONDS=600 # 10 minutes — heartbeat older than this = stale
|
||||||
|
STACK_LOCK_MAX_WAIT=3600 # 1 hour — give up after this many seconds
|
||||||
|
|
||||||
|
# _stack_lock_path <project>
# Emit the lock-file path used for <project>'s stack.
_stack_lock_path() {
    printf '%s\n' "${STACK_LOCK_DIR}/${1}-stack.lock"
}
|
||||||
|
|
||||||
|
# _stack_lock_now
# Emit the current UTC time as an ISO-8601 timestamp (e.g. 2024-01-02T03:04:05Z).
_stack_lock_now() {
    date -u '+%Y-%m-%dT%H:%M:%SZ'
}
|
||||||
|
|
||||||
|
# _stack_lock_epoch <iso_timestamp>
# Convert an ISO-8601 UTC timestamp to Unix epoch seconds.
# Tries GNU `date -d` first, then falls back to the BSD `date -j -f` form.
_stack_lock_epoch() {
    # Drop the trailing Z once; both date variants parse the bare timestamp.
    local stamp="${1%Z}"
    date -u -d "$stamp" +%s 2>/dev/null \
        || date -u -j -f "%Y-%m-%dT%H:%M:%S" "$stamp" +%s 2>/dev/null
}
|
||||||
|
|
||||||
|
# stack_lock_check <project>
# Print lock status to stdout: "free", "held:<holder>", or "stale:<holder>".
# Returns 0 in all cases (status is in stdout).
stack_lock_check() {
    local project="$1"
    local lock_file
    lock_file="$(_stack_lock_path "$project")"

    if [ ! -f "$lock_file" ]; then
        echo "free"
        return 0
    fi

    # Parse holder/heartbeat out of the JSON lock file; fall back to safe
    # defaults if the file is corrupt or unreadable.
    local holder heartbeat
    holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder","unknown"))' "$lock_file" 2>/dev/null || echo "unknown")
    heartbeat=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("heartbeat",""))' "$lock_file" 2>/dev/null || echo "")

    # No heartbeat recorded at all => treat as stale.
    if [ -z "$heartbeat" ]; then
        echo "stale:${holder}"
        return 0
    fi

    local hb_epoch now_epoch age
    hb_epoch=$(_stack_lock_epoch "$heartbeat" 2>/dev/null || echo "0")
    # Guard against an unparseable timestamp yielding an empty string with a
    # zero exit status: an empty hb_epoch would make the arithmetic below a
    # syntax error (fatal under `set -e`).
    hb_epoch="${hb_epoch:-0}"
    now_epoch=$(date -u +%s)
    age=$(( now_epoch - hb_epoch ))

    if [ "$age" -gt "$STACK_LOCK_STALE_SECONDS" ]; then
        echo "stale:${holder}"
    else
        echo "held:${holder}"
    fi
}
|
||||||
|
|
||||||
|
# stack_lock_acquire <holder_id> <project> [max_wait_seconds]
# Acquire the lock for <project> on behalf of <holder_id>.
# Polls every STACK_LOCK_POLL_INTERVAL seconds and breaks stale locks
# automatically. Returns non-zero if the lock cannot be acquired within
# max_wait_seconds (default STACK_LOCK_MAX_WAIT).
stack_lock_acquire() {
    local holder="$1"
    local project="$2"
    local max_wait="${3:-$STACK_LOCK_MAX_WAIT}"
    local lock_file
    lock_file="$(_stack_lock_path "$project")"
    local deadline
    deadline=$(( $(date -u +%s) + max_wait ))

    mkdir -p "$STACK_LOCK_DIR"

    while true; do
        local status
        status=$(stack_lock_check "$project")

        case "$status" in
            free)
                local now
                now=$(_stack_lock_now)
                # Claim atomically: with noclobber set, the `>` redirect
                # fails if another process created the lock file between our
                # check and this write. This closes the check-then-write race
                # the previous mktemp+mv approach left open (rename silently
                # overwrites an existing lock, letting two callers both
                # believe they won).
                if ( set -o noclobber
                     printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \
                         "$holder" "$now" "$now" > "$lock_file" ) 2>/dev/null; then
                    echo "[stack-lock] acquired lock for ${project} as ${holder}" >&2
                    return 0
                fi
                # Lost the race — loop back and re-evaluate the lock state.
                ;;
            stale:*)
                local stale_holder="${status#stale:}"
                echo "[stack-lock] breaking stale lock held by ${stale_holder} for ${project}" >&2
                rm -f "$lock_file"
                # Loop back immediately to re-check and claim
                ;;
            held:*)
                local cur_holder="${status#held:}"
                local remaining
                remaining=$(( deadline - $(date -u +%s) ))
                if [ "$remaining" -le 0 ]; then
                    echo "[stack-lock] timed out waiting for lock on ${project} (held by ${cur_holder})" >&2
                    return 1
                fi
                echo "[stack-lock] ${project} locked by ${cur_holder}, waiting ${STACK_LOCK_POLL_INTERVAL}s (${remaining}s left)..." >&2
                sleep "$STACK_LOCK_POLL_INTERVAL"
                ;;
            *)
                echo "[stack-lock] unexpected status '${status}' for ${project}" >&2
                return 1
                ;;
        esac
    done
}
|
||||||
|
|
||||||
|
# stack_lock_heartbeat <holder_id> <project>
# Refresh the heartbeat timestamp in the lock file. Call roughly every
# 2 minutes while holding the lock. Silently does nothing if the lock file
# is absent or owned by a different holder.
stack_lock_heartbeat() {
    local me="$1"
    local proj="$2"
    local lf
    lf="$(_stack_lock_path "$proj")"

    # Nothing to refresh if the lock is gone.
    [ -f "$lf" ] || return 0

    # Only the current owner may touch the heartbeat.
    local owner
    owner=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lf" 2>/dev/null || echo "")
    [ "$owner" = "$me" ] || return 0

    # Preserve the original acquisition time.
    local started
    started=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("since",""))' "$lf" 2>/dev/null || echo "")

    # Rewrite via temp file + rename so readers never observe a partial file.
    local scratch
    scratch=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX")
    printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \
        "$me" "$started" "$(_stack_lock_now)" > "$scratch"
    mv "$scratch" "$lf"
}
|
||||||
|
|
||||||
|
# stack_lock_release <project> [holder_id]
# Delete the lock file for <project>.
# When holder_id is given, the lock is only removed if that holder currently
# owns it (guards against releasing a lock someone else has since acquired).
stack_lock_release() {
    local proj="$1"
    local expected="${2:-}"
    local lf
    lf="$(_stack_lock_path "$proj")"

    # Already released — nothing to do.
    [ -f "$lf" ] || return 0

    if [ -n "$expected" ]; then
        local actual
        actual=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lf" 2>/dev/null || echo "")
        if [ "$actual" != "$expected" ]; then
            echo "[stack-lock] refusing to release: lock held by '${actual}', not '${expected}'" >&2
            return 1
        fi
    fi

    rm -f "$lf"
    echo "[stack-lock] released lock for ${proj}" >&2
}
|
||||||
110
lib/vault.sh
110
lib/vault.sh
|
|
@ -39,6 +39,60 @@ _vault_ops_api() {
|
||||||
printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}"
|
printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# _vault_commit_direct — Commit low-tier action directly to ops main
# Args: ops_api tmp_toml_file action_id
# Uses FORGE_ADMIN_TOKEN (falling back to FORGE_TOKEN) to bypass the PR
# workflow and write the action TOML straight onto the primary branch.
# Returns non-zero on any API failure.
# -----------------------------------------------------------------------------
_vault_commit_direct() {
    local ops_api="$1"
    local tmp_toml="$2"
    local action_id="$3"
    local file_path="vault/actions/${action_id}.toml"

    # Prefer the admin (vault-bot) token; fall back to the regular token.
    local admin_token="${FORGE_ADMIN_TOKEN:-${FORGE_TOKEN}}"
    if [ -z "$admin_token" ]; then
        # Only reached when neither FORGE_ADMIN_TOKEN nor FORGE_TOKEN is set.
        echo "ERROR: FORGE_ADMIN_TOKEN is required for low-tier commits" >&2
        return 1
    fi

    # Resolve the primary-branch SHA. The contents API below does not need
    # it, but failing here confirms the branch exists and the token works
    # before we attempt the write.
    local main_sha
    main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \
        "${ops_api}/git/branches/${PRIMARY_BRANCH:-main}" 2>/dev/null | \
        jq -r '.commit.id // empty' || true)

    if [ -z "$main_sha" ]; then
        # Fallback: some Forgejo versions only expose the refs endpoint.
        main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \
            "${ops_api}/git/refs/heads/${PRIMARY_BRANCH:-main}" 2>/dev/null | \
            jq -r '.object.sha // empty' || true)
    fi

    if [ -z "$main_sha" ]; then
        echo "ERROR: could not get main branch SHA" >&2
        return 1
    fi

    _vault_log "Committing ${file_path} directly to ${PRIMARY_BRANCH:-main}"

    # Encode TOML content as base64. `base64 -w 0` is GNU-only; piping
    # through tr also works with BSD/macOS base64, which wraps output at
    # 76 columns by default (wrapped base64 would corrupt the JSON payload).
    local encoded_content
    encoded_content=$(base64 < "$tmp_toml" | tr -d '\n')

    # Commit directly to main branch using Forgejo content API
    if ! curl -sf -X PUT \
        -H "Authorization: token ${admin_token}" \
        -H "Content-Type: application/json" \
        "${ops_api}/contents/${file_path}" \
        -d "{\"message\":\"vault: add ${action_id} (low-tier)\",\"branch\":\"${PRIMARY_BRANCH:-main}\",\"content\":\"${encoded_content}\",\"committer\":{\"name\":\"vault-bot\",\"email\":\"vault-bot@${FORGE_REPO}\"},\"overwrite\":true}" >/dev/null 2>&1; then
        echo "ERROR: failed to write ${file_path} to ${PRIMARY_BRANCH:-main}" >&2
        return 1
    fi

    _vault_log "Direct commit successful for ${action_id}"
}
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# vault_request — Create a vault PR or return existing one
|
# vault_request — Create a vault PR or return existing one
|
||||||
# Args: action_id toml_content
|
# Args: action_id toml_content
|
||||||
|
|
@ -59,6 +113,9 @@ vault_request() {
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Get admin token for API calls (FORGE_ADMIN_TOKEN for low-tier, FORGE_TOKEN otherwise)
|
||||||
|
local admin_token="${FORGE_ADMIN_TOKEN:-${FORGE_TOKEN}}"
|
||||||
|
|
||||||
# Check if PR already exists for this action
|
# Check if PR already exists for this action
|
||||||
local existing_pr
|
local existing_pr
|
||||||
existing_pr=$(pr_find_by_branch "vault/${action_id}" "$(_vault_ops_api)") || true
|
existing_pr=$(pr_find_by_branch "vault/${action_id}" "$(_vault_ops_api)") || true
|
||||||
|
|
@ -99,7 +156,34 @@ vault_request() {
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Extract values for PR creation
|
# Get ops repo API URL
|
||||||
|
local ops_api
|
||||||
|
ops_api="$(_vault_ops_api)"
|
||||||
|
|
||||||
|
# Classify the action to determine if PR bypass is allowed
|
||||||
|
local classify_script="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/vault/classify.sh"
|
||||||
|
local vault_tier
|
||||||
|
vault_tier=$("$classify_script" "${VAULT_ACTION_FORMULA:-}" "${VAULT_BLAST_RADIUS_OVERRIDE:-}") || {
|
||||||
|
# Classification failed, default to high tier (require PR)
|
||||||
|
vault_tier="high"
|
||||||
|
_vault_log "Warning: classification failed, defaulting to high tier"
|
||||||
|
}
|
||||||
|
export VAULT_TIER="${vault_tier}"
|
||||||
|
|
||||||
|
# For low-tier actions, commit directly to ops main using FORGE_ADMIN_TOKEN
|
||||||
|
if [ "$vault_tier" = "low" ]; then
|
||||||
|
_vault_log "low-tier — committed directly to ops main"
|
||||||
|
# Add dispatch_mode field to indicate direct commit (no PR)
|
||||||
|
local direct_toml
|
||||||
|
direct_toml=$(mktemp /tmp/vault-direct-XXXXXX.toml)
|
||||||
|
trap 'rm -f "$tmp_toml" "$direct_toml"' RETURN
|
||||||
|
# Prepend dispatch_mode = "direct" to the TOML
|
||||||
|
printf 'dispatch_mode = "direct"\n%s\n' "$toml_content" > "$direct_toml"
|
||||||
|
_vault_commit_direct "$ops_api" "$direct_toml" "${action_id}"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract values for PR creation (medium/high tier)
|
||||||
local pr_title pr_body
|
local pr_title pr_body
|
||||||
pr_title="vault: ${action_id}"
|
pr_title="vault: ${action_id}"
|
||||||
pr_body="Vault action: ${action_id}
|
pr_body="Vault action: ${action_id}
|
||||||
|
|
@ -113,16 +197,12 @@ Secrets: ${VAULT_ACTION_SECRETS:-}
|
||||||
This vault action has been created by an agent and requires admin approval
|
This vault action has been created by an agent and requires admin approval
|
||||||
before execution. See the TOML file for details."
|
before execution. See the TOML file for details."
|
||||||
|
|
||||||
# Get ops repo API URL
|
|
||||||
local ops_api
|
|
||||||
ops_api="$(_vault_ops_api)"
|
|
||||||
|
|
||||||
# Create branch
|
# Create branch
|
||||||
local branch="vault/${action_id}"
|
local branch="vault/${action_id}"
|
||||||
local branch_exists
|
local branch_exists
|
||||||
|
|
||||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
-H "Authorization: token ${admin_token}" \
|
||||||
"${ops_api}/git/branches/${branch}" 2>/dev/null || echo "0")
|
"${ops_api}/git/branches/${branch}" 2>/dev/null || echo "0")
|
||||||
|
|
||||||
if [ "$branch_exists" != "200" ]; then
|
if [ "$branch_exists" != "200" ]; then
|
||||||
|
|
@ -131,13 +211,13 @@ before execution. See the TOML file for details."
|
||||||
|
|
||||||
# Get the commit SHA of main branch
|
# Get the commit SHA of main branch
|
||||||
local main_sha
|
local main_sha
|
||||||
main_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \
|
||||||
"${ops_api}/git/branches/${PRIMARY_BRANCH:-main}" 2>/dev/null | \
|
"${ops_api}/git/branches/${PRIMARY_BRANCH:-main}" 2>/dev/null | \
|
||||||
jq -r '.commit.id // empty' || true)
|
jq -r '.commit.id // empty' || true)
|
||||||
|
|
||||||
if [ -z "$main_sha" ]; then
|
if [ -z "$main_sha" ]; then
|
||||||
# Fallback: get from refs
|
# Fallback: get from refs
|
||||||
main_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \
|
||||||
"${ops_api}/git/refs/heads/${PRIMARY_BRANCH:-main}" 2>/dev/null | \
|
"${ops_api}/git/refs/heads/${PRIMARY_BRANCH:-main}" 2>/dev/null | \
|
||||||
jq -r '.object.sha // empty' || true)
|
jq -r '.object.sha // empty' || true)
|
||||||
fi
|
fi
|
||||||
|
|
@ -149,7 +229,7 @@ before execution. See the TOML file for details."
|
||||||
|
|
||||||
# Create the branch
|
# Create the branch
|
||||||
if ! curl -sf -X POST \
|
if ! curl -sf -X POST \
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
-H "Authorization: token ${admin_token}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
"${ops_api}/git/branches" \
|
"${ops_api}/git/branches" \
|
||||||
-d "{\"ref\":\"${branch}\",\"sha\":\"${main_sha}\"}" >/dev/null 2>&1; then
|
-d "{\"ref\":\"${branch}\",\"sha\":\"${main_sha}\"}" >/dev/null 2>&1; then
|
||||||
|
|
@ -170,7 +250,7 @@ before execution. See the TOML file for details."
|
||||||
|
|
||||||
# Upload file using Forgejo content API
|
# Upload file using Forgejo content API
|
||||||
if ! curl -sf -X PUT \
|
if ! curl -sf -X PUT \
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
-H "Authorization: token ${admin_token}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
"${ops_api}/contents/${file_path}" \
|
"${ops_api}/contents/${file_path}" \
|
||||||
-d "{\"message\":\"vault: add ${action_id}\",\"branch\":\"${branch}\",\"content\":\"${encoded_content}\",\"committer\":{\"name\":\"vault-bot\",\"email\":\"vault-bot@${FORGE_REPO}\"},\"overwrite\":true}" >/dev/null 2>&1; then
|
-d "{\"message\":\"vault: add ${action_id}\",\"branch\":\"${branch}\",\"content\":\"${encoded_content}\",\"committer\":{\"name\":\"vault-bot\",\"email\":\"vault-bot@${FORGE_REPO}\"},\"overwrite\":true}" >/dev/null 2>&1; then
|
||||||
|
|
@ -190,7 +270,7 @@ before execution. See the TOML file for details."
|
||||||
# Enable auto-merge on the PR — Forgejo will auto-merge after approval
|
# Enable auto-merge on the PR — Forgejo will auto-merge after approval
|
||||||
_vault_log "Enabling auto-merge for PR #${pr_num}"
|
_vault_log "Enabling auto-merge for PR #${pr_num}"
|
||||||
curl -sf -X POST \
|
curl -sf -X POST \
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
-H "Authorization: token ${admin_token}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
"${ops_api}/pulls/${pr_num}/merge" \
|
"${ops_api}/pulls/${pr_num}/merge" \
|
||||||
-d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || {
|
-d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || {
|
||||||
|
|
@ -202,18 +282,18 @@ before execution. See the TOML file for details."
|
||||||
|
|
||||||
# Get label IDs
|
# Get label IDs
|
||||||
local vault_label_id pending_label_id
|
local vault_label_id pending_label_id
|
||||||
vault_label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
vault_label_id=$(curl -sf -H "Authorization: token ${admin_token}" \
|
||||||
"${ops_api}/labels" 2>/dev/null | \
|
"${ops_api}/labels" 2>/dev/null | \
|
||||||
jq -r --arg n "vault" '.[] | select(.name == $n) | .id // empty' || true)
|
jq -r --arg n "vault" '.[] | select(.name == $n) | .id // empty' || true)
|
||||||
|
|
||||||
pending_label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
pending_label_id=$(curl -sf -H "Authorization: token ${admin_token}" \
|
||||||
"${ops_api}/labels" 2>/dev/null | \
|
"${ops_api}/labels" 2>/dev/null | \
|
||||||
jq -r --arg n "pending-approval" '.[] | select(.name == $n) | .id // empty' || true)
|
jq -r --arg n "pending-approval" '.[] | select(.name == $n) | .id // empty' || true)
|
||||||
|
|
||||||
# Add labels if they exist
|
# Add labels if they exist
|
||||||
if [ -n "$vault_label_id" ]; then
|
if [ -n "$vault_label_id" ]; then
|
||||||
curl -sf -X POST \
|
curl -sf -X POST \
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
-H "Authorization: token ${admin_token}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
"${ops_api}/issues/${pr_num}/labels" \
|
"${ops_api}/issues/${pr_num}/labels" \
|
||||||
-d "[{\"id\":${vault_label_id}}]" >/dev/null 2>&1 || true
|
-d "[{\"id\":${vault_label_id}}]" >/dev/null 2>&1 || true
|
||||||
|
|
@ -221,7 +301,7 @@ before execution. See the TOML file for details."
|
||||||
|
|
||||||
if [ -n "$pending_label_id" ]; then
|
if [ -n "$pending_label_id" ]; then
|
||||||
curl -sf -X POST \
|
curl -sf -X POST \
|
||||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
-H "Authorization: token ${admin_token}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
"${ops_api}/issues/${pr_num}/labels" \
|
"${ops_api}/issues/${pr_num}/labels" \
|
||||||
-d "[{\"id\":${pending_label_id}}]" >/dev/null 2>&1 || true
|
-d "[{\"id\":${pending_label_id}}]" >/dev/null 2>&1 || true
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
<!-- last-reviewed: 33eb565d7e0c5b7e0159e1720ba7f79126a7e25e -->
|
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
|
||||||
# Planner Agent
|
# Planner Agent
|
||||||
|
|
||||||
**Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints),
|
**Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints),
|
||||||
executed directly from cron via tmux + Claude.
|
invoked by the polling loop in `docker/agents/entrypoint.sh` every 12 hours (iteration math at line 210-222) via tmux + Claude.
|
||||||
Phase 0 (preflight): pull latest code, load persistent memory and prerequisite
|
Phase 0 (preflight): pull latest code, load persistent memory and prerequisite
|
||||||
tree from `$OPS_REPO_ROOT/knowledge/planner-memory.md` and `$OPS_REPO_ROOT/prerequisites.md`. Also reads
|
tree from `$OPS_REPO_ROOT/knowledge/planner-memory.md` and `$OPS_REPO_ROOT/prerequisites.md`. Also reads
|
||||||
all available formulas: factory formulas (`$FACTORY_ROOT/formulas/*.toml`) and
|
all available formulas: factory formulas (`$FACTORY_ROOT/formulas/*.toml`) and
|
||||||
|
|
@ -41,16 +41,16 @@ AGENTS.md maintenance is handled by the Gardener.
|
||||||
prerequisite tree, memory, vault state) live under `$OPS_REPO_ROOT/`.
|
prerequisite tree, memory, vault state) live under `$OPS_REPO_ROOT/`.
|
||||||
Each project manages its own planner state in a separate ops repo.
|
Each project manages its own planner state in a separate ops repo.
|
||||||
|
|
||||||
**Trigger**: `planner-run.sh` runs daily via cron (accepts an optional project
|
**Trigger**: `planner-run.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh`
|
||||||
TOML argument, defaults to `projects/disinto.toml`). Sources `lib/guard.sh` and
|
every 12 hours (iteration math at line 210-222). Accepts an optional project TOML argument,
|
||||||
calls `check_active planner` first — skips if `$FACTORY_ROOT/state/.planner-active`
|
defaults to `projects/disinto.toml`. Sources `lib/guard.sh` and calls `check_active planner`
|
||||||
is absent. Then creates a tmux session with `claude --model opus`, injects
|
first — skips if `$FACTORY_ROOT/state/.planner-active` is absent. Then creates a tmux session
|
||||||
`formulas/run-planner.toml` as context, monitors the phase file, and cleans up
|
with `claude --model opus`, injects `formulas/run-planner.toml` as context, monitors the
|
||||||
on completion or timeout. No action issues — the planner is a nervous system
|
phase file, and cleans up on completion or timeout. No action issues — the planner is a
|
||||||
component, not work.
|
nervous system component, not work.
|
||||||
|
|
||||||
**Key files**:
|
**Key files**:
|
||||||
- `planner/planner-run.sh` — Cron wrapper + orchestrator: lock, memory guard,
|
- `planner/planner-run.sh` — Polling loop participant + orchestrator: lock, memory guard,
|
||||||
sources disinto project config, builds structural analysis via `lib/formula-session.sh:build_graph_section()`,
|
sources disinto project config, builds structural analysis via `lib/formula-session.sh:build_graph_section()`,
|
||||||
creates tmux session, injects formula prompt, monitors phase file, handles crash recovery, cleans up
|
creates tmux session, injects formula prompt, monitors phase file, handles crash recovery, cleans up
|
||||||
- `formulas/run-planner.toml` — Execution spec: six steps (preflight,
|
- `formulas/run-planner.toml` — Execution spec: six steps (preflight,
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# planner-run.sh — Cron wrapper: planner execution via SDK + formula
|
# planner-run.sh — Polling-loop wrapper: planner execution via SDK + formula
|
||||||
#
|
#
|
||||||
# Synchronous bash loop using claude -p (one-shot invocation).
|
# Synchronous bash loop using claude -p (one-shot invocation).
|
||||||
# No tmux sessions, no phase files — the bash script IS the state machine.
|
# No tmux sessions, no phase files — the bash script IS the state machine.
|
||||||
#
|
#
|
||||||
# Flow:
|
# Flow:
|
||||||
# 1. Guards: cron lock, memory check
|
# 1. Guards: run lock, memory check
|
||||||
# 2. Load formula (formulas/run-planner.toml)
|
# 2. Load formula (formulas/run-planner.toml)
|
||||||
# 3. Context: VISION.md, AGENTS.md, ops:RESOURCES.md, structural graph,
|
# 3. Context: VISION.md, AGENTS.md, ops:RESOURCES.md, structural graph,
|
||||||
# planner memory, journal entries
|
# planner memory, journal entries
|
||||||
|
|
@ -35,7 +35,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
|
||||||
# shellcheck source=../lib/agent-sdk.sh
|
# shellcheck source=../lib/agent-sdk.sh
|
||||||
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
||||||
|
|
||||||
LOG_FILE="$SCRIPT_DIR/planner.log"
|
LOG_FILE="${DISINTO_LOG_DIR}/planner/planner.log"
|
||||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||||
LOGFILE="$LOG_FILE"
|
LOGFILE="$LOG_FILE"
|
||||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||||
|
|
@ -43,21 +43,60 @@ SID_FILE="/tmp/planner-session-${PROJECT_NAME}.sid"
|
||||||
SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md"
|
SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md"
|
||||||
WORKTREE="/tmp/${PROJECT_NAME}-planner-run"
|
WORKTREE="/tmp/${PROJECT_NAME}-planner-run"
|
||||||
|
|
||||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
# Override LOG_AGENT for consistent agent identification
|
||||||
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||||
|
LOG_AGENT="planner"
|
||||||
|
|
||||||
|
# Override log() to append to planner-specific log file
|
||||||
|
# shellcheck disable=SC2034
|
||||||
|
log() {
|
||||||
|
local agent="${LOG_AGENT:-planner}"
|
||||||
|
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
|
||||||
|
}
|
||||||
|
|
||||||
# ── Guards ────────────────────────────────────────────────────────────────
|
# ── Guards ────────────────────────────────────────────────────────────────
|
||||||
check_active planner
|
check_active planner
|
||||||
acquire_cron_lock "/tmp/planner-run.lock"
|
acquire_run_lock "/tmp/planner-run.lock"
|
||||||
check_memory 2000
|
memory_guard 2000
|
||||||
|
|
||||||
log "--- Planner run start ---"
|
log "--- Planner run start ---"
|
||||||
|
|
||||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
# ── Precondition checks: skip if nothing to plan ──────────────────────────
|
||||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_PLANNER_TOKEN:-}" ]; then
|
LAST_SHA_FILE="$FACTORY_ROOT/state/planner-last-sha"
|
||||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_PLANNER_TOKEN}" \
|
LAST_OPS_SHA_FILE="$FACTORY_ROOT/state/planner-last-ops-sha"
|
||||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
|
||||||
|
CURRENT_SHA=$(git -C "$FACTORY_ROOT" rev-parse HEAD 2>/dev/null || echo "")
|
||||||
|
LAST_SHA=$(cat "$LAST_SHA_FILE" 2>/dev/null || echo "")
|
||||||
|
|
||||||
|
# ops repo is required for planner — pull before checking sha
|
||||||
|
ensure_ops_repo
|
||||||
|
CURRENT_OPS_SHA=$(git -C "$OPS_REPO_ROOT" rev-parse HEAD 2>/dev/null || echo "")
|
||||||
|
LAST_OPS_SHA=$(cat "$LAST_OPS_SHA_FILE" 2>/dev/null || echo "")
|
||||||
|
|
||||||
|
unreviewed_count=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues?labels=prediction/unreviewed&state=open&limit=1" 2>/dev/null | jq length) || unreviewed_count=0
|
||||||
|
vision_open=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
"${FORGE_API}/issues?labels=vision&state=open&limit=1" 2>/dev/null | jq length) || vision_open=0
|
||||||
|
|
||||||
|
if [ "$CURRENT_SHA" = "$LAST_SHA" ] \
|
||||||
|
&& [ "$CURRENT_OPS_SHA" = "$LAST_OPS_SHA" ] \
|
||||||
|
&& [ "${unreviewed_count:-0}" -eq 0 ] \
|
||||||
|
&& [ "${vision_open:-0}" -eq 0 ]; then
|
||||||
|
log "no new commits, no ops changes, no unreviewed predictions, no open vision — skipping"
|
||||||
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
log "sha=${CURRENT_SHA:0:8} ops=${CURRENT_OPS_SHA:0:8} unreviewed=${unreviewed_count} vision=${vision_open}"
|
||||||
|
|
||||||
|
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||||
|
# Run git operations from the project checkout, not the baked code dir
|
||||||
|
cd "$PROJECT_REPO_ROOT"
|
||||||
|
|
||||||
|
resolve_forge_remote
|
||||||
|
|
||||||
|
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||||
|
resolve_agent_identity || true
|
||||||
|
|
||||||
# ── Load formula + context ───────────────────────────────────────────────
|
# ── Load formula + context ───────────────────────────────────────────────
|
||||||
load_formula_or_profile "planner" "$FACTORY_ROOT/formulas/run-planner.toml" || exit 1
|
load_formula_or_profile "planner" "$FACTORY_ROOT/formulas/run-planner.toml" || exit 1
|
||||||
build_context_block VISION.md AGENTS.md ops:RESOURCES.md ops:prerequisites.md
|
build_context_block VISION.md AGENTS.md ops:RESOURCES.md ops:prerequisites.md
|
||||||
|
|
@ -65,9 +104,6 @@ build_context_block VISION.md AGENTS.md ops:RESOURCES.md ops:prerequisites.md
|
||||||
# ── Build structural analysis graph ──────────────────────────────────────
|
# ── Build structural analysis graph ──────────────────────────────────────
|
||||||
build_graph_section
|
build_graph_section
|
||||||
|
|
||||||
# ── Ensure ops repo is available ───────────────────────────────────────
|
|
||||||
ensure_ops_repo
|
|
||||||
|
|
||||||
# ── Read planner memory ─────────────────────────────────────────────────
|
# ── Read planner memory ─────────────────────────────────────────────────
|
||||||
MEMORY_BLOCK=""
|
MEMORY_BLOCK=""
|
||||||
MEMORY_FILE="$OPS_REPO_ROOT/knowledge/planner-memory.md"
|
MEMORY_FILE="$OPS_REPO_ROOT/knowledge/planner-memory.md"
|
||||||
|
|
@ -115,6 +151,11 @@ export CLAUDE_MODEL="opus"
|
||||||
agent_run --worktree "$WORKTREE" "$PROMPT"
|
agent_run --worktree "$WORKTREE" "$PROMPT"
|
||||||
log "agent_run complete"
|
log "agent_run complete"
|
||||||
|
|
||||||
|
# Persist watermarks so next run can skip if nothing changed
|
||||||
|
mkdir -p "$FACTORY_ROOT/state"
|
||||||
|
echo "$CURRENT_SHA" > "$LAST_SHA_FILE"
|
||||||
|
echo "$CURRENT_OPS_SHA" > "$LAST_OPS_SHA_FILE"
|
||||||
|
|
||||||
# Write journal entry post-session
|
# Write journal entry post-session
|
||||||
profile_write_journal "planner-run" "Planner run $(date -u +%Y-%m-%d)" "complete" "" || true
|
profile_write_journal "planner-run" "Planner run $(date -u +%Y-%m-%d)" "complete" "" || true
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
<!-- last-reviewed: 33eb565d7e0c5b7e0159e1720ba7f79126a7e25e -->
|
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
|
||||||
# Predictor Agent
|
# Predictor Agent
|
||||||
|
|
||||||
**Role**: Abstract adversary (the "goblin"). Runs a 2-step formula
|
**Role**: Abstract adversary (the "goblin"). Runs a 2-step formula
|
||||||
|
|
@ -22,14 +22,15 @@ exploit counts as 2 (prediction + action dispatch). The predictor MUST NOT
|
||||||
emit feature work — only observations challenging claims, exposing gaps,
|
emit feature work — only observations challenging claims, exposing gaps,
|
||||||
and surfacing risks.
|
and surfacing risks.
|
||||||
|
|
||||||
**Trigger**: `predictor-run.sh` runs daily at 06:00 UTC via cron (1h before
|
**Trigger**: `predictor-run.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh`
|
||||||
the planner at 07:00). Sources `lib/guard.sh` and calls `check_active predictor`
|
every 24 hours (iteration math at line 224-236). Sources `lib/guard.sh` and calls
|
||||||
first — skips if `$FACTORY_ROOT/state/.predictor-active` is absent. Also guarded
|
`check_active predictor` first — skips if `$FACTORY_ROOT/state/.predictor-active` is absent.
|
||||||
by PID lock (`/tmp/predictor-run.lock`) and memory check (skips if available
|
Also guarded by PID lock (`/tmp/predictor-run.lock`) and memory check (skips if available
|
||||||
RAM < 2000 MB).
|
RAM < 2000 MB). Note: the 24h cadence is iteration-based, not anchored to 06:00 UTC —
|
||||||
|
it drifts on container restart.
|
||||||
|
|
||||||
**Key files**:
|
**Key files**:
|
||||||
- `predictor/predictor-run.sh` — Cron wrapper + orchestrator: active-state guard,
|
- `predictor/predictor-run.sh` — Polling loop participant + orchestrator: active-state guard,
|
||||||
lock, memory guard, sources disinto project config, builds structural analysis
|
lock, memory guard, sources disinto project config, builds structural analysis
|
||||||
via `lib/formula-session.sh:build_graph_section()` (full-project scan — results
|
via `lib/formula-session.sh:build_graph_section()` (full-project scan — results
|
||||||
included in prompt as `## Structural analysis`; failures non-fatal), builds
|
included in prompt as `## Structural analysis`; failures non-fatal), builds
|
||||||
|
|
@ -44,7 +45,7 @@ RAM < 2000 MB).
|
||||||
- `FORGE_TOKEN`, `FORGE_PREDICTOR_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`, `OPS_REPO_ROOT`
|
- `FORGE_TOKEN`, `FORGE_PREDICTOR_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`, `OPS_REPO_ROOT`
|
||||||
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by predictor-run.sh)
|
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by predictor-run.sh)
|
||||||
|
|
||||||
**Lifecycle**: predictor-run.sh (daily 06:00 cron) → lock + memory guard →
|
**Lifecycle**: predictor-run.sh (invoked by polling loop every 24h) → lock + memory guard →
|
||||||
load formula + context (AGENTS.md, VISION.md from code repo; RESOURCES.md, prerequisites.md from ops repo)
|
load formula + context (AGENTS.md, VISION.md from code repo; RESOURCES.md, prerequisites.md from ops repo)
|
||||||
→ create tmux session → Claude fetches prediction history (open + closed) →
|
→ create tmux session → Claude fetches prediction history (open + closed) →
|
||||||
reviews track record (actioned/dismissed/watching) → finds weaknesses
|
reviews track record (actioned/dismissed/watching) → finds weaknesses
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# predictor-run.sh — Cron wrapper: predictor execution via SDK + formula
|
# predictor-run.sh — Polling-loop wrapper: predictor execution via SDK + formula
|
||||||
#
|
#
|
||||||
# Synchronous bash loop using claude -p (one-shot invocation).
|
# Synchronous bash loop using claude -p (one-shot invocation).
|
||||||
# No tmux sessions, no phase files — the bash script IS the state machine.
|
# No tmux sessions, no phase files — the bash script IS the state machine.
|
||||||
#
|
#
|
||||||
# Flow:
|
# Flow:
|
||||||
# 1. Guards: cron lock, memory check
|
# 1. Guards: run lock, memory check
|
||||||
# 2. Load formula (formulas/run-predictor.toml)
|
# 2. Load formula (formulas/run-predictor.toml)
|
||||||
# 3. Context: AGENTS.md, ops:RESOURCES.md, VISION.md, structural graph
|
# 3. Context: AGENTS.md, ops:RESOURCES.md, VISION.md, structural graph
|
||||||
# 4. agent_run(worktree, prompt) → Claude analyzes, writes to ops repo
|
# 4. agent_run(worktree, prompt) → Claude analyzes, writes to ops repo
|
||||||
|
|
@ -14,7 +14,7 @@
|
||||||
# Usage:
|
# Usage:
|
||||||
# predictor-run.sh [projects/disinto.toml] # project config (default: disinto)
|
# predictor-run.sh [projects/disinto.toml] # project config (default: disinto)
|
||||||
#
|
#
|
||||||
# Cron: 0 6 * * * cd /path/to/dark-factory && bash predictor/predictor-run.sh
|
# Called by: entrypoint.sh polling loop (daily)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
|
@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
|
||||||
# shellcheck source=../lib/agent-sdk.sh
|
# shellcheck source=../lib/agent-sdk.sh
|
||||||
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
||||||
|
|
||||||
LOG_FILE="$SCRIPT_DIR/predictor.log"
|
LOG_FILE="${DISINTO_LOG_DIR}/predictor/predictor.log"
|
||||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||||
LOGFILE="$LOG_FILE"
|
LOGFILE="$LOG_FILE"
|
||||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||||
|
|
@ -44,20 +44,32 @@ SID_FILE="/tmp/predictor-session-${PROJECT_NAME}.sid"
|
||||||
SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md"
|
SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md"
|
||||||
WORKTREE="/tmp/${PROJECT_NAME}-predictor-run"
|
WORKTREE="/tmp/${PROJECT_NAME}-predictor-run"
|
||||||
|
|
||||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
# Override LOG_AGENT for consistent agent identification
|
||||||
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||||
|
LOG_AGENT="predictor"
|
||||||
|
|
||||||
|
# Override log() to append to predictor-specific log file
|
||||||
|
# shellcheck disable=SC2034
|
||||||
|
log() {
|
||||||
|
local agent="${LOG_AGENT:-predictor}"
|
||||||
|
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
|
||||||
|
}
|
||||||
|
|
||||||
# ── Guards ────────────────────────────────────────────────────────────────
|
# ── Guards ────────────────────────────────────────────────────────────────
|
||||||
check_active predictor
|
check_active predictor
|
||||||
acquire_cron_lock "/tmp/predictor-run.lock"
|
acquire_run_lock "/tmp/predictor-run.lock"
|
||||||
check_memory 2000
|
memory_guard 2000
|
||||||
|
|
||||||
log "--- Predictor run start ---"
|
log "--- Predictor run start ---"
|
||||||
|
|
||||||
|
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||||
|
# Run git operations from the project checkout, not the baked code dir
|
||||||
|
cd "$PROJECT_REPO_ROOT"
|
||||||
|
|
||||||
|
resolve_forge_remote
|
||||||
|
|
||||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_PREDICTOR_TOKEN:-}" ]; then
|
resolve_agent_identity || true
|
||||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_PREDICTOR_TOKEN}" \
|
|
||||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ── Load formula + context ───────────────────────────────────────────────
|
# ── Load formula + context ───────────────────────────────────────────────
|
||||||
load_formula_or_profile "predictor" "$FACTORY_ROOT/formulas/run-predictor.toml" || exit 1
|
load_formula_or_profile "predictor" "$FACTORY_ROOT/formulas/run-predictor.toml" || exit 1
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
name = "disinto"
|
name = "disinto"
|
||||||
repo = "johba/disinto"
|
repo = "johba/disinto"
|
||||||
ops_repo = "johba/disinto-ops"
|
ops_repo = "disinto-admin/disinto-ops"
|
||||||
forge_url = "http://localhost:3000"
|
forge_url = "http://localhost:3000"
|
||||||
repo_root = "/home/YOU/dark-factory"
|
repo_root = "/home/YOU/dark-factory"
|
||||||
ops_repo_root = "/home/YOU/disinto-ops"
|
ops_repo_root = "/home/YOU/disinto-ops"
|
||||||
|
|
@ -23,6 +23,24 @@ check_prs = true
|
||||||
check_dev_agent = true
|
check_dev_agent = true
|
||||||
check_pipeline_stall = false
|
check_pipeline_stall = false
|
||||||
|
|
||||||
|
# Local-model agents (optional) — configure to use llama-server or similar
|
||||||
|
# for local LLM inference. Each agent gets its own container with isolated
|
||||||
|
# credentials and configuration.
|
||||||
|
#
|
||||||
|
# When enabled, `disinto init` automatically:
|
||||||
|
# 1. Creates a Forgejo bot user matching agents.llama.forge_user
|
||||||
|
# 2. Generates FORGE_TOKEN_<BOT> and FORGE_PASS_<BOT> (stored in .env.enc)
|
||||||
|
# 3. Adds the bot user as a write collaborator on the project repo
|
||||||
|
#
|
||||||
|
# [agents.llama]
|
||||||
|
# base_url = "http://10.10.10.1:8081"
|
||||||
|
# model = "unsloth/Qwen3.5-35B-A3B"
|
||||||
|
# api_key = "sk-no-key-required"
|
||||||
|
# roles = ["dev"]
|
||||||
|
# forge_user = "dev-qwen"
|
||||||
|
# compact_pct = 60
|
||||||
|
# poll_interval = 60
|
||||||
|
|
||||||
# [mirrors]
|
# [mirrors]
|
||||||
# github = "git@github.com:johba/disinto.git"
|
# github = "git@github.com:johba/disinto.git"
|
||||||
# codeberg = "git@codeberg.org:johba/disinto.git"
|
# codeberg = "git@codeberg.org:johba/disinto.git"
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,29 @@
|
||||||
<!-- last-reviewed: 33eb565d7e0c5b7e0159e1720ba7f79126a7e25e -->
|
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
|
||||||
# Review Agent
|
# Review Agent
|
||||||
|
|
||||||
**Role**: AI-powered PR review — post structured findings and formal
|
**Role**: AI-powered PR review — post structured findings and formal
|
||||||
approve/request-changes verdicts to forge.
|
approve/request-changes verdicts to forge.
|
||||||
|
|
||||||
**Trigger**: `review-poll.sh` runs every 10 min via cron. It scans open PRs
|
**Trigger**: `review-poll.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh`
|
||||||
whose CI has passed and that lack a review for the current HEAD SHA, then
|
every 5 minutes (iteration math at line 163-167). It scans open PRs whose CI has passed and
|
||||||
spawns `review-pr.sh <pr-number>`.
|
that lack a review for the current HEAD SHA, then spawns `review-pr.sh <pr-number>`.
|
||||||
|
|
||||||
**Key files**:
|
**Key files**:
|
||||||
- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. **Circuit breaker**: counts existing `<!-- review-error: <sha> -->` comments; skips a PR if ≥3 consecutive errors for the same HEAD SHA (prevents flooding on repeated review failures).
|
- `review/review-poll.sh` — Polling loop participant: finds unreviewed PRs with passing CI.
|
||||||
- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Before starting the session, runs `lib/build-graph.py --changed-files <PR files>` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it.
|
Invoked by `docker/agents/entrypoint.sh` every 5 minutes. Sources `lib/guard.sh` and calls
|
||||||
|
`check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent.
|
||||||
|
**Circuit breaker**: counts existing `<!-- review-error: <sha> -->` comments; skips a PR
|
||||||
|
if ≥3 consecutive errors for the same HEAD SHA (prevents flooding on repeated review failures).
|
||||||
|
- `review/review-pr.sh` — Polling loop participant: creates/reuses a tmux session
|
||||||
|
(`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output,
|
||||||
|
posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing
|
||||||
|
tech debt. **cd at startup**: changes to `$PROJECT_REPO_ROOT` early in the script — before
|
||||||
|
any git commands — because the factory root is not a git repo after image rebuild (#408).
|
||||||
|
Calls `resolve_forge_remote()` at startup to determine the correct git remote name (avoids
|
||||||
|
hardcoded 'origin'). Before starting the session, runs `lib/build-graph.py --changed-files
|
||||||
|
<PR files>` and appends the JSON structural analysis (affected objectives, orphaned
|
||||||
|
prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review
|
||||||
|
proceeds without it.
|
||||||
|
|
||||||
**Environment variables consumed**:
|
**Environment variables consumed**:
|
||||||
- `FORGE_TOKEN` — Dev-agent token (must not be the same account as FORGE_REVIEW_TOKEN)
|
- `FORGE_TOKEN` — Dev-agent token (must not be the same account as FORGE_REVIEW_TOKEN)
|
||||||
|
|
|
||||||
|
|
@ -23,8 +23,15 @@ LOGFILE="${DISINTO_LOG_DIR}/review/review-poll.log"
|
||||||
MAX_REVIEWS=3
|
MAX_REVIEWS=3
|
||||||
REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle
|
REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle
|
||||||
|
|
||||||
|
# Override LOG_AGENT for consistent agent identification
|
||||||
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||||
|
LOG_AGENT="review"
|
||||||
|
|
||||||
|
# Override log() to append to review-specific log file
|
||||||
|
# shellcheck disable=SC2034
|
||||||
log() {
|
log() {
|
||||||
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
local agent="${LOG_AGENT:-review}"
|
||||||
|
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOGFILE"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Log rotation
|
# Log rotation
|
||||||
|
|
@ -126,10 +133,11 @@ if [ -n "$REVIEW_SIDS" ]; then
|
||||||
|
|
||||||
log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}→${current_sha:0:7})"
|
log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}→${current_sha:0:7})"
|
||||||
|
|
||||||
if "${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1; then
|
review_output=$("${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1) && review_rc=0 || review_rc=$?
|
||||||
|
if [ "$review_rc" -eq 0 ]; then
|
||||||
REVIEWED=$((REVIEWED + 1))
|
REVIEWED=$((REVIEWED + 1))
|
||||||
else
|
else
|
||||||
log " #${pr_num} re-review failed"
|
log " #${pr_num} re-review failed (exit code $review_rc): $(echo "$review_output" | tail -3)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
[ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break
|
[ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break
|
||||||
|
|
@ -180,10 +188,11 @@ while IFS= read -r line; do
|
||||||
|
|
||||||
log " #${PR_NUM} error check: ${ERROR_COMMENTS:-0} prior error(s) for ${PR_SHA:0:7}"
|
log " #${PR_NUM} error check: ${ERROR_COMMENTS:-0} prior error(s) for ${PR_SHA:0:7}"
|
||||||
|
|
||||||
if "${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1; then
|
review_output=$("${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1) && review_rc=0 || review_rc=$?
|
||||||
|
if [ "$review_rc" -eq 0 ]; then
|
||||||
REVIEWED=$((REVIEWED + 1))
|
REVIEWED=$((REVIEWED + 1))
|
||||||
else
|
else
|
||||||
log " #${PR_NUM} review failed"
|
log " #${PR_NUM} review failed (exit code $review_rc): $(echo "$review_output" | tail -3)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then
|
if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,10 @@ git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true
|
||||||
|
|
||||||
# --- Config ---
|
# --- Config ---
|
||||||
PR_NUMBER="${1:?Usage: review-pr.sh <pr-number> [--force]}"
|
PR_NUMBER="${1:?Usage: review-pr.sh <pr-number> [--force]}"
|
||||||
|
|
||||||
|
# Change to project repo early — required before any git commands
|
||||||
|
# (factory root is not a git repo after image rebuild)
|
||||||
|
cd "${PROJECT_REPO_ROOT}"
|
||||||
FORCE="${2:-}"
|
FORCE="${2:-}"
|
||||||
API="${FORGE_API}"
|
API="${FORGE_API}"
|
||||||
LOGFILE="${DISINTO_LOG_DIR}/review/review.log"
|
LOGFILE="${DISINTO_LOG_DIR}/review/review.log"
|
||||||
|
|
@ -58,13 +62,15 @@ if [ -f "$LOGFILE" ] && [ "$(stat -c%s "$LOGFILE" 2>/dev/null || echo 0)" -gt 10
|
||||||
mv "$LOGFILE" "$LOGFILE.old"
|
mv "$LOGFILE" "$LOGFILE.old"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# RESOLVE FORGE REMOTE FOR GIT OPERATIONS
|
||||||
|
# =============================================================================
|
||||||
|
resolve_forge_remote
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# RESOLVE AGENT IDENTITY FOR .PROFILE REPO
|
# RESOLVE AGENT IDENTITY FOR .PROFILE REPO
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then
|
resolve_agent_identity || true
|
||||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
|
||||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
|
||||||
fi
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# MEMORY GUARD
|
# MEMORY GUARD
|
||||||
|
|
@ -131,7 +137,7 @@ PREV_REV=$(printf '%s' "$ALL_COMMENTS" | jq -r --arg s "$PR_SHA" \
|
||||||
if [ -n "$PREV_REV" ] && [ "$PREV_REV" != "null" ]; then
|
if [ -n "$PREV_REV" ] && [ "$PREV_REV" != "null" ]; then
|
||||||
PREV_BODY=$(printf '%s' "$PREV_REV" | jq -r '.body')
|
PREV_BODY=$(printf '%s' "$PREV_REV" | jq -r '.body')
|
||||||
PREV_SHA=$(printf '%s' "$PREV_BODY" | grep -oP '<!-- reviewed: \K[a-f0-9]+' | head -1)
|
PREV_SHA=$(printf '%s' "$PREV_BODY" | grep -oP '<!-- reviewed: \K[a-f0-9]+' | head -1)
|
||||||
cd "${PROJECT_REPO_ROOT}"; git fetch origin "$PR_HEAD" 2>/dev/null || true
|
cd "${PROJECT_REPO_ROOT}"; git fetch "${FORGE_REMOTE}" "$PR_HEAD" 2>/dev/null || true
|
||||||
INCR=$(git diff "${PREV_SHA}..${PR_SHA}" 2>/dev/null | head -c "$MAX_DIFF") || true
|
INCR=$(git diff "${PREV_SHA}..${PR_SHA}" 2>/dev/null | head -c "$MAX_DIFF") || true
|
||||||
if [ -n "$INCR" ]; then
|
if [ -n "$INCR" ]; then
|
||||||
IS_RE_REVIEW=true; log "re-review: previous at ${PREV_SHA:0:7}"
|
IS_RE_REVIEW=true; log "re-review: previous at ${PREV_SHA:0:7}"
|
||||||
|
|
@ -161,12 +167,11 @@ DNOTE=""; [ "$FSIZE" -gt "$MAX_DIFF" ] && DNOTE=" (truncated from ${FSIZE} bytes
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# WORKTREE SETUP
|
# WORKTREE SETUP
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
cd "${PROJECT_REPO_ROOT}"
|
git fetch "${FORGE_REMOTE}" "$PR_HEAD" 2>/dev/null || true
|
||||||
git fetch origin "$PR_HEAD" 2>/dev/null || true
|
|
||||||
|
|
||||||
if [ -d "$WORKTREE" ]; then
|
if [ -d "$WORKTREE" ]; then
|
||||||
cd "$WORKTREE"; git checkout --detach "$PR_SHA" 2>/dev/null || {
|
cd "$WORKTREE"; git checkout --detach "$PR_SHA" 2>/dev/null || {
|
||||||
cd "${PROJECT_REPO_ROOT}"; worktree_cleanup "$WORKTREE"
|
worktree_cleanup "$WORKTREE"
|
||||||
git worktree add "$WORKTREE" "$PR_SHA" --detach 2>/dev/null; }
|
git worktree add "$WORKTREE" "$PR_SHA" --detach 2>/dev/null; }
|
||||||
else
|
else
|
||||||
git worktree add "$WORKTREE" "$PR_SHA" --detach 2>/dev/null
|
git worktree add "$WORKTREE" "$PR_SHA" --detach 2>/dev/null
|
||||||
|
|
@ -222,6 +227,7 @@ PROMPT=$(cat "${REVIEW_TMPDIR}/prompt.md")
|
||||||
status "running review"
|
status "running review"
|
||||||
rm -f "$OUTPUT_FILE"
|
rm -f "$OUTPUT_FILE"
|
||||||
export CLAUDE_MODEL="sonnet"
|
export CLAUDE_MODEL="sonnet"
|
||||||
|
export CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-900}" # 15 min — reviews shouldn't take longer
|
||||||
|
|
||||||
if [ "$IS_RE_REVIEW" = true ] && [ -n "$_AGENT_SESSION_ID" ]; then
|
if [ "$IS_RE_REVIEW" = true ] && [ -n "$_AGENT_SESSION_ID" ]; then
|
||||||
agent_run --resume "$_AGENT_SESSION_ID" --worktree "$WORKTREE" "$PROMPT"
|
agent_run --resume "$_AGENT_SESSION_ID" --worktree "$WORKTREE" "$PROMPT"
|
||||||
|
|
|
||||||
|
|
@ -122,7 +122,8 @@ PAGES=$(printf '%s\n' "$PARSED" | jq -c '
|
||||||
')
|
')
|
||||||
|
|
||||||
TOTAL_REQUESTS=$(printf '%s\n' "$PARSED" | wc -l | tr -d ' ')
|
TOTAL_REQUESTS=$(printf '%s\n' "$PARSED" | wc -l | tr -d ' ')
|
||||||
PAGE_VIEWS=$(printf '%s\n' "$PAGES" | grep -c . || echo 0)
|
PAGE_VIEWS=$(printf '%s\n' "$PAGES" | grep -c . || true)
|
||||||
|
PAGE_VIEWS="${PAGE_VIEWS:-0}"
|
||||||
UNIQUE_VISITORS=$(printf '%s\n' "$PAGES" | jq -r '.ip' | sort -u | wc -l | tr -d ' ')
|
UNIQUE_VISITORS=$(printf '%s\n' "$PAGES" | jq -r '.ip' | sort -u | wc -l | tr -d ' ')
|
||||||
|
|
||||||
# Top pages by hit count
|
# Top pages by hit count
|
||||||
|
|
|
||||||
|
|
@ -370,32 +370,32 @@
|
||||||
<div class="agent-card">
|
<div class="agent-card">
|
||||||
<div class="name">dev-agent</div>
|
<div class="name">dev-agent</div>
|
||||||
<div class="role">Picks up backlog issues, <strong>implements code</strong> in isolated git worktrees, opens PRs. Runs as a persistent tmux session.</div>
|
<div class="role">Picks up backlog issues, <strong>implements code</strong> in isolated git worktrees, opens PRs. Runs as a persistent tmux session.</div>
|
||||||
<div class="trigger">Cron: every 5 min</div>
|
<div class="trigger">Polling loop: every 5 min</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="agent-card">
|
<div class="agent-card">
|
||||||
<div class="name">review-agent</div>
|
<div class="name">review-agent</div>
|
||||||
<div class="role"><strong>Reviews PRs</strong> against project conventions. Approves clean PRs, requests specific changes on others.</div>
|
<div class="role"><strong>Reviews PRs</strong> against project conventions. Approves clean PRs, requests specific changes on others.</div>
|
||||||
<div class="trigger">Cron: every 5 min</div>
|
<div class="trigger">Polling loop: every 5 min</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="agent-card">
|
<div class="agent-card">
|
||||||
<div class="name">planner</div>
|
<div class="name">planner</div>
|
||||||
<div class="role">Reads VISION.md and repo state. <strong>Creates issues</strong> for gaps between where the project is and where it should be.</div>
|
<div class="role">Reads VISION.md and repo state. <strong>Creates issues</strong> for gaps between where the project is and where it should be.</div>
|
||||||
<div class="trigger">Cron: weekly</div>
|
<div class="trigger">Polling loop: weekly</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="agent-card">
|
<div class="agent-card">
|
||||||
<div class="name">gardener</div>
|
<div class="name">gardener</div>
|
||||||
<div class="role"><strong>Grooms the backlog.</strong> Closes duplicates, promotes tech-debt issues, ensures issues are well-structured.</div>
|
<div class="role"><strong>Grooms the backlog.</strong> Closes duplicates, promotes tech-debt issues, ensures issues are well-structured.</div>
|
||||||
<div class="trigger">Cron: every 6 hours</div>
|
<div class="trigger">Polling loop: every 6 hours</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="agent-card">
|
<div class="agent-card">
|
||||||
<div class="name">supervisor</div>
|
<div class="name">supervisor</div>
|
||||||
<div class="role"><strong>Monitors factory health.</strong> Kills stale sessions, manages disk/memory, escalates persistent failures.</div>
|
<div class="role"><strong>Monitors factory health.</strong> Kills stale sessions, manages disk/memory, escalates persistent failures.</div>
|
||||||
<div class="trigger">Cron: every 10 min</div>
|
<div class="trigger">Polling loop: every 10 min</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="agent-card">
|
<div class="agent-card">
|
||||||
<div class="name">predictor</div>
|
<div class="name">predictor</div>
|
||||||
<div class="role">Detects <strong>infrastructure patterns</strong> — recurring failures, resource trends, emerging issues. Files predictions for triage.</div>
|
<div class="role">Detects <strong>infrastructure patterns</strong> — recurring failures, resource trends, emerging issues. Files predictions for triage.</div>
|
||||||
<div class="trigger">Cron: daily</div>
|
<div class="trigger">Polling loop: daily</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="agent-card">
|
<div class="agent-card">
|
||||||
<div class="name">vault</div>
|
<div class="name">vault</div>
|
||||||
|
|
@ -473,7 +473,7 @@
|
||||||
<div class="label">Tech stack</div>
|
<div class="label">Tech stack</div>
|
||||||
<p><strong>Bash scripts</strong> — every agent is a shell script. No compiled binaries, no runtimes to install.</p>
|
<p><strong>Bash scripts</strong> — every agent is a shell script. No compiled binaries, no runtimes to install.</p>
|
||||||
<p><strong>Claude CLI</strong> — AI is invoked via <code>claude -p</code> (one-shot) or <code>claude</code> (persistent tmux sessions).</p>
|
<p><strong>Claude CLI</strong> — AI is invoked via <code>claude -p</code> (one-shot) or <code>claude</code> (persistent tmux sessions).</p>
|
||||||
<p><strong>Cron</strong> — agents are triggered by cron jobs, not a daemon. Pull-based, not push-based.</p>
|
<p><strong>Polling loop</strong> — agents are triggered by a <code>while true</code> loop in <code>entrypoint.sh</code>. Pull-based, not push-based.</p>
|
||||||
<p><strong>Forgejo + Woodpecker</strong> — git hosting and CI. All state lives in git and the issue tracker. No external databases.</p>
|
<p><strong>Forgejo + Woodpecker</strong> — git hosting and CI. All state lives in git and the issue tracker. No external databases.</p>
|
||||||
<p><strong>Single VPS</strong> — runs on an 8 GB server. Flat cost, no scaling surprises.</p>
|
<p><strong>Single VPS</strong> — runs on an 8 GB server. Flat cost, no scaling surprises.</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -485,11 +485,11 @@
|
||||||
<div class="principles">
|
<div class="principles">
|
||||||
<div class="principle">
|
<div class="principle">
|
||||||
<div class="id">AD-001</div>
|
<div class="id">AD-001</div>
|
||||||
<div class="text"><strong>Nervous system runs from cron, not action issues.</strong> Planner, predictor, gardener, supervisor run directly. They create work, they don't become work.</div>
|
<div class="text"><strong>Nervous system runs from a polling loop, not action issues.</strong> Planner, predictor, gardener, supervisor run directly. They create work, they don't become work.</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="principle">
|
<div class="principle">
|
||||||
<div class="id">AD-002</div>
|
<div class="id">AD-002</div>
|
||||||
<div class="text"><strong>Single-threaded pipeline per project.</strong> One dev issue at a time. No new work while a PR awaits CI or review.</div>
|
<div class="text"><strong>Concurrency is bounded per LLM backend, not per project.</strong> One concurrent Claude session per OAuth credential pool; one concurrent session per llama-server instance. Containers with disjoint backends may run in parallel.</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="principle">
|
<div class="principle">
|
||||||
<div class="id">AD-003</div>
|
<div class="id">AD-003</div>
|
||||||
|
|
@ -514,9 +514,9 @@
|
||||||
disinto/
|
disinto/
|
||||||
├── <span class="agent-name">dev/</span> dev-poll.sh, dev-agent.sh, phase-handler.sh
|
├── <span class="agent-name">dev/</span> dev-poll.sh, dev-agent.sh, phase-handler.sh
|
||||||
├── <span class="agent-name">review/</span> review-poll.sh, review-pr.sh
|
├── <span class="agent-name">review/</span> review-poll.sh, review-pr.sh
|
||||||
├── <span class="agent-name">gardener/</span> gardener-run.sh (cron executor)
|
├── <span class="agent-name">gardener/</span> gardener-run.sh (polling-loop executor)
|
||||||
├── <span class="agent-name">predictor/</span> predictor-run.sh (daily cron executor)
|
├── <span class="agent-name">predictor/</span> predictor-run.sh (daily polling-loop executor)
|
||||||
├── <span class="agent-name">planner/</span> planner-run.sh (weekly cron executor)
|
├── <span class="agent-name">planner/</span> planner-run.sh (weekly polling-loop executor)
|
||||||
├── <span class="agent-name">supervisor/</span> supervisor-run.sh (health monitoring)
|
├── <span class="agent-name">supervisor/</span> supervisor-run.sh (health monitoring)
|
||||||
├── <span class="agent-name">vault/</span> vault-env.sh (vault redesign in progress, see #73-#77)
|
├── <span class="agent-name">vault/</span> vault-env.sh (vault redesign in progress, see #73-#77)
|
||||||
├── <span class="agent-name">lib/</span> env.sh, agent-session.sh, ci-helpers.sh
|
├── <span class="agent-name">lib/</span> env.sh, agent-session.sh, ci-helpers.sh
|
||||||
|
|
|
||||||
|
|
@ -353,7 +353,7 @@ cp .env.example .env
|
||||||
<span class="step-num">2</span>
|
<span class="step-num">2</span>
|
||||||
Initialize your project
|
Initialize your project
|
||||||
</div>
|
</div>
|
||||||
<p><code>disinto init</code> starts the full stack (Forgejo + Woodpecker CI), creates your repo, clones it locally, generates the project config, adds labels, and installs cron jobs — all in one command.</p>
|
<p><code>disinto init</code> starts the full stack (Forgejo + Woodpecker CI), creates your repo, clones it locally, generates the project config, adds labels, and configures agent scheduling — all in one command.</p>
|
||||||
<pre><code>bin/disinto init user/your-project</code></pre>
|
<pre><code>bin/disinto init user/your-project</code></pre>
|
||||||
<p>Use <code>disinto up</code> / <code>disinto down</code> later to restart or stop the stack.</p>
|
<p>Use <code>disinto up</code> / <code>disinto down</code> later to restart or stop the stack.</p>
|
||||||
<div class="expected">
|
<div class="expected">
|
||||||
|
|
@ -373,7 +373,7 @@ Creating labels on you/your-project...
|
||||||
+ vision
|
+ vision
|
||||||
+ action
|
+ action
|
||||||
Created: /home/you/your-project/VISION.md
|
Created: /home/you/your-project/VISION.md
|
||||||
Cron entries installed
|
Agent scheduling configured
|
||||||
Done. Project your-project is ready.</code>
|
Done. Project your-project is ready.</code>
|
||||||
</div>
|
</div>
|
||||||
<p>Optional flags:</p>
|
<p>Optional flags:</p>
|
||||||
|
|
|
||||||
|
|
@ -712,7 +712,7 @@
|
||||||
<a href="/dashboard">dashboard</a>
|
<a href="/dashboard">dashboard</a>
|
||||||
</div>
|
</div>
|
||||||
<div class="under-hood">
|
<div class="under-hood">
|
||||||
Under the hood: dev, review, planner, gardener, supervisor, predictor, action, vault, exec — nine agents orchestrated by cron and bash.
|
Under the hood: dev, review, planner, gardener, supervisor, predictor, action, vault, exec — nine agents orchestrated by a polling loop and bash.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
|
||||||
2
state/.gitignore
vendored
2
state/.gitignore
vendored
|
|
@ -1,2 +1,4 @@
|
||||||
# Active-state files are runtime state, not committed
|
# Active-state files are runtime state, not committed
|
||||||
.*-active
|
.*-active
|
||||||
|
# Supervisor is always active in the edge container — committed guard file
|
||||||
|
!.supervisor-active
|
||||||
|
|
|
||||||
0
state/.supervisor-active
Normal file
0
state/.supervisor-active
Normal file
|
|
@ -1,4 +1,4 @@
|
||||||
<!-- last-reviewed: 33eb565d7e0c5b7e0159e1720ba7f79126a7e25e -->
|
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
|
||||||
# Supervisor Agent
|
# Supervisor Agent
|
||||||
|
|
||||||
**Role**: Health monitoring and auto-remediation, executed as a formula-driven
|
**Role**: Health monitoring and auto-remediation, executed as a formula-driven
|
||||||
|
|
@ -7,21 +7,20 @@ then runs an interactive Claude session (sonnet) that assesses health, auto-fixe
|
||||||
issues, and writes a daily journal. When blocked on external
|
issues, and writes a daily journal. When blocked on external
|
||||||
resources or human decisions, files vault items instead of escalating directly.
|
resources or human decisions, files vault items instead of escalating directly.
|
||||||
|
|
||||||
**Trigger**: `supervisor-run.sh` runs every 20 min via cron. Sources `lib/guard.sh`
|
**Trigger**: `supervisor-run.sh` is invoked by the polling loop in `docker/edge/entrypoint-edge.sh`
|
||||||
and calls `check_active supervisor` first — skips if
|
every 20 minutes (line 50-53). Sources `lib/guard.sh` and calls `check_active supervisor` first
|
||||||
`$FACTORY_ROOT/state/.supervisor-active` is absent. Then creates a tmux session
|
— skips if `$FACTORY_ROOT/state/.supervisor-active` is absent. Then runs `claude -p` via
|
||||||
with `claude --model sonnet`, injects `formulas/run-supervisor.toml` with
|
`agent-sdk.sh`, injects `formulas/run-supervisor.toml` with pre-collected metrics as context,
|
||||||
pre-collected metrics as context, monitors the phase file, and cleans up on
|
and cleans up on completion or timeout (20 min max session). Note: the supervisor runs in the
|
||||||
completion or timeout (20 min max session). No action issues — the supervisor
|
**edge container** (`entrypoint-edge.sh`), not the agent container — this distinction matters
|
||||||
runs directly from cron like the planner and predictor.
|
for operators debugging the factory.
|
||||||
|
|
||||||
**Key files**:
|
**Key files**:
|
||||||
- `supervisor/supervisor-run.sh` — Cron wrapper + orchestrator: lock, memory guard,
|
- `supervisor/supervisor-run.sh` — Polling loop participant + orchestrator: lock, memory guard,
|
||||||
runs preflight.sh, sources disinto project config, creates tmux session, injects
|
runs preflight.sh, sources disinto project config, runs claude -p via agent-sdk.sh,
|
||||||
formula prompt with metrics, monitors phase file, handles crash recovery via
|
injects formula prompt with metrics, handles crash recovery
|
||||||
`run_formula_and_monitor`
|
|
||||||
- `supervisor/preflight.sh` — Data collection: system resources (RAM, disk, swap,
|
- `supervisor/preflight.sh` — Data collection: system resources (RAM, disk, swap,
|
||||||
load), Docker status, active tmux sessions + phase files, lock files, agent log
|
load), Docker status, active sessions + phase files, lock files, agent log
|
||||||
tails, CI pipeline status, open PRs, issue counts, stale worktrees, blocked
|
tails, CI pipeline status, open PRs, issue counts, stale worktrees, blocked
|
||||||
issues. Also performs **stale phase cleanup**: scans `/tmp/*-session-*.phase`
|
issues. Also performs **stale phase cleanup**: scans `/tmp/*-session-*.phase`
|
||||||
files for `PHASE:escalate` entries and auto-removes any whose linked issue
|
files for `PHASE:escalate` entries and auto-removes any whose linked issue
|
||||||
|
|
@ -33,8 +32,6 @@ runs directly from cron like the planner and predictor.
|
||||||
Claude evaluates all metrics and takes actions in a single interactive session
|
Claude evaluates all metrics and takes actions in a single interactive session
|
||||||
- `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory,
|
- `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory,
|
||||||
disk, CI, git, dev-agent, review-agent, forge)
|
disk, CI, git, dev-agent, review-agent, forge)
|
||||||
- `supervisor/supervisor-poll.sh` — Legacy bash orchestrator (superseded by
|
|
||||||
supervisor-run.sh + formula)
|
|
||||||
|
|
||||||
**Alert priorities**: P0 (memory crisis), P1 (disk), P2 (factory stopped/stalled),
|
**Alert priorities**: P0 (memory crisis), P1 (disk), P2 (factory stopped/stalled),
|
||||||
P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping).
|
P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping).
|
||||||
|
|
@ -44,6 +41,12 @@ P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping).
|
||||||
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by supervisor-run.sh)
|
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by supervisor-run.sh)
|
||||||
- `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries
|
- `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries
|
||||||
|
|
||||||
**Lifecycle**: supervisor-run.sh (cron */20) → lock + memory guard → run
|
**Degraded mode (Issue #544)**: When `OPS_REPO_ROOT` is not set or the directory doesn't exist, the supervisor runs in degraded mode:
|
||||||
preflight.sh (collect metrics) → load formula + context → create tmux
|
- Uses bundled knowledge files from `$FACTORY_ROOT/knowledge/` instead of ops repo playbooks
|
||||||
session → Claude assesses health, auto-fixes, writes journal → `PHASE:done`.
|
- Writes journal locally to `$FACTORY_ROOT/state/supervisor-journal/` (not committed to git)
|
||||||
|
- Files vault items locally to `$PROJECT_REPO_ROOT/vault/pending/`
|
||||||
|
- Logs a WARNING message at startup indicating degraded mode
|
||||||
|
|
||||||
|
**Lifecycle**: supervisor-run.sh (invoked by polling loop every 20min, `check_active supervisor`)
|
||||||
|
→ lock + memory guard → run preflight.sh (collect metrics) → load formula + context → run
|
||||||
|
claude -p via agent-sdk.sh → Claude assesses health, auto-fixes, writes journal → `PHASE:done`.
|
||||||
|
|
|
||||||
|
|
@ -146,27 +146,24 @@ done
|
||||||
|
|
||||||
echo "## CI Pipelines (${PROJECT_NAME})"
|
echo "## CI Pipelines (${PROJECT_NAME})"
|
||||||
|
|
||||||
_recent_ci=$(wpdb -A -c "
|
# Fetch pipelines via Woodpecker REST API (database-driver-agnostic)
|
||||||
SELECT number, status, branch,
|
_pipelines=$(woodpecker_api "/repos/${WOODPECKER_REPO_ID}/pipelines?perPage=50" 2>/dev/null || echo '[]')
|
||||||
ROUND(EXTRACT(EPOCH FROM (to_timestamp(finished) - to_timestamp(started)))/60)::int as dur_min
|
_now=$(date +%s)
|
||||||
FROM pipelines
|
|
||||||
WHERE repo_id = ${WOODPECKER_REPO_ID}
|
# Recent pipelines (finished in last 24h = 86400s), sorted by number DESC
|
||||||
AND finished > 0
|
_recent_ci=$(echo "$_pipelines" | jq -r --argjson now "$_now" '
|
||||||
AND to_timestamp(finished) > now() - interval '24 hours'
|
[.[] | select(.finished > 0) | select(($now - .finished) < 86400)]
|
||||||
ORDER BY number DESC LIMIT 10;" 2>/dev/null || echo "CI database query failed")
|
| sort_by(-.number) | .[0:10]
|
||||||
|
| .[] | "\(.number)\t\(.status)\t\(.branch)\t\((.finished - .started) / 60 | floor)"' 2>/dev/null || echo "CI query failed")
|
||||||
echo "$_recent_ci"
|
echo "$_recent_ci"
|
||||||
|
|
||||||
_stuck=$(wpdb -c "
|
# Stuck: running pipelines older than 20min (1200s)
|
||||||
SELECT count(*) FROM pipelines
|
_stuck=$(echo "$_pipelines" | jq --argjson now "$_now" '
|
||||||
WHERE repo_id=${WOODPECKER_REPO_ID}
|
[.[] | select(.status == "running") | select(($now - .started) > 1200)] | length' 2>/dev/null || echo "?")
|
||||||
AND status='running'
|
|
||||||
AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200;" 2>/dev/null | xargs || echo "?")
|
|
||||||
|
|
||||||
_pending=$(wpdb -c "
|
# Pending: pending pipelines older than 30min (1800s)
|
||||||
SELECT count(*) FROM pipelines
|
_pending=$(echo "$_pipelines" | jq --argjson now "$_now" '
|
||||||
WHERE repo_id=${WOODPECKER_REPO_ID}
|
[.[] | select(.status == "pending") | select(($now - .created) > 1800)] | length' 2>/dev/null || echo "?")
|
||||||
AND status='pending'
|
|
||||||
AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800;" 2>/dev/null | xargs || echo "?")
|
|
||||||
|
|
||||||
echo "Stuck (>20min): ${_stuck}"
|
echo "Stuck (>20min): ${_stuck}"
|
||||||
echo "Pending (>30min): ${_pending}"
|
echo "Pending (>30min): ${_pending}"
|
||||||
|
|
@ -217,7 +214,9 @@ echo ""
|
||||||
|
|
||||||
echo "## Pending Vault Items"
|
echo "## Pending Vault Items"
|
||||||
_found_vault=false
|
_found_vault=false
|
||||||
for _vf in "${OPS_REPO_ROOT}/vault/pending/"*.md; do
|
# Use OPS_VAULT_ROOT if set (from supervisor-run.sh degraded mode detection), otherwise default to OPS_REPO_ROOT
|
||||||
|
_va_root="${OPS_VAULT_ROOT:-${OPS_REPO_ROOT}/vault/pending}"
|
||||||
|
for _vf in "${_va_root}"/*.md; do
|
||||||
[ -f "$_vf" ] || continue
|
[ -f "$_vf" ] || continue
|
||||||
_found_vault=true
|
_found_vault=true
|
||||||
_vtitle=$(grep -m1 '^# ' "$_vf" | sed 's/^# //' || basename "$_vf")
|
_vtitle=$(grep -m1 '^# ' "$_vf" | sed 's/^# //' || basename "$_vf")
|
||||||
|
|
|
||||||
|
|
@ -1,808 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
set -euo pipefail
|
|
||||||
# supervisor-poll.sh — Supervisor agent: bash checks + claude -p for fixes
|
|
||||||
#
|
|
||||||
# Two-layer architecture:
|
|
||||||
# 1. Factory infrastructure (project-agnostic): RAM, disk, swap, docker, stale processes
|
|
||||||
# 2. Per-project checks (config-driven): CI, PRs, dev-agent, deps — iterated over projects/*.toml
|
|
||||||
#
|
|
||||||
# Runs every 10min via cron.
|
|
||||||
#
|
|
||||||
# Cron: */10 * * * * /path/to/disinto/supervisor/supervisor-poll.sh
|
|
||||||
#
|
|
||||||
# Peek: cat /tmp/supervisor-status
|
|
||||||
# Log: tail -f /path/to/disinto/supervisor/supervisor.log
|
|
||||||
|
|
||||||
source "$(dirname "$0")/../lib/env.sh"
|
|
||||||
source "$(dirname "$0")/../lib/ci-helpers.sh"
|
|
||||||
|
|
||||||
LOGFILE="${DISINTO_LOG_DIR}/supervisor/supervisor.log"
|
|
||||||
STATUSFILE="/tmp/supervisor-status"
|
|
||||||
LOCKFILE="/tmp/supervisor-poll.lock"
|
|
||||||
PROMPT_FILE="${FACTORY_ROOT}/formulas/run-supervisor.toml"
|
|
||||||
PROJECTS_DIR="${FACTORY_ROOT}/projects"
|
|
||||||
|
|
||||||
METRICS_FILE="${DISINTO_LOG_DIR}/metrics/supervisor-metrics.jsonl"
|
|
||||||
|
|
||||||
emit_metric() {
|
|
||||||
printf '%s\n' "$1" >> "$METRICS_FILE"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Count all matching items from a paginated forge API endpoint.
|
|
||||||
# Usage: codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues"
|
|
||||||
# Returns total count across all pages (max 20 pages = 1000 items).
|
|
||||||
codeberg_count_paginated() {
|
|
||||||
local endpoint="$1" total=0 page=1 count
|
|
||||||
while true; do
|
|
||||||
count=$(forge_api GET "${endpoint}&limit=50&page=${page}" 2>/dev/null | jq 'length' 2>/dev/null || echo 0)
|
|
||||||
total=$((total + ${count:-0}))
|
|
||||||
[ "${count:-0}" -lt 50 ] && break
|
|
||||||
page=$((page + 1))
|
|
||||||
[ "$page" -gt 20 ] && break
|
|
||||||
done
|
|
||||||
echo "$total"
|
|
||||||
}
|
|
||||||
|
|
||||||
rotate_metrics() {
|
|
||||||
[ -f "$METRICS_FILE" ] || return 0
|
|
||||||
local cutoff tmpfile
|
|
||||||
cutoff=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M)
|
|
||||||
tmpfile="${METRICS_FILE}.tmp"
|
|
||||||
jq -c --arg cutoff "$cutoff" 'select(.ts >= $cutoff)' \
|
|
||||||
"$METRICS_FILE" > "$tmpfile" 2>/dev/null
|
|
||||||
# Only replace if jq produced output, or the source is already empty
|
|
||||||
if [ -s "$tmpfile" ] || [ ! -s "$METRICS_FILE" ]; then
|
|
||||||
mv "$tmpfile" "$METRICS_FILE"
|
|
||||||
else
|
|
||||||
rm -f "$tmpfile"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Prevent overlapping runs
|
|
||||||
if [ -f "$LOCKFILE" ]; then
|
|
||||||
LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null)
|
|
||||||
if kill -0 "$LOCK_PID" 2>/dev/null; then
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
rm -f "$LOCKFILE"
|
|
||||||
fi
|
|
||||||
echo $$ > "$LOCKFILE"
|
|
||||||
trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT
|
|
||||||
mkdir -p "$(dirname "$METRICS_FILE")"
|
|
||||||
rotate_metrics
|
|
||||||
|
|
||||||
flog() {
|
|
||||||
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
|
||||||
}
|
|
||||||
|
|
||||||
status() {
|
|
||||||
printf '[%s] supervisor: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE"
|
|
||||||
flog "$*"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Alerts by priority
|
|
||||||
P0_ALERTS=""
|
|
||||||
P1_ALERTS=""
|
|
||||||
P2_ALERTS=""
|
|
||||||
P3_ALERTS=""
|
|
||||||
P4_ALERTS=""
|
|
||||||
|
|
||||||
p0() { P0_ALERTS="${P0_ALERTS}• [P0] $*\n"; flog "P0: $*"; }
|
|
||||||
p1() { P1_ALERTS="${P1_ALERTS}• [P1] $*\n"; flog "P1: $*"; }
|
|
||||||
p2() { P2_ALERTS="${P2_ALERTS}• [P2] $*\n"; flog "P2: $*"; }
|
|
||||||
p3() { P3_ALERTS="${P3_ALERTS}• [P3] $*\n"; flog "P3: $*"; }
|
|
||||||
p4() { P4_ALERTS="${P4_ALERTS}• [P4] $*\n"; flog "P4: $*"; }
|
|
||||||
|
|
||||||
FIXES=""
|
|
||||||
fixed() { FIXES="${FIXES}• ✅ $*\n"; flog "FIXED: $*"; }
|
|
||||||
|
|
||||||
# #############################################################################
|
|
||||||
# LAYER 1: FACTORY INFRASTRUCTURE
|
|
||||||
# (project-agnostic, runs once)
|
|
||||||
# #############################################################################
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# P0: MEMORY — check first, fix first
|
|
||||||
# =============================================================================
|
|
||||||
status "P0: checking memory"
|
|
||||||
|
|
||||||
AVAIL_MB=$(free -m | awk '/Mem:/{print $7}')
|
|
||||||
SWAP_USED_MB=$(free -m | awk '/Swap:/{print $3}')
|
|
||||||
|
|
||||||
if [ "${AVAIL_MB:-9999}" -lt 500 ] || { [ "${SWAP_USED_MB:-0}" -gt 3000 ] && [ "${AVAIL_MB:-9999}" -lt 2000 ]; }; then
|
|
||||||
flog "MEMORY CRISIS: avail=${AVAIL_MB}MB swap_used=${SWAP_USED_MB}MB — auto-fixing"
|
|
||||||
|
|
||||||
# Kill stale agent-spawned claude processes (>3h old) — skip interactive sessions
|
|
||||||
STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true)
|
|
||||||
if [ -n "$STALE_CLAUDES" ]; then
|
|
||||||
echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true
|
|
||||||
fixed "Killed stale claude processes: ${STALE_CLAUDES}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Drop filesystem caches
|
|
||||||
sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1
|
|
||||||
fixed "Dropped filesystem caches"
|
|
||||||
|
|
||||||
# Re-check after fixes
|
|
||||||
AVAIL_MB_AFTER=$(free -m | awk '/Mem:/{print $7}')
|
|
||||||
SWAP_AFTER=$(free -m | awk '/Swap:/{print $3}')
|
|
||||||
|
|
||||||
if [ "${AVAIL_MB_AFTER:-0}" -lt 500 ] || [ "${SWAP_AFTER:-0}" -gt 3000 ]; then
|
|
||||||
p0 "Memory still critical after auto-fix: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB"
|
|
||||||
else
|
|
||||||
flog "Memory recovered: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# P0 alerts already logged — clear so they are not duplicated in the final consolidated log
|
|
||||||
if [ -n "$P0_ALERTS" ]; then
|
|
||||||
P0_ALERTS=""
|
|
||||||
fi
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# P1: DISK
|
|
||||||
# =============================================================================
|
|
||||||
status "P1: checking disk"
|
|
||||||
|
|
||||||
DISK_PERCENT=$(df -h / | awk 'NR==2{print $5}' | tr -d '%')
|
|
||||||
|
|
||||||
if [ "${DISK_PERCENT:-0}" -gt 80 ]; then
|
|
||||||
flog "DISK PRESSURE: ${DISK_PERCENT}% — auto-cleaning"
|
|
||||||
|
|
||||||
# Docker cleanup (safe — keeps images)
|
|
||||||
sudo docker system prune -f >/dev/null 2>&1 && fixed "Docker prune"
|
|
||||||
|
|
||||||
# Truncate logs >10MB
|
|
||||||
for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do
|
|
||||||
if [ -f "$logfile" ]; then
|
|
||||||
SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1)
|
|
||||||
if [ "${SIZE_KB:-0}" -gt 10240 ]; then
|
|
||||||
truncate -s 0 "$logfile"
|
|
||||||
fixed "Truncated $(basename "$logfile") (was ${SIZE_KB}KB)"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Woodpecker log_entries cleanup
|
|
||||||
LOG_ENTRIES_MB=$(wpdb -c "SELECT pg_size_pretty(pg_total_relation_size('log_entries'));" 2>/dev/null | xargs)
|
|
||||||
if echo "$LOG_ENTRIES_MB" | grep -qP '\d+\s*(GB|MB)'; then
|
|
||||||
SIZE_NUM=$(echo "$LOG_ENTRIES_MB" | grep -oP '\d+')
|
|
||||||
SIZE_UNIT=$(echo "$LOG_ENTRIES_MB" | grep -oP '(GB|MB)')
|
|
||||||
if [ "$SIZE_UNIT" = "GB" ] || { [ "$SIZE_UNIT" = "MB" ] && [ "$SIZE_NUM" -gt 500 ]; }; then
|
|
||||||
wpdb -c "DELETE FROM log_entries WHERE id < (SELECT max(id) - 100000 FROM log_entries);" 2>/dev/null
|
|
||||||
fixed "Trimmed Woodpecker log_entries (was ${LOG_ENTRIES_MB})"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
DISK_AFTER=$(df -h / | awk 'NR==2{print $5}' | tr -d '%')
|
|
||||||
if [ "${DISK_AFTER:-0}" -gt 80 ]; then
|
|
||||||
p1 "Disk still ${DISK_AFTER}% after auto-clean"
|
|
||||||
else
|
|
||||||
flog "Disk recovered: ${DISK_AFTER}%"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# P1 alerts already logged — clear so they are not duplicated in the final consolidated log
|
|
||||||
if [ -n "$P1_ALERTS" ]; then
|
|
||||||
P1_ALERTS=""
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Emit infra metric
|
|
||||||
_RAM_TOTAL_MB=$(free -m | awk '/Mem:/{print $2}')
|
|
||||||
_RAM_USED_PCT=$(( ${_RAM_TOTAL_MB:-0} > 0 ? (${_RAM_TOTAL_MB:-0} - ${AVAIL_MB:-0}) * 100 / ${_RAM_TOTAL_MB:-1} : 0 ))
|
|
||||||
emit_metric "$(jq -nc \
|
|
||||||
--arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \
|
|
||||||
--argjson ram "${_RAM_USED_PCT:-0}" \
|
|
||||||
--argjson disk "${DISK_PERCENT:-0}" \
|
|
||||||
--argjson swap "${SWAP_USED_MB:-0}" \
|
|
||||||
'{ts:$ts,type:"infra",ram_used_pct:$ram,disk_used_pct:$disk,swap_mb:$swap}' 2>/dev/null)" 2>/dev/null || true
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# P4-INFRA: HOUSEKEEPING — stale processes, log rotation (project-agnostic)
|
|
||||||
# =============================================================================
|
|
||||||
status "P4: infra housekeeping"
|
|
||||||
|
|
||||||
# Stale agent-spawned claude processes (>3h) — skip interactive sessions
|
|
||||||
STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true)
|
|
||||||
if [ -n "$STALE_CLAUDES" ]; then
|
|
||||||
echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true
|
|
||||||
fixed "Killed stale claude processes: $(echo $STALE_CLAUDES | wc -w) procs"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Rotate logs >5MB
|
|
||||||
for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do
|
|
||||||
if [ -f "$logfile" ]; then
|
|
||||||
SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1)
|
|
||||||
if [ "${SIZE_KB:-0}" -gt 5120 ]; then
|
|
||||||
mv "$logfile" "${logfile}.old" 2>/dev/null
|
|
||||||
fixed "Rotated $(basename "$logfile")"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# #############################################################################
|
|
||||||
# LAYER 2: PER-PROJECT CHECKS
|
|
||||||
# (iterated over projects/*.toml, config-driven)
|
|
||||||
# #############################################################################
|
|
||||||
|
|
||||||
# Infra retry tracking (shared across projects, created once)
|
|
||||||
_RETRY_DIR="/tmp/supervisor-infra-retries"
|
|
||||||
mkdir -p "$_RETRY_DIR"
|
|
||||||
|
|
||||||
# Function: run all per-project checks for the currently loaded project config
|
|
||||||
check_project() {
|
|
||||||
local proj_name="${PROJECT_NAME:-unknown}"
|
|
||||||
flog "── checking project: ${proj_name} (${FORGE_REPO}) ──"
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# P2: FACTORY STOPPED — CI, dev-agent, git
|
|
||||||
# ===========================================================================
|
|
||||||
status "P2: ${proj_name}: checking pipeline"
|
|
||||||
|
|
||||||
# CI stuck
|
|
||||||
STUCK_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='running' AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200;" 2>/dev/null | xargs || true)
|
|
||||||
[ "${STUCK_CI:-0}" -gt 0 ] 2>/dev/null && p2 "${proj_name}: CI: ${STUCK_CI} pipeline(s) running >20min"
|
|
||||||
|
|
||||||
PENDING_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='pending' AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800;" 2>/dev/null | xargs || true)
|
|
||||||
[ "${PENDING_CI:-0}" -gt 0 ] && p2 "${proj_name}: CI: ${PENDING_CI} pipeline(s) pending >30min"
|
|
||||||
|
|
||||||
# Emit CI metric (last completed pipeline within 24h — skip if project has no recent CI)
|
|
||||||
_CI_ROW=$(wpdb -A -F ',' -c "SELECT id, COALESCE(ROUND(EXTRACT(EPOCH FROM (to_timestamp(finished) - to_timestamp(started)))/60)::int, 0), status FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status IN ('success','failure','error') AND finished > 0 AND to_timestamp(finished) > now() - interval '24 hours' ORDER BY id DESC LIMIT 1;" 2>/dev/null | grep -E '^[0-9]' | head -1 || true)
|
|
||||||
if [ -n "$_CI_ROW" ]; then
|
|
||||||
_CI_ID=$(echo "$_CI_ROW" | cut -d',' -f1 | tr -d ' ')
|
|
||||||
_CI_DUR=$(echo "$_CI_ROW" | cut -d',' -f2 | tr -d ' ')
|
|
||||||
_CI_STAT=$(echo "$_CI_ROW" | cut -d',' -f3 | tr -d ' ')
|
|
||||||
emit_metric "$(jq -nc \
|
|
||||||
--arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \
|
|
||||||
--arg proj "$proj_name" \
|
|
||||||
--argjson pipeline "${_CI_ID:-0}" \
|
|
||||||
--argjson duration "${_CI_DUR:-0}" \
|
|
||||||
--arg status "${_CI_STAT:-unknown}" \
|
|
||||||
'{ts:$ts,type:"ci",project:$proj,pipeline:$pipeline,duration_min:$duration,status:$status}' 2>/dev/null)" 2>/dev/null || true
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# P2e: INFRA FAILURES — auto-retrigger pipelines with infra failures
|
|
||||||
# ===========================================================================
|
|
||||||
if [ "${CHECK_INFRA_RETRY:-true}" = "true" ]; then
|
|
||||||
status "P2e: ${proj_name}: checking infra failures"
|
|
||||||
|
|
||||||
# Recent failed pipelines (last 6h)
|
|
||||||
_failed_nums=$(wpdb -A -c "
|
|
||||||
SELECT number FROM pipelines
|
|
||||||
WHERE repo_id = ${WOODPECKER_REPO_ID}
|
|
||||||
AND status IN ('failure', 'error')
|
|
||||||
AND finished > 0
|
|
||||||
AND to_timestamp(finished) > now() - interval '6 hours'
|
|
||||||
ORDER BY number DESC LIMIT 5;" 2>/dev/null \
|
|
||||||
| tr -d ' ' | grep -E '^[0-9]+$' || true)
|
|
||||||
|
|
||||||
# shellcheck disable=SC2086
|
|
||||||
for _pip_num in $_failed_nums; do
|
|
||||||
[ -z "$_pip_num" ] && continue
|
|
||||||
|
|
||||||
# Check retry count; alert if retries exhausted
|
|
||||||
_retry_file="${_RETRY_DIR}/${WOODPECKER_REPO_ID}-${_pip_num}"
|
|
||||||
_retries=0
|
|
||||||
[ -f "$_retry_file" ] && _retries=$(cat "$_retry_file" 2>/dev/null || echo 0)
|
|
||||||
if [ "${_retries:-0}" -ge 2 ]; then
|
|
||||||
p2 "${proj_name}: Pipeline #${_pip_num}: infra retries exhausted (2/2), needs manual investigation"
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Classify failure type via shared helper
|
|
||||||
_classification=$(classify_pipeline_failure "${WOODPECKER_REPO_ID}" "$_pip_num" 2>/dev/null || echo "code")
|
|
||||||
|
|
||||||
if [[ "$_classification" == infra* ]]; then
|
|
||||||
_infra_reason="${_classification#infra }"
|
|
||||||
_new_retries=$(( _retries + 1 ))
|
|
||||||
if woodpecker_api "/repos/${WOODPECKER_REPO_ID}/pipelines/${_pip_num}" \
|
|
||||||
-X POST >/dev/null 2>&1; then
|
|
||||||
echo "$_new_retries" > "$_retry_file"
|
|
||||||
fixed "${proj_name}: Retriggered pipeline #${_pip_num} (${_infra_reason}, retry ${_new_retries}/2)"
|
|
||||||
else
|
|
||||||
p2 "${proj_name}: Pipeline #${_pip_num}: infra failure (${_infra_reason}) but retrigger API call failed"
|
|
||||||
flog "${proj_name}: Failed to retrigger pipeline #${_pip_num}: API error"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Clean up stale retry tracking files (>24h)
|
|
||||||
find "$_RETRY_DIR" -type f -mmin +1440 -delete 2>/dev/null || true
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Dev-agent health (only if monitoring enabled)
# Removes stale lock files and alerts when a live agent's status file stops moving.
if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then
    DEV_LOCK="/tmp/dev-agent-${proj_name}.lock"
    if [ -f "$DEV_LOCK" ]; then
        DEV_PID=$(cat "$DEV_LOCK" 2>/dev/null)
        # Empty lock file or dead PID → the lock is stale and safe to remove.
        # (Previously an empty PID made kill -0 error out and was treated the
        # same way, but with a confusing "PID  dead" message.)
        if [ -z "$DEV_PID" ] || ! kill -0 "$DEV_PID" 2>/dev/null; then
            rm -f "$DEV_LOCK"
            fixed "${proj_name}: Removed stale dev-agent lock (PID ${DEV_PID:-unknown} dead)"
        else
            # Agent is alive — check how long its status file has been unchanged.
            DEV_STATUS_FILE="/tmp/dev-agent-status-${proj_name}"
            if [ -f "$DEV_STATUS_FILE" ]; then
                DEV_STATUS_AGE=$(stat -c %Y "$DEV_STATUS_FILE" 2>/dev/null || echo 0)
                NOW_EPOCH=$(date +%s)
                STATUS_AGE_MIN=$(( (NOW_EPOCH - DEV_STATUS_AGE) / 60 ))
                if [ "$STATUS_AGE_MIN" -gt 30 ]; then
                    p2 "${proj_name}: Dev-agent: status unchanged for ${STATUS_AGE_MIN}min"
                fi
            else
                # Bug fix: a missing status file used to fall back to epoch 0
                # and produce an absurd multi-million-minute age in the alert.
                p2 "${proj_name}: Dev-agent: running (PID ${DEV_PID}) but status file missing"
            fi
        fi
    fi
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Git repo health
# Aborts stale rebases and steers the checkout back to the primary branch.
if [ -d "${PROJECT_REPO_ROOT}" ]; then
    # Bug fix: the old `cd ... || true` silently kept the previous working
    # directory on failure, so the git commands below could operate on the
    # wrong repository. Guard the whole section on a successful cd instead.
    if cd "${PROJECT_REPO_ROOT}" 2>/dev/null; then
        GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
        # A rebase is in flight iff either of git's rebase state dirs exists
        if [ -d .git/rebase-merge ] || [ -d .git/rebase-apply ]; then
            GIT_REBASE="yes"
        else
            GIT_REBASE="no"
        fi

        if [ "$GIT_REBASE" = "yes" ]; then
            git rebase --abort 2>/dev/null && git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \
                fixed "${proj_name}: Aborted stale rebase, switched to ${PRIMARY_BRANCH}" || \
                p2 "${proj_name}: Git: stale rebase, auto-abort failed"
        fi
        if [ "$GIT_BRANCH" != "${PRIMARY_BRANCH}" ] && [ "$GIT_BRANCH" != "unknown" ]; then
            git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \
                fixed "${proj_name}: Switched repo from '${GIT_BRANCH}' to ${PRIMARY_BRANCH}" || \
                p2 "${proj_name}: Git: on '${GIT_BRANCH}' instead of ${PRIMARY_BRANCH}"
        fi
    else
        p2 "${proj_name}: Git: cannot cd into ${PROJECT_REPO_ROOT}"
    fi
fi
|
|
||||||
|
|
||||||
# ===========================================================================
# P2b: FACTORY STALLED — backlog exists but no agent running
# ===========================================================================
if [ "${CHECK_PIPELINE_STALL:-true}" = "true" ]; then
    status "P2: ${proj_name}: checking pipeline stall"

    # limit=1 is a deliberate cheap existence probe — we only need >0 / ==0,
    # never a real count (real counts use codeberg_count_paginated elsewhere).
    BACKLOG_COUNT=$(forge_api GET "/issues?state=open&labels=backlog&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0")
    IN_PROGRESS=$(forge_api GET "/issues?state=open&labels=in-progress&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0")

    if [ "${BACKLOG_COUNT:-0}" -gt 0 ] && [ "${IN_PROGRESS:-0}" -eq 0 ]; then
        # Missing log → treat the agent as never having run (idle since epoch)
        DEV_LOG="${DISINTO_LOG_DIR}/dev/dev-agent.log"
        if [ -f "$DEV_LOG" ]; then
            LAST_LOG_EPOCH=$(stat -c %Y "$DEV_LOG" 2>/dev/null || echo 0)
        else
            LAST_LOG_EPOCH=0
        fi
        NOW_EPOCH=$(date +%s)
        IDLE_MIN=$(( (NOW_EPOCH - LAST_LOG_EPOCH) / 60 ))

        if [ "$IDLE_MIN" -gt 20 ]; then
            # Bug fix: the old message interpolated BACKLOG_COUNT, which is
            # capped at 1 by the limit=1 probe and therefore always read
            # "1 backlog issue(s)" regardless of the real backlog size.
            p2 "${proj_name}: Pipeline stalled: backlog issue(s) waiting, no agent ran for ${IDLE_MIN}min"
        fi
    fi
fi
|
|
||||||
|
|
||||||
# ===========================================================================
# P2c: DEV-AGENT PRODUCTIVITY — all backlog blocked for too long
# ===========================================================================
if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then
    status "P2: ${proj_name}: checking dev-agent productivity"

    DEV_LOG_FILE="${DISINTO_LOG_DIR}/dev/dev-agent.log"
    if [ -f "$DEV_LOG_FILE" ]; then
        # Window: the last six "poll:" entries found within the last 100 log lines
        RECENT_POLLS=$(tail -100 "$DEV_LOG_FILE" | grep "poll:" | tail -6)
        # grep -c "." counts non-empty lines; || true keeps a zero-match grep
        # from aborting the run
        BLOCKED_IN_RECENT=$(echo "$RECENT_POLLS" | grep -c "no ready issues" || true)
        TOTAL_RECENT=$(echo "$RECENT_POLLS" | grep -c "." || true)
        # Alert only when the window is full (six polls) and every single one
        # of them reported no ready work
        if [ "$BLOCKED_IN_RECENT" -eq "$TOTAL_RECENT" ] && [ "$TOTAL_RECENT" -ge 6 ]; then
            p2 "${proj_name}: Dev-agent blocked: last ${BLOCKED_IN_RECENT} polls all report 'no ready issues'"
        fi
    fi
fi
|
|
||||||
|
|
||||||
# ===========================================================================
# P3: FACTORY DEGRADED — derailed PRs, unreviewed PRs
# ===========================================================================
if [ "${CHECK_PRS:-true}" = "true" ]; then
    status "P3: ${proj_name}: checking PRs"

    # Only the first 10 open PRs are examined per run
    OPEN_PRS=$(forge_api GET "/pulls?state=open&limit=10" 2>/dev/null | jq -r '.[].number' 2>/dev/null || true)
    for pr in $OPEN_PRS; do
        PR_JSON=$(forge_api GET "/pulls/${pr}" 2>/dev/null || true)
        [ -z "$PR_JSON" ] && continue
        PR_SHA=$(echo "$PR_JSON" | jq -r '.head.sha // ""')
        [ -z "$PR_SHA" ] && continue

        # CI state of the PR head commit (empty string on lookup failure)
        CI_STATE=$(ci_commit_status "$PR_SHA" 2>/dev/null || true)

        # A missing mergeable field is treated as mergeable (// true)
        MERGEABLE=$(echo "$PR_JSON" | jq -r '.mergeable // true')
        if [ "$MERGEABLE" = "false" ] && ci_passed "$CI_STATE"; then
            # Green CI but conflicting — the branch just needs a rebase
            p3 "${proj_name}: PR #${pr}: CI pass but merge conflict — needs rebase"
        elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then
            # Failing CI: only alert once the PR has sat untouched >30min
            UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""')
            if [ -n "$UPDATED" ]; then
                UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0)
                NOW_EPOCH=$(date +%s)
                AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 ))
                [ "$AGE_MIN" -gt 30 ] && p3 "${proj_name}: PR #${pr}: CI=${CI_STATE}, stale ${AGE_MIN}min"
            fi
        elif ci_passed "$CI_STATE"; then
            # Green CI: look for a review marker comment tied to this exact head SHA
            # (only the first 50 comments are searched)
            HAS_REVIEW=$(forge_api GET "/issues/${pr}/comments?limit=50" 2>/dev/null | \
                jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains("<!-- reviewed: " + $sha))] | length' 2>/dev/null || echo "0")

            if [ "${HAS_REVIEW:-0}" -eq 0 ]; then
                # Unreviewed for over an hour → alert and kick off a review
                UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""')
                if [ -n "$UPDATED" ]; then
                    UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0)
                    NOW_EPOCH=$(date +%s)
                    AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 ))
                    if [ "$AGE_MIN" -gt 60 ]; then
                        p3 "${proj_name}: PR #${pr}: CI passed, no review for ${AGE_MIN}min"
                        # Fire-and-forget background review; output goes to the review log
                        bash "${FACTORY_ROOT}/review/review-pr.sh" "$pr" >> "${DISINTO_LOG_DIR}/review/review.log" 2>&1 &
                        fixed "${proj_name}: Auto-triggered review for PR #${pr}"
                    fi
                fi
            fi
        fi
    done
fi
|
|
||||||
|
|
||||||
# ===========================================================================
# P3b: CIRCULAR DEPENDENCIES — deadlock detection
# ===========================================================================
status "P3: ${proj_name}: checking for circular dependencies"

# Up to 50 open backlog issues form the nodes of the dependency graph
BACKLOG_FOR_DEPS=$(forge_api GET "/issues?state=open&labels=backlog&type=issues&limit=50" 2>/dev/null || true)
if [ -n "$BACKLOG_FOR_DEPS" ] && [ "$BACKLOG_FOR_DEPS" != "null" ] && [ "$(echo "$BACKLOG_FOR_DEPS" | jq 'length' 2>/dev/null || echo 0)" -gt 0 ]; then

    PARSE_DEPS="${FACTORY_ROOT}/lib/parse-deps.sh"
    ISSUE_COUNT=$(echo "$BACKLOG_FOR_DEPS" | jq 'length')

    # DEPS_OF[issue]  = whitespace-separated dependency issue numbers
    # BACKLOG_NUMS[n] = 1 for every backlog issue (graph-membership test)
    declare -A DEPS_OF
    declare -A BACKLOG_NUMS
    for i in $(seq 0 $((ISSUE_COUNT - 1))); do
        NUM=$(echo "$BACKLOG_FOR_DEPS" | jq -r ".[$i].number")
        BODY=$(echo "$BACKLOG_FOR_DEPS" | jq -r ".[$i].body // \"\"")
        # parse-deps.sh extracts dependency numbers from the issue body
        # (assumed one per line — confirm against lib/parse-deps.sh);
        # grep -v drops trivial self-references
        ISSUE_DEPS=$(echo "$BODY" | bash "$PARSE_DEPS" | grep -v "^${NUM}$" || true)
        [ -n "$ISSUE_DEPS" ] && DEPS_OF[$NUM]="$ISSUE_DEPS"
        BACKLOG_NUMS[$NUM]=1
    done

    # Classic 3-color DFS: 0=white (unvisited), 1=gray (on stack), 2=black (done)
    declare -A NODE_COLOR
    for node in "${!BACKLOG_NUMS[@]}"; do NODE_COLOR[$node]=0; done

    FOUND_CYCLES=""
    declare -A SEEN_CYCLES

    # Recursive DFS from $1 along DEPS_OF edges; $2 is the space-separated
    # path from the root. Hitting a gray neighbor means a back edge = a cycle.
    dfs_detect_cycle() {
        local node="$1" path="$2"
        NODE_COLOR[$node]=1
        for dep in ${DEPS_OF[$node]:-}; do
            # Ignore edges pointing outside the backlog set
            [ -z "${BACKLOG_NUMS[$dep]+x}" ] && continue
            if [ "${NODE_COLOR[$dep]}" = "1" ]; then
                # Canonical key = numerically sorted member set, so the same
                # cycle found from different entry points is reported once
                local cycle_key
                cycle_key=$(echo "$path $dep" | tr ' ' '\n' | sort -n | tr '\n' ' ')
                if [ -z "${SEEN_CYCLES[$cycle_key]+x}" ]; then
                    SEEN_CYCLES[$cycle_key]=1
                    # Render only the path suffix from the first occurrence
                    # of $dep, e.g. "#12 -> #15 -> #12"
                    local in_cycle=0 cycle_str=""
                    for p in $path $dep; do
                        [ "$p" = "$dep" ] && in_cycle=1
                        [ "$in_cycle" = "1" ] && cycle_str="${cycle_str:+$cycle_str -> }#${p}"
                    done
                    # Literal \n separator; expanded later by echo -e
                    FOUND_CYCLES="${FOUND_CYCLES}${cycle_str}\n"
                fi
            elif [ "${NODE_COLOR[$dep]}" = "0" ]; then
                dfs_detect_cycle "$dep" "$path $dep"
            fi
        done
        NODE_COLOR[$node]=2
    }

    # Start a DFS from every still-white node that has outgoing edges
    for node in "${!DEPS_OF[@]}"; do
        [ "${NODE_COLOR[$node]:-2}" = "0" ] && dfs_detect_cycle "$node" "$node"
    done

    if [ -n "$FOUND_CYCLES" ]; then
        echo -e "$FOUND_CYCLES" | while IFS= read -r cycle; do
            [ -z "$cycle" ] && continue
            p3 "${proj_name}: Circular dependency deadlock: ${cycle}"
        done
    fi

    # =========================================================================
    # P3c: STALE DEPENDENCIES — blocked by old open issues (>30 days)
    # =========================================================================
    status "P3: ${proj_name}: checking for stale dependencies"

    NOW_EPOCH=$(date +%s)
    # Per-dependency lookup cache: "state|created_at|title" (title capped at 50 chars)
    declare -A DEP_CACHE

    for issue_num in "${!DEPS_OF[@]}"; do
        for dep in ${DEPS_OF[$issue_num]}; do
            if [ -n "${DEP_CACHE[$dep]+x}" ]; then
                DEP_INFO="${DEP_CACHE[$dep]}"
            else
                DEP_JSON=$(forge_api GET "/issues/${dep}" 2>/dev/null || true)
                [ -z "$DEP_JSON" ] && continue
                DEP_STATE=$(echo "$DEP_JSON" | jq -r '.state // "unknown"')
                DEP_CREATED=$(echo "$DEP_JSON" | jq -r '.created_at // ""')
                DEP_TITLE=$(echo "$DEP_JSON" | jq -r '.title // ""' | head -c 50)
                DEP_INFO="${DEP_STATE}|${DEP_CREATED}|${DEP_TITLE}"
                DEP_CACHE[$dep]="$DEP_INFO"
            fi

            # Unpack "state|created|title" — %%|* / #*| split on the first
            # pipes, so a '|' inside the title stays in DEP_TITLE
            DEP_STATE="${DEP_INFO%%|*}"
            [ "$DEP_STATE" != "open" ] && continue

            DEP_REST="${DEP_INFO#*|}"
            DEP_CREATED="${DEP_REST%%|*}"
            DEP_TITLE="${DEP_REST#*|}"

            [ -z "$DEP_CREATED" ] && continue
            # An unparseable date falls back to epoch 0 and will look ancient
            CREATED_EPOCH=$(date -d "$DEP_CREATED" +%s 2>/dev/null || echo 0)
            AGE_DAYS=$(( (NOW_EPOCH - CREATED_EPOCH) / 86400 ))
            if [ "$AGE_DAYS" -gt 30 ]; then
                p3 "${proj_name}: Stale dependency: #${issue_num} blocked by #${dep} \"${DEP_TITLE}\" (open ${AGE_DAYS} days)"
            fi
        done
    done

    # Drop the graph state so the next project in the loop starts clean
    unset DEPS_OF BACKLOG_NUMS NODE_COLOR SEEN_CYCLES DEP_CACHE
fi
|
|
||||||
|
|
||||||
# Emit dev metric (paginated to avoid silent cap at 50)
# Gather the three counts, build the JSON record, then hand it to emit_metric.
_metric_backlog=$(codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues")
_metric_blocked=$(codeberg_count_paginated "/issues?state=open&labels=blocked&type=issues")
_metric_prs=$(codeberg_count_paginated "/pulls?state=open")
_metric_ts=$(date -u +%Y-%m-%dT%H:%MZ)
_metric_json=$(jq -nc \
    --arg ts "$_metric_ts" \
    --arg proj "$proj_name" \
    --argjson backlog "${_metric_backlog:-0}" \
    --argjson blocked "${_metric_blocked:-0}" \
    --argjson prs "${_metric_prs:-0}" \
    '{ts:$ts,type:"dev",project:$proj,issues_in_backlog:$backlog,issues_blocked:$blocked,pr_open:$prs}' 2>/dev/null) || true
emit_metric "$_metric_json" 2>/dev/null || true
|
|
||||||
|
|
||||||
# ===========================================================================
# P2d: ESCALATE — inject human replies into escalated dev sessions
# ===========================================================================
status "P2: ${proj_name}: checking escalate sessions"

# Single shared reply inbox; the first escalated session to claim it wins
HUMAN_REPLY_FILE="/tmp/dev-escalation-reply"

for _nh_phase_file in /tmp/dev-session-"${proj_name}"-*.phase; do
    [ -f "$_nh_phase_file" ] || continue
    _nh_phase=$(head -1 "$_nh_phase_file" 2>/dev/null | tr -d '[:space:]' || true)
    [ "$_nh_phase" = "PHASE:escalate" ] || continue

    # Derive the issue number from the phase-file name
    _nh_issue=$(basename "$_nh_phase_file" .phase)
    _nh_issue="${_nh_issue#dev-session-${proj_name}-}"
    [ -z "$_nh_issue" ] && continue
    _nh_session="dev-${proj_name}-${_nh_issue}"

    # Check tmux session is alive
    if ! tmux has-session -t "$_nh_session" 2>/dev/null; then
        flog "${proj_name}: #${_nh_issue} phase=escalate but tmux session gone"
        continue
    fi

    # Inject human reply if available (atomic mv to prevent double-injection with gardener)
    _nh_claimed="/tmp/dev-escalation-reply.supervisor.$$"
    if [ -s "$HUMAN_REPLY_FILE" ] && mv "$HUMAN_REPLY_FILE" "$_nh_claimed" 2>/dev/null; then
        _nh_reply=$(cat "$_nh_claimed")
        _nh_inject_msg="Human reply received for issue #${_nh_issue}:

${_nh_reply}

Instructions:
1. Read the human's guidance carefully.
2. Continue your work based on their input.
3. When done, push your changes and write the appropriate phase."

        # Deliver via a tmux buffer so multi-line text is pasted verbatim
        _nh_tmpfile=$(mktemp /tmp/human-inject-XXXXXX)
        printf '%s' "$_nh_inject_msg" > "$_nh_tmpfile"
        # All tmux calls guarded: session may die between has-session and here
        tmux load-buffer -b "human-inject-${_nh_issue}" "$_nh_tmpfile" || true
        tmux paste-buffer -t "$_nh_session" -b "human-inject-${_nh_issue}" || true
        sleep 0.5
        # Empty send-keys + Enter submits the pasted text
        tmux send-keys -t "$_nh_session" "" Enter || true
        tmux delete-buffer -b "human-inject-${_nh_issue}" 2>/dev/null || true
        rm -f "$_nh_tmpfile" "$_nh_claimed"

        # Clear the re-notify marker now that the human has responded
        rm -f "/tmp/dev-renotify-${proj_name}-${_nh_issue}"
        flog "${proj_name}: #${_nh_issue} human reply injected into session ${_nh_session}"
        fixed "${proj_name}: Injected human reply into dev session #${_nh_issue}"
        break # one reply to deliver
    else
        # No reply yet — check for timeout (re-notify at 6h, alert at 24h)
        # Age is measured from the phase file's mtime
        _nh_mtime=$(stat -c %Y "$_nh_phase_file" 2>/dev/null || echo 0)
        _nh_now=$(date +%s)
        _nh_age=$(( _nh_now - _nh_mtime ))

        if [ "$_nh_age" -gt 86400 ]; then
            p2 "${proj_name}: Dev session #${_nh_issue} stuck in escalate for >24h"
        elif [ "$_nh_age" -gt 21600 ]; then
            # 6h mark: re-notify exactly once, tracked via a marker file
            _nh_renotify="/tmp/dev-renotify-${proj_name}-${_nh_issue}"
            if [ ! -f "$_nh_renotify" ]; then
                _nh_age_h=$(( _nh_age / 3600 ))
                touch "$_nh_renotify"
                flog "${proj_name}: #${_nh_issue} re-notified (escalate for ${_nh_age_h}h)"
            fi
        fi
    fi
done
|
|
||||||
|
|
||||||
# ===========================================================================
# P4-PROJECT: Orphaned tmux sessions — PR/issue closed externally
# ===========================================================================
status "P4: ${proj_name}: sweeping orphaned dev sessions"

# Count PRs in the given state whose head branch matches, walking pages of 50.
# Stops early on the first match; hard cap of 20 pages guards against API
# pagination loops. Prints the count on stdout.
# (Refactor: this logic was duplicated inline for open and closed PRs.)
_count_prs_on_branch() {
    local _state="$1" _branch="$2"
    local _count=0 _page=1 _json _len _match
    while true; do
        _json=$(forge_api GET "/pulls?state=${_state}&limit=50&page=${_page}" \
            2>/dev/null || echo "[]")
        _len=$(printf '%s' "$_json" | jq 'length' 2>/dev/null || echo 0)
        _match=$(printf '%s' "$_json" | \
            jq --arg b "$_branch" '[.[] | select(.head.ref == $b)] | length' \
            2>/dev/null || echo 0)
        _count=$(( _count + ${_match:-0} ))
        [ "${_count:-0}" -gt 0 ] && break
        # A short page means we reached the end of the listing
        [ "${_len:-0}" -lt 50 ] && break
        _page=$(( _page + 1 ))
        [ "$_page" -gt 20 ] && break
    done
    echo "$_count"
}

while IFS= read -r _sess; do
    [ -z "$_sess" ] && continue

    # Extract issue number from dev-{project}-{issue}
    _sess_issue="${_sess#dev-"${proj_name}"-}"
    [[ "$_sess_issue" =~ ^[0-9]+$ ]] || continue

    # Check forge: is the issue still open? (lookup failure defaults to open,
    # i.e. the safe, no-cleanup direction)
    _issue_state=$(forge_api GET "/issues/${_sess_issue}" 2>/dev/null \
        | jq -r '.state // "open"' 2>/dev/null || echo "open")

    _should_cleanup=false
    _cleanup_reason=""

    if [ "$_issue_state" = "closed" ]; then
        _should_cleanup=true
        _cleanup_reason="issue #${_sess_issue} closed externally"
    else
        # Issue still open — skip cleanup during active-wait phases (no PR yet is normal)
        _phase_file="/tmp/dev-session-${proj_name}-${_sess_issue}.phase"
        _curr_phase=$(head -1 "$_phase_file" 2>/dev/null | tr -d '[:space:]' || true)
        case "${_curr_phase:-}" in
            PHASE:escalate|PHASE:awaiting_ci|PHASE:awaiting_review)
                continue # session has legitimate pending work
                ;;
        esac

        # Check whether the session's branch still has an open PR (paginated)
        _pr_branch="fix/issue-${_sess_issue}"
        _has_open_pr=$(_count_prs_on_branch open "$_pr_branch")

        if [ "$_has_open_pr" -eq 0 ]; then
            # No open PR — check for a closed/merged PR with this branch (paginated)
            _has_closed_pr=$(_count_prs_on_branch closed "$_pr_branch")

            if [ "$_has_closed_pr" -gt 0 ]; then
                _should_cleanup=true
                _cleanup_reason="PR for issue #${_sess_issue} is closed/merged"
            else
                # No PR at all — clean up if session idle >30min
                # On query failure, skip rather than defaulting to epoch 0
                if ! _sess_activity=$(tmux display-message -t "$_sess" \
                    -p '#{session_activity}' 2>/dev/null); then
                    flog "${proj_name}: Could not query activity for session ${_sess}, skipping"
                    continue
                fi
                _now_ts=$(date +%s)
                _idle_min=$(( (_now_ts - _sess_activity) / 60 ))
                if [ "$_idle_min" -gt 30 ]; then
                    _should_cleanup=true
                    _cleanup_reason="no PR found, session idle ${_idle_min}min"
                fi
            fi
        fi
    fi

    if [ "$_should_cleanup" = true ]; then
        tmux kill-session -t "$_sess" 2>/dev/null || true
        _wt="/tmp/${proj_name}-worktree-${_sess_issue}"
        if [ -d "$_wt" ]; then
            git -C "$PROJECT_REPO_ROOT" worktree remove --force "$_wt" 2>/dev/null || true
        fi
        # Remove lock only if its recorded PID is no longer alive
        _lock="/tmp/dev-agent-${proj_name}.lock"
        if [ -f "$_lock" ]; then
            _lock_pid=$(cat "$_lock" 2>/dev/null || true)
            if [ -n "${_lock_pid:-}" ] && ! kill -0 "$_lock_pid" 2>/dev/null; then
                rm -f "$_lock"
            fi
        fi
        rm -f "/tmp/dev-session-${proj_name}-${_sess_issue}.phase"
        fixed "${proj_name}: Cleaned orphaned session ${_sess} (${_cleanup_reason})"
    fi
done < <(tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "^dev-${proj_name}-" || true)
|
|
||||||
|
|
||||||
# ===========================================================================
# P4-PROJECT: Clean stale worktrees for this project
# ===========================================================================
NOW_TS=$(date +%s)
for wt in /tmp/${proj_name}-worktree-* /tmp/${proj_name}-review-* /tmp/${proj_name}-sup-retry-*; do
    [ -d "$wt" ] || continue
    # Bug fix: stat was unguarded inside the arithmetic expansion — if the
    # worktree vanished between the -d test and stat, the substitution came
    # back empty and the $(( ... )) became a shell syntax error. Fall back to
    # NOW_TS (age 0) so a just-removed dir is simply skipped.
    WT_MTIME=$(stat -c %Y "$wt" 2>/dev/null || echo "$NOW_TS")
    WT_AGE_MIN=$(( (NOW_TS - WT_MTIME) / 60 ))
    if [ "$WT_AGE_MIN" -gt 120 ]; then
        WT_BASE=$(basename "$wt")
        # Only remove when no live process mentions the worktree name
        if ! pgrep -f "$WT_BASE" >/dev/null 2>&1; then
            git -C "$PROJECT_REPO_ROOT" worktree remove --force "$wt" 2>/dev/null && \
                fixed "${proj_name}: Removed stale worktree: $wt (${WT_AGE_MIN}min old)" || true
        fi
    fi
done
# Drop metadata for worktrees whose directories are already gone
git -C "$PROJECT_REPO_ROOT" worktree prune 2>/dev/null || true
|
|
||||||
}
|
|
||||||
|
|
||||||
# =============================================================================
# Iterate over all registered projects
# =============================================================================
status "checking projects"

PROJECT_COUNT=0
if [ -d "$PROJECTS_DIR" ]; then
    for _proj_cfg in "${PROJECTS_DIR}"/*.toml; do
        # The glob yields the literal pattern when no .toml files exist
        [ -f "$_proj_cfg" ] || continue
        PROJECT_COUNT=$(( PROJECT_COUNT + 1 ))

        # Load project config (overrides FORGE_REPO, PROJECT_REPO_ROOT, etc.)
        source "${FACTORY_ROOT}/lib/load-project.sh" "$_proj_cfg"

        if ! check_project; then
            flog "check_project failed for ${_proj_cfg} (per-project checks incomplete)"
        fi
    done
fi

if [ "$PROJECT_COUNT" -eq 0 ]; then
    # Fallback: no project TOML files, use .env config (backwards compatible)
    flog "No projects/*.toml found, using .env defaults"
    if ! check_project; then
        flog "check_project failed with .env defaults (per-project checks incomplete)"
    fi
fi
|
|
||||||
|
|
||||||
# #############################################################################
# RESULT
# #############################################################################

# Concatenate all priority buckets; non-empty means at least one alert fired
ALL_ALERTS="${P0_ALERTS}${P1_ALERTS}${P2_ALERTS}${P3_ALERTS}${P4_ALERTS}"

if [ -n "$ALL_ALERTS" ]; then
    # echo -e expands the literal \n separators the p0–p4 helpers presumably
    # append between alerts — confirm in the helper definitions
    ALERT_TEXT=$(echo -e "$ALL_ALERTS")

    flog "Invoking claude -p for alerts"

    # Prompt = base prompt file (with inline fallback) + alerts + applied
    # fixes + host state. Everything from "## Current Alerts" down is part
    # of the quoted string; the $(...) substitutions run at assembly time.
    CLAUDE_PROMPT="$(cat "$PROMPT_FILE" 2>/dev/null || echo "You are a supervisor agent. Fix the issue below.")

## Current Alerts
${ALERT_TEXT}

## Auto-fixes already applied by bash
$(echo -e "${FIXES:-None}")

## System State
RAM: $(free -m | awk '/Mem:/{printf "avail=%sMB", $7}') $(free -m | awk '/Swap:/{printf "swap=%sMB", $3}')
Disk: $(df -h / | awk 'NR==2{printf "%s used of %s (%s)", $3, $2, $5}')
Docker: $(sudo docker ps --format '{{.Names}}' 2>/dev/null | wc -l) containers running
Claude procs: $(pgrep -f "claude" 2>/dev/null | wc -l)

Fix what you can. File vault items for what you can't. Read the relevant best-practices file first."

    # One-shot invocation with a hard 5-minute cap; never fail the run on it
    CLAUDE_OUTPUT=$(timeout 300 claude -p --model sonnet --dangerously-skip-permissions \
        "$CLAUDE_PROMPT" 2>&1) || true
    flog "claude output: $(echo "$CLAUDE_OUTPUT" | tail -20)"
    status "claude responded"
else
    # Nothing to escalate — just record any housekeeping fixes that were applied
    [ -n "$FIXES" ] && flog "Housekeeping: $(echo -e "$FIXES")"
    status "all clear"
fi
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# supervisor-run.sh — Cron wrapper: supervisor execution via SDK + formula
|
# supervisor-run.sh — Polling-loop wrapper: supervisor execution via SDK + formula
|
||||||
#
|
#
|
||||||
# Synchronous bash loop using claude -p (one-shot invocation).
|
# Synchronous bash loop using claude -p (one-shot invocation).
|
||||||
# No tmux sessions, no phase files — the bash script IS the state machine.
|
# No tmux sessions, no phase files — the bash script IS the state machine.
|
||||||
#
|
#
|
||||||
# Flow:
|
# Flow:
|
||||||
# 1. Guards: cron lock, memory check
|
# 1. Guards: run lock, memory check
|
||||||
# 2. Housekeeping: clean up stale crashed worktrees
|
# 2. Housekeeping: clean up stale crashed worktrees
|
||||||
# 3. Collect pre-flight metrics (supervisor/preflight.sh)
|
# 3. Collect pre-flight metrics (supervisor/preflight.sh)
|
||||||
# 4. Load formula (formulas/run-supervisor.toml)
|
# 4. Load formula (formulas/run-supervisor.toml)
|
||||||
|
|
@ -16,7 +16,7 @@
|
||||||
# Usage:
|
# Usage:
|
||||||
# supervisor-run.sh [projects/disinto.toml] # project config (default: disinto)
|
# supervisor-run.sh [projects/disinto.toml] # project config (default: disinto)
|
||||||
#
|
#
|
||||||
# Cron: */20 * * * * cd /path/to/dark-factory && bash supervisor/supervisor-run.sh
|
# Called by: entrypoint.sh polling loop (every 20 minutes)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
|
@ -38,7 +38,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
|
||||||
# shellcheck source=../lib/agent-sdk.sh
|
# shellcheck source=../lib/agent-sdk.sh
|
||||||
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
||||||
|
|
||||||
LOG_FILE="$SCRIPT_DIR/supervisor.log"
|
LOG_FILE="${DISINTO_LOG_DIR}/supervisor/supervisor.log"
|
||||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||||
LOGFILE="$LOG_FILE"
|
LOGFILE="$LOG_FILE"
|
||||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||||
|
|
@ -46,31 +46,66 @@ SID_FILE="/tmp/supervisor-session-${PROJECT_NAME}.sid"
|
||||||
SCRATCH_FILE="/tmp/supervisor-${PROJECT_NAME}-scratch.md"
|
SCRATCH_FILE="/tmp/supervisor-${PROJECT_NAME}-scratch.md"
|
||||||
WORKTREE="/tmp/${PROJECT_NAME}-supervisor-run"
|
WORKTREE="/tmp/${PROJECT_NAME}-supervisor-run"
|
||||||
|
|
||||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
# Override LOG_AGENT for consistent agent identification
|
||||||
|
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||||
|
LOG_AGENT="supervisor"
|
||||||
|
|
||||||
|
# ── OPS Repo Detection (Issue #544) ──────────────────────────────────────
|
||||||
|
# Detect if OPS_REPO_ROOT is available and set degraded mode flag if not.
|
||||||
|
# This allows the supervisor to run with fallback knowledge files and
|
||||||
|
# local journal/vault paths when the ops repo is absent.
|
||||||
|
if [ -z "${OPS_REPO_ROOT:-}" ] || [ ! -d "${OPS_REPO_ROOT}" ]; then
|
||||||
|
log "WARNING: OPS_REPO_ROOT not set or directory missing — running in degraded mode (no playbooks, no journal continuity, no vault destination)"
|
||||||
|
export OPS_REPO_DEGRADED=1
|
||||||
|
# Set fallback paths for degraded mode
|
||||||
|
export OPS_KNOWLEDGE_ROOT="${FACTORY_ROOT}/knowledge"
|
||||||
|
export OPS_JOURNAL_ROOT="${FACTORY_ROOT}/state/supervisor-journal"
|
||||||
|
export OPS_VAULT_ROOT="${PROJECT_REPO_ROOT}/vault/pending"
|
||||||
|
mkdir -p "$OPS_JOURNAL_ROOT" "$OPS_VAULT_ROOT" 2>/dev/null || true
|
||||||
|
else
|
||||||
|
export OPS_REPO_DEGRADED=0
|
||||||
|
export OPS_KNOWLEDGE_ROOT="${OPS_REPO_ROOT}/knowledge"
|
||||||
|
export OPS_JOURNAL_ROOT="${OPS_REPO_ROOT}/journal/supervisor"
|
||||||
|
export OPS_VAULT_ROOT="${OPS_REPO_ROOT}/vault/pending"
|
||||||
|
mkdir -p "$OPS_JOURNAL_ROOT" "$OPS_VAULT_ROOT" 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Override log() to append to supervisor-specific log file
|
||||||
|
# shellcheck disable=SC2034
|
||||||
|
log() {
|
||||||
|
local agent="${LOG_AGENT:-supervisor}"
|
||||||
|
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
|
||||||
|
}
|
||||||
|
|
||||||
# ── Guards ────────────────────────────────────────────────────────────────
|
# ── Guards ────────────────────────────────────────────────────────────────
|
||||||
check_active supervisor
|
check_active supervisor
|
||||||
acquire_cron_lock "/tmp/supervisor-run.lock"
|
acquire_run_lock "/tmp/supervisor-run.lock"
|
||||||
check_memory 2000
|
memory_guard 2000
|
||||||
|
|
||||||
log "--- Supervisor run start ---"
|
log "--- Supervisor run start ---"
|
||||||
|
|
||||||
|
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||||
|
# Run git operations from the project checkout, not the baked code dir
|
||||||
|
cd "$PROJECT_REPO_ROOT"
|
||||||
|
|
||||||
# ── Housekeeping: clean up stale crashed worktrees (>24h) ────────────────
|
# ── Housekeeping: clean up stale crashed worktrees (>24h) ────────────────
|
||||||
cleanup_stale_crashed_worktrees 24
|
cleanup_stale_crashed_worktrees 24
|
||||||
|
|
||||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_SUPERVISOR_TOKEN:-}" ]; then
|
resolve_agent_identity || true
|
||||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_SUPERVISOR_TOKEN}" \
|
|
||||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ── Collect pre-flight metrics ────────────────────────────────────────────
|
# ── Collect pre-flight metrics ────────────────────────────────────────────
|
||||||
log "Running preflight.sh"
|
log "Running preflight.sh"
|
||||||
PREFLIGHT_OUTPUT=""
|
PREFLIGHT_OUTPUT=""
|
||||||
|
PREFLIGHT_RC=0
|
||||||
if PREFLIGHT_OUTPUT=$(bash "$SCRIPT_DIR/preflight.sh" "$PROJECT_TOML" 2>&1); then
|
if PREFLIGHT_OUTPUT=$(bash "$SCRIPT_DIR/preflight.sh" "$PROJECT_TOML" 2>&1); then
|
||||||
log "Preflight collected ($(echo "$PREFLIGHT_OUTPUT" | wc -l) lines)"
|
log "Preflight collected ($(echo "$PREFLIGHT_OUTPUT" | wc -l) lines)"
|
||||||
else
|
else
|
||||||
log "WARNING: preflight.sh failed, continuing with partial data"
|
PREFLIGHT_RC=$?
|
||||||
|
log "WARNING: preflight.sh failed (exit code $PREFLIGHT_RC), continuing with partial data"
|
||||||
|
if [ -n "$PREFLIGHT_OUTPUT" ]; then
|
||||||
|
log "Preflight error: $(echo "$PREFLIGHT_OUTPUT" | tail -3)"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── Load formula + context ───────────────────────────────────────────────
|
# ── Load formula + context ───────────────────────────────────────────────
|
||||||
|
|
@ -91,6 +126,25 @@ export CLAUDE_MODEL="sonnet"
|
||||||
# ── Create worktree (before prompt assembly so trap is set early) ────────
|
# ── Create worktree (before prompt assembly so trap is set early) ────────
|
||||||
formula_worktree_setup "$WORKTREE"
|
formula_worktree_setup "$WORKTREE"
|
||||||
|
|
||||||
|
# Inject OPS repo status into prompt
|
||||||
|
if [ "${OPS_REPO_DEGRADED:-0}" = "1" ]; then
|
||||||
|
OPS_STATUS="
|
||||||
|
## OPS Repo Status
|
||||||
|
**DEGRADED MODE**: OPS repo is not available. Using bundled knowledge files and local journal/vault paths.
|
||||||
|
- Knowledge files: ${OPS_KNOWLEDGE_ROOT:-<unset>}
|
||||||
|
- Journal: ${OPS_JOURNAL_ROOT:-<unset>}
|
||||||
|
- Vault destination: ${OPS_VAULT_ROOT:-<unset>}
|
||||||
|
"
|
||||||
|
else
|
||||||
|
OPS_STATUS="
|
||||||
|
## OPS Repo Status
|
||||||
|
**FULL MODE**: OPS repo available at ${OPS_REPO_ROOT}
|
||||||
|
- Knowledge files: ${OPS_KNOWLEDGE_ROOT:-<unset>}
|
||||||
|
- Journal: ${OPS_JOURNAL_ROOT:-<unset>}
|
||||||
|
- Vault destination: ${OPS_VAULT_ROOT:-<unset>}
|
||||||
|
"
|
||||||
|
fi
|
||||||
|
|
||||||
PROMPT="You are the supervisor agent for ${FORGE_REPO}. Work through the formula below.
|
PROMPT="You are the supervisor agent for ${FORGE_REPO}. Work through the formula below.
|
||||||
|
|
||||||
You have full shell access and --dangerously-skip-permissions.
|
You have full shell access and --dangerously-skip-permissions.
|
||||||
|
|
@ -103,6 +157,7 @@ ${PREFLIGHT_OUTPUT}
|
||||||
${CONTEXT_BLOCK}$(formula_lessons_block)
|
${CONTEXT_BLOCK}$(formula_lessons_block)
|
||||||
${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT}
|
${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT}
|
||||||
}
|
}
|
||||||
|
${OPS_STATUS}
|
||||||
Priority order: P0 memory > P1 disk > P2 stopped > P3 degraded > P4 housekeeping
|
Priority order: P0 memory > P1 disk > P2 stopped > P3 degraded > P4 housekeeping
|
||||||
|
|
||||||
${FORMULA_CONTENT}
|
${FORMULA_CONTENT}
|
||||||
|
|
|
||||||
|
|
@ -1,47 +0,0 @@
|
||||||
#!/usr/bin/env bash
# update-prompt.sh — Append a lesson to a best-practices file
#
# Usage:
#   ./supervisor/update-prompt.sh "best-practices/memory.md" "### Title\nBody text"
#   ./supervisor/update-prompt.sh "best-practices/memory.md" --from-file /tmp/lesson.md
#
# Called by claude -p when it learns something during a fix.
# Commits and pushes the update to the disinto repo.

source "$(dirname "$0")/../lib/env.sh"

# Save the relative path before shifting: the original re-read "$1" for the
# later `git add`, but after `shift` $1 holds the lesson text (or --from-file),
# so the wrong path was staged and the fallback `git add "$TARGET_FILE"` ran.
REL_PATH="$1"
TARGET_FILE="${FACTORY_ROOT}/supervisor/${REL_PATH}"
shift

# Lesson text comes either from a file (--from-file <path>) or inline as $1.
if [ "$1" = "--from-file" ] && [ -f "$2" ]; then
    LESSON=$(cat "$2")
elif [ -n "$1" ]; then
    LESSON="$1"
else
    echo "Usage: update-prompt.sh <relative-path> '<lesson text>'" >&2
    echo " or: update-prompt.sh <relative-path> --from-file <path>" >&2
    exit 1
fi

if [ ! -f "$TARGET_FILE" ]; then
    echo "Target file not found: $TARGET_FILE" >&2
    exit 1
fi

# Append under "Lessons Learned" section if it exists, otherwise at end
if grep -q "## Lessons Learned" "$TARGET_FILE"; then
    echo "" >> "$TARGET_FILE"
    echo "$LESSON" >> "$TARGET_FILE"
else
    echo "" >> "$TARGET_FILE"
    echo "## Lessons Learned" >> "$TARGET_FILE"
    echo "" >> "$TARGET_FILE"
    echo "$LESSON" >> "$TARGET_FILE"
fi

cd "$FACTORY_ROOT" || exit 1
# Stage using the saved relative path (bug fix — see REL_PATH note above).
git add "supervisor/${REL_PATH}" 2>/dev/null || git add "$TARGET_FILE"
# Commit message: first line of the lesson with leading markdown hashes stripped.
git commit -m "supervisor: learned — $(echo "$LESSON" | head -1 | sed 's/^#* *//')" --no-verify 2>/dev/null
git push origin main 2>/dev/null

log "Updated $(basename "$TARGET_FILE") with new lesson"
|
|
||||||
28
templates/issue/bug.md
Normal file
28
templates/issue/bug.md
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
---
name: Bug Report
about: Report a bug or unexpected behavior
labels: bug-report
---

## What happened

<!-- Describe the observed behavior -->

## What was expected

<!-- Describe the expected behavior -->

## Steps to reproduce

<!-- Required: List the exact steps to reproduce the issue -->
1.
2.
3.

## Environment

<!-- Browser, wallet, network, or other relevant environment details -->
- Browser/Client:
- Wallet (if applicable):
- Network (if applicable):
- Version:
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Mock Forgejo API server for CI smoke tests.
|
"""Mock Forgejo API server for CI smoke tests.
|
||||||
|
|
||||||
Implements 15 Forgejo API endpoints that disinto init calls.
|
Implements 16 Forgejo API endpoints that disinto init calls.
|
||||||
State stored in-memory (dicts), responds instantly.
|
State stored in-memory (dicts), responds instantly.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -135,6 +135,7 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
||||||
# Users patterns
|
# Users patterns
|
||||||
(r"^users/([^/]+)$", f"handle_{method}_users_username"),
|
(r"^users/([^/]+)$", f"handle_{method}_users_username"),
|
||||||
(r"^users/([^/]+)/tokens$", f"handle_{method}_users_username_tokens"),
|
(r"^users/([^/]+)/tokens$", f"handle_{method}_users_username_tokens"),
|
||||||
|
(r"^users/([^/]+)/tokens/([^/]+)$", f"handle_{method}_users_username_tokens_token_id"),
|
||||||
(r"^users/([^/]+)/repos$", f"handle_{method}_users_username_repos"),
|
(r"^users/([^/]+)/repos$", f"handle_{method}_users_username_repos"),
|
||||||
# Repos patterns
|
# Repos patterns
|
||||||
(r"^repos/([^/]+)/([^/]+)$", f"handle_{method}_repos_owner_repo"),
|
(r"^repos/([^/]+)/([^/]+)$", f"handle_{method}_repos_owner_repo"),
|
||||||
|
|
@ -149,6 +150,7 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
||||||
# Admin patterns
|
# Admin patterns
|
||||||
(r"^admin/users$", f"handle_{method}_admin_users"),
|
(r"^admin/users$", f"handle_{method}_admin_users"),
|
||||||
(r"^admin/users/([^/]+)$", f"handle_{method}_admin_users_username"),
|
(r"^admin/users/([^/]+)$", f"handle_{method}_admin_users_username"),
|
||||||
|
(r"^admin/users/([^/]+)/repos$", f"handle_{method}_admin_users_username_repos"),
|
||||||
# Org patterns
|
# Org patterns
|
||||||
(r"^orgs$", f"handle_{method}_orgs"),
|
(r"^orgs$", f"handle_{method}_orgs"),
|
||||||
]
|
]
|
||||||
|
|
@ -294,7 +296,10 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
||||||
|
|
||||||
def handle_GET_users_username_tokens(self, query):
|
def handle_GET_users_username_tokens(self, query):
|
||||||
"""GET /api/v1/users/{username}/tokens"""
|
"""GET /api/v1/users/{username}/tokens"""
|
||||||
|
# Support both token auth (for listing own tokens) and basic auth (for admin listing)
|
||||||
username = require_token(self)
|
username = require_token(self)
|
||||||
|
if not username:
|
||||||
|
username = require_basic_auth(self)
|
||||||
if not username:
|
if not username:
|
||||||
json_response(self, 401, {"message": "invalid authentication"})
|
json_response(self, 401, {"message": "invalid authentication"})
|
||||||
return
|
return
|
||||||
|
|
@ -303,6 +308,38 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
||||||
tokens = [t for t in state["tokens"].values() if t.get("username") == username]
|
tokens = [t for t in state["tokens"].values() if t.get("username") == username]
|
||||||
json_response(self, 200, tokens)
|
json_response(self, 200, tokens)
|
||||||
|
|
||||||
|
def handle_DELETE_users_username_tokens_token_id(self, query):
    """DELETE /api/v1/users/{username}/tokens/{id}

    Deletes an access token owned by the authenticated user.
    Responds 204 on success, 404 when no matching token exists,
    401 when neither token auth nor basic auth succeeds.
    """
    # Support both token auth and basic auth
    username = require_token(self)
    if not username:
        username = require_basic_auth(self)
    if not username:
        json_response(self, 401, {"message": "invalid authentication"})
        return

    # Path is /api/v1/users/{username}/tokens/{id} → index 7 is the id.
    parts = self.path.split("/")
    if len(parts) >= 8:
        token_id_str = parts[7]
    else:
        json_response(self, 404, {"message": "token not found"})
        return

    # Bug fix: a non-numeric id previously raised ValueError inside the
    # handler (unhandled 500). A malformed id simply matches no token → 404.
    try:
        token_id = int(token_id_str)
    except ValueError:
        json_response(self, 404, {"message": "token not found"})
        return

    # Find and delete token by ID, scoped to the authenticated user.
    deleted = False
    for tok_sha1, tok in list(state["tokens"].items()):
        if tok.get("id") == token_id and tok.get("username") == username:
            del state["tokens"][tok_sha1]
            deleted = True
            break

    if deleted:
        # 204 No Content: explicit empty body.
        self.send_response(204)
        self.send_header("Content-Length", 0)
        self.end_headers()
    else:
        json_response(self, 404, {"message": "token not found"})
|
|
||||||
def handle_POST_users_username_tokens(self, query):
|
def handle_POST_users_username_tokens(self, query):
|
||||||
"""POST /api/v1/users/{username}/tokens"""
|
"""POST /api/v1/users/{username}/tokens"""
|
||||||
username = require_basic_auth(self)
|
username = require_basic_auth(self)
|
||||||
|
|
@ -460,6 +497,56 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
||||||
state["repos"][key] = repo
|
state["repos"][key] = repo
|
||||||
json_response(self, 201, repo)
|
json_response(self, 201, repo)
|
||||||
|
|
||||||
|
def handle_POST_admin_users_username_repos(self, query):
    """POST /api/v1/admin/users/{username}/repos

    Admin API to create a repo under a specific user namespace.
    This allows creating repos in any user's namespace when authenticated as admin.
    """
    require_token(self)

    # Path is /api/v1/admin/users/{username}/repos → parts[5] is the username
    parts = self.path.split("/")
    if len(parts) >= 7:
        target_user = parts[5]
    else:
        json_response(self, 400, {"message": "username required"})
        return

    if target_user not in state["users"]:
        json_response(self, 404, {"message": "user not found"})
        return

    content_length = int(self.headers.get("Content-Length", 0))
    body = self.rfile.read(content_length).decode("utf-8")
    # Bug fix: malformed JSON previously raised json.JSONDecodeError inside
    # the handler (unhandled 500). Answer 400 like other bad-request cases.
    try:
        data = json.loads(body) if body else {}
    except json.JSONDecodeError:
        json_response(self, 400, {"message": "invalid JSON body"})
        return

    repo_name = data.get("name")
    if not repo_name:
        json_response(self, 400, {"message": "name is required"})
        return

    # Allocate a fresh repo id from the shared counter.
    repo_id = next_ids["repos"]
    next_ids["repos"] += 1

    key = f"{target_user}/{repo_name}"
    repo = {
        "id": repo_id,
        "full_name": key,
        "name": repo_name,
        "owner": {"id": state["users"][target_user]["id"], "login": target_user},
        # A repo created without auto_init starts empty (no initial commit).
        "empty": not data.get("auto_init", False),
        "default_branch": data.get("default_branch", "main"),
        "description": data.get("description", ""),
        "private": data.get("private", False),
        "html_url": f"https://example.com/{key}",
        "ssh_url": f"git@example.com:{key}.git",
        "clone_url": f"https://example.com/{key}.git",
        "created_at": "2026-04-01T00:00:00Z",
    }

    state["repos"][key] = repo
    json_response(self, 201, repo)
||||||
|
|
||||||
def handle_POST_user_repos(self, query):
|
def handle_POST_user_repos(self, query):
|
||||||
"""POST /api/v1/user/repos"""
|
"""POST /api/v1/user/repos"""
|
||||||
require_token(self)
|
require_token(self)
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue