fix: feat: make gardener and architect schedules configurable via env vars (#558)

2026-04-10 10:29:56 +00:00
102 changed files with 1265 additions and 9853 deletions
--- a/.codeberg/ISSUE_TEMPLATE/bug.yaml
+++ b/.codeberg/ISSUE_TEMPLATE/bug.yaml
@ -1,7 +1,7 @@
 name: Bug Report
 about: Something is broken or behaving incorrectly
 labels:
-  - bug-report
+  - bug
 body:
  - type: textarea
    id: what
--- a/.codeberg/ISSUE_TEMPLATE/feature.yaml
+++ b/.codeberg/ISSUE_TEMPLATE/feature.yaml
--- a/.codeberg/ISSUE_TEMPLATE/refactor.yaml
+++ b/.codeberg/ISSUE_TEMPLATE/refactor.yaml
--- a/.dockerignore
+++ b/.dockerignore
@ -1,7 +1,8 @@
-# Secrets — prevent .env files and encrypted secrets from being baked into the image
+# Secrets — prevent .env files from being baked into the image
 .env
 .env.enc
-secrets/
+.env.vault
 .env.vault.enc
 # Version control — .git is huge and not needed in image
 .git
--- a/.env.example
+++ b/.env.example
@ -19,54 +19,27 @@ FORGE_URL=http://localhost:3000             # [CONFIG] local Forgejo instance
 # ── Auth tokens ───────────────────────────────────────────────────────────
 # Each agent has its own Forgejo account and API token (#747).
 # Per-agent tokens fall back to FORGE_TOKEN if not set.
 #
 # Tokens and passwords are auto-generated by `disinto init` and stored in .env.
 # Each bot user gets:
 #   - FORGE_TOKEN_<BOT> = API token for REST calls (user identity via /api/v1/user)
 #   - FORGE_PASS_<BOT>  = password for git HTTP push (#361, Forgejo 11.x limitation)
 #
 # Local-model agents (agents-llama) use FORGE_TOKEN_LLAMA / FORGE_PASS_LLAMA
 # with FORGE_BOT_USER_LLAMA=dev-qwen to ensure correct attribution (#563).
 FORGE_TOKEN=                               # [SECRET] dev-bot API token (default for all agents)
-FORGE_PASS=                                # [SECRET] dev-bot password for git HTTP push (#361)
+FORGE_TOKEN_DEVQWEN=                       # [SECRET] dev-qwen API token (for agents-llama)
 FORGE_TOKEN_LLAMA=                         # [SECRET] dev-qwen API token (for agents-llama)
 FORGE_PASS_LLAMA=                          # [SECRET] dev-qwen password for git HTTP push
 FORGE_REVIEW_TOKEN=                        # [SECRET] review-bot API token
 FORGE_REVIEW_PASS=                         # [SECRET] review-bot password for git HTTP push
 FORGE_PLANNER_TOKEN=                       # [SECRET] planner-bot API token
 FORGE_PLANNER_PASS=                        # [SECRET] planner-bot password for git HTTP push
 FORGE_GARDENER_TOKEN=                      # [SECRET] gardener-bot API token
 FORGE_GARDENER_PASS=                       # [SECRET] gardener-bot password for git HTTP push
 FORGE_VAULT_TOKEN=                         # [SECRET] vault-bot API token
 FORGE_VAULT_PASS=                          # [SECRET] vault-bot password for git HTTP push
 FORGE_SUPERVISOR_TOKEN=                    # [SECRET] supervisor-bot API token
 FORGE_SUPERVISOR_PASS=                     # [SECRET] supervisor-bot password for git HTTP push
 FORGE_PREDICTOR_TOKEN=                     # [SECRET] predictor-bot API token
 FORGE_PREDICTOR_PASS=                      # [SECRET] predictor-bot password for git HTTP push
 FORGE_ARCHITECT_TOKEN=                     # [SECRET] architect-bot API token
-FORGE_ARCHITECT_PASS=                      # [SECRET] architect-bot password for git HTTP push
+FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot
 FORGE_FILER_TOKEN=                         # [SECRET] filer-bot API token (issues:write on project repo only)
 FORGE_FILER_PASS=                          # [SECRET] filer-bot password for git HTTP push
 FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot,filer-bot
 # ── Backwards compatibility ───────────────────────────────────────────────
 # If CODEBERG_TOKEN is set but FORGE_TOKEN is not, env.sh falls back to
 # CODEBERG_TOKEN automatically (same for REVIEW_BOT_TOKEN, CODEBERG_REPO,
 # CODEBERG_BOT_USERNAMES). No action needed for existing deployments.
 # Per-agent tokens default to FORGE_TOKEN when unset (single-token setups).
 #
 # Note: `disinto init` auto-generates all bot tokens/passwords when you
 # configure [agents.llama] in a project TOML. The credentials are stored
 # in .env.enc (encrypted) or .env (plaintext fallback).
 # ── Woodpecker CI ─────────────────────────────────────────────────────────
 WOODPECKER_TOKEN=                          # [SECRET] Woodpecker API token
 WOODPECKER_SERVER=http://localhost:8000     # [CONFIG] Woodpecker server URL
 WOODPECKER_AGENT_SECRET=                   # [SECRET] shared secret for server↔agent auth (auto-generated)
 # Woodpecker privileged-plugin allowlist — comma-separated image names
 # Add plugins/docker (and others) here to allow privileged execution
 WOODPECKER_PLUGINS_PRIVILEGED=plugins/docker
 # WOODPECKER_REPO_ID — now per-project, set in projects/*.toml [ci] section
 # Woodpecker Postgres (for direct DB queries)
@ -75,59 +48,26 @@ WOODPECKER_DB_USER=woodpecker              # [CONFIG] Postgres user
 WOODPECKER_DB_HOST=127.0.0.1              # [CONFIG] Postgres host
 WOODPECKER_DB_NAME=woodpecker              # [CONFIG] Postgres database name
 # ── Chat OAuth (#708) ────────────────────────────────────────────────────
 CHAT_OAUTH_CLIENT_ID=                     # [SECRET] Chat OAuth2 client ID (auto-generated by init)
 CHAT_OAUTH_CLIENT_SECRET=                 # [SECRET] Chat OAuth2 client secret (auto-generated by init)
 DISINTO_CHAT_ALLOWED_USERS=               # [CONFIG] CSV of allowed usernames (disinto-admin always allowed)
 FORWARD_AUTH_SECRET=                      # [SECRET] Shared secret for Caddy ↔ chat forward_auth (#709)
 # ── Vault-only secrets (DO NOT put these in .env) ────────────────────────
 # These tokens grant access to external systems (GitHub, ClawHub, deploy targets).
-# They live ONLY in secrets/<NAME>.enc (age-encrypted, one file per key) and are
+# They live ONLY in .env.vault.enc and are injected into the ephemeral runner
-# decrypted into the ephemeral runner container at fire time (#745, #777).
+# container at fire time (#745). lib/env.sh explicitly unsets them so agents
-# lib/env.sh explicitly unsets them so agents can never hold them directly —
+# can never hold them directly — all external actions go through vault dispatch.
 # all external actions go through vault dispatch.
 #
 #   GITHUB_TOKEN          — GitHub API access (publish, deploy, post)
 #   CLAWHUB_TOKEN         — ClawHub registry credentials (publish)
 #   CADDY_SSH_KEY         — SSH key for Caddy log collection
 #   (deploy keys)         — SSH keys for deployment targets
 #
-# To manage secrets: disinto secrets add/show/remove/list
+# To manage vault secrets: disinto secrets edit-vault
 # (vault redesign in progress: PR-based approval, see #73-#77)
 # ── Project-specific secrets ──────────────────────────────────────────────
 # Store all project secrets here so formulas reference env vars, never hardcode.
 BASE_RPC_URL=                              # [SECRET] on-chain RPC endpoint
 # ── Local Qwen dev agent (optional) ──────────────────────────────────────
 # Set ENABLE_LLAMA_AGENT=1 to emit agents-llama in docker-compose.yml.
 # Requires a running llama-server reachable at ANTHROPIC_BASE_URL.
 # See docs/agents-llama.md for details.
 ENABLE_LLAMA_AGENT=0                       # [CONFIG] 1 = enable agents-llama service
 ANTHROPIC_BASE_URL=                        # [CONFIG] e.g. http://host.docker.internal:8081
 # ── Tuning ────────────────────────────────────────────────────────────────
 CLAUDE_TIMEOUT=7200                        # [CONFIG] max seconds per Claude invocation
 # ── Host paths (Nomad-portable) ────────────────────────────────────────────
 # These env vars externalize host-side bind-mount paths from docker-compose.yml.
 # At cutover, Nomad jobspecs reference the same vars — no path translation.
 # Defaults point at current paths so an empty .env override still works.
 CLAUDE_BIN_DIR=/usr/local/bin/claude          # [CONFIG] host path to claude CLI binary (resolved by `disinto init`)
 CLAUDE_CONFIG_FILE=${HOME}/.claude.json       # [CONFIG] host path to claude config JSON file
 CLAUDE_DIR=${HOME}/.claude                    # [CONFIG] host path to .claude directory (reproduce/edge)
 AGENT_SSH_DIR=${HOME}/.ssh                    # [CONFIG] host path to SSH keys directory
 SOPS_AGE_DIR=${HOME}/.config/sops/age         # [CONFIG] host path to SOPS age key directory
 # ── Claude Code shared OAuth state ─────────────────────────────────────────
 # Shared directory used by every factory container so Claude Code's internal
 # proper-lockfile-based OAuth refresh lock works across containers. Both
 # values must live outside $HOME (so docker bind mounts don't depend on UID
 # mapping) and must be the same absolute path on host and inside each
 # container. See docs/CLAUDE-AUTH-CONCURRENCY.md.
 CLAUDE_SHARED_DIR=/var/lib/disinto/claude-shared
 CLAUDE_CONFIG_DIR=${CLAUDE_SHARED_DIR}/config
 # ── Factory safety ────────────────────────────────────────────────────────
 # Disables Claude Code auto-updater, telemetry, error reporting, and bug
 # command. Factory sessions are production processes — they must never phone
--- a/.gitignore
+++ b/.gitignore
@ -3,6 +3,7 @@
 # Encrypted secrets — safe to commit (SOPS-encrypted with age)
 !.env.enc
 !.env.vault.enc
 !.sops.yaml
 # Per-box project config (generated by disinto init)
@ -27,15 +28,3 @@ secrets/
 # Pre-built binaries for Docker builds (avoid network calls during build)
 docker/agents/bin/
 # Generated docker-compose.yml (run 'bin/disinto init' to regenerate)
 # Note: This file is now committed to track volume mount configuration
 # docker-compose.yml
 # Generated Caddyfile — single source of truth is generate_caddyfile in lib/generators.sh
 docker/Caddyfile
 # Python bytecode
 __pycache__/
 *.pyc
 *.pyo
--- a/.woodpecker/agent-smoke.sh
+++ b/.woodpecker/agent-smoke.sh
@ -11,11 +11,6 @@ set -euo pipefail
 cd "$(dirname "$0")/.."
 # CI-side filesystem snapshot: show lib/ state at smoke time (#600)
 echo "=== smoke environment snapshot ==="
 ls -la lib/ 2>&1 | head -50
 echo "=== "
 FAILED=0
 # ── helpers ─────────────────────────────────────────────────────────────────
@ -98,37 +93,32 @@ echo "syntax check done"
 echo "=== 2/2  Function resolution ==="
-# Enumerate ALL lib/*.sh files in stable lexicographic order (#742).
+# Functions provided by shared lib files (available to all agent scripts via source).
 # Previous approach used a hand-maintained REQUIRED_LIBS list, which silently
 # became incomplete as new libs were added, producing partial LIB_FUNS that
 # caused non-deterministic "undef" failures.
 #
-# Excluded from LIB_FUNS (not sourced inline by agents):
+# Included — these are inline-sourced by agent scripts:
 #   lib/env.sh              — sourced by every agent (log, forge_api, etc.)
 #   lib/agent-sdk.sh        — sourced by SDK agents (agent_run, agent_recover_session)
 #   lib/ci-helpers.sh       — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.)
 #   lib/load-project.sh     — sourced by env.sh when PROJECT_TOML is set
 #   lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue)
 #   lib/secret-scan.sh      — sourced by file-action-issue.sh (scan_for_secrets, redact_secrets)
 #   lib/formula-session.sh  — sourced by formula-driven agents (acquire_run_lock, check_memory, etc.)
 #   lib/mirrors.sh          — sourced by merge sites (mirror_push)
 #   lib/guard.sh            — sourced by all polling-loop entry points (check_active)
 #   lib/issue-lifecycle.sh  — sourced by agents for issue claim/release/block/deps
 #   lib/worktree.sh         — sourced by agents for worktree create/recover/cleanup/preserve
 #
 # Excluded — not sourced inline by agents:
 #   lib/tea-helpers.sh      — sourced conditionally by env.sh (tea_file_issue, etc.); checked standalone below
 #   lib/ci-debug.sh         — standalone CLI tool, run directly (not sourced)
 #   lib/parse-deps.sh       — executed via `bash lib/parse-deps.sh` (not sourced)
 #   lib/hooks/*.sh          — Claude Code hook scripts, executed by the harness (not sourced)
-EXCLUDED_LIBS="lib/ci-debug.sh lib/parse-deps.sh"
+#
-
+# If a new lib file is added and sourced by agents, add it to LIB_FUNS below
-# Build the list of lib files in deterministic order (LC_ALL=C sort).
+# and add a check_script call for it in the lib files section further down.
 # Fail loudly if no lib files are found — checkout is broken.
 mapfile -t ALL_LIBS < <(LC_ALL=C find lib -maxdepth 1 -name '*.sh' -print | LC_ALL=C sort)
 if [ "${#ALL_LIBS[@]}" -eq 0 ]; then
  echo 'FAIL [no-libs] no lib/*.sh files found at smoke time' >&2
  printf '  pwd=%s\n' "$(pwd)" >&2
  echo '=== SMOKE TEST FAILED (precondition) ===' >&2
  exit 2
 fi
 # Build LIB_FUNS from all non-excluded lib files.
 # Use set -e inside the subshell so a failed get_fns aborts loudly
 # instead of silently shrinking the function list.
 LIB_FUNS=$(
-  set -e
+  for f in lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do
-  for f in "${ALL_LIBS[@]}"; do
+    if [ -f "$f" ]; then get_fns "$f"; fi
    # shellcheck disable=SC2086
    skip=0; for ex in $EXCLUDED_LIBS; do [ "$f" = "$ex" ] && skip=1; done
    [ "$skip" -eq 1 ] && continue
    get_fns "$f"
  done | sort -u
 )
@ -180,15 +170,8 @@ check_script() {
  while IFS= read -r fn; do
    [ -z "$fn" ] && continue
    is_known_cmd "$fn" && continue
-    # Use here-string (<<<) instead of pipe to avoid SIGPIPE race (#742):
+    if ! printf '%s\n' "$all_fns" | grep -qxF "$fn"; then
    # with pipefail, `printf | grep -q` can fail when grep closes the pipe
    # early after finding a match, causing printf to get SIGPIPE (exit 141).
    # This produced non-deterministic false "undef" failures.
    if ! grep -qxF "$fn" <<< "$all_fns"; then
      printf 'FAIL [undef] %s: %s\n' "$script" "$fn"
      printf '  all_fns count: %d\n' "$(grep -c . <<< "$all_fns")"
      printf '  LIB_FUNS contains "%s": %s\n' "$fn" "$(grep -cxF "$fn" <<< "$LIB_FUNS")"
      printf '  defining lib (if any): %s\n' "$(grep -l "^[[:space:]]*${fn}[[:space:]]*()" lib/*.sh 2>/dev/null | tr '\n' ' ')"
      FAILED=1
    fi
  done <<< "$candidates"
@ -201,8 +184,9 @@ check_script lib/env.sh              lib/mirrors.sh
 check_script lib/agent-sdk.sh
 check_script lib/ci-helpers.sh
 check_script lib/secret-scan.sh
 check_script lib/file-action-issue.sh   lib/secret-scan.sh
 check_script lib/tea-helpers.sh         lib/secret-scan.sh
-check_script lib/formula-session.sh     lib/ops-setup.sh
+check_script lib/formula-session.sh
 check_script lib/load-project.sh
 check_script lib/mirrors.sh              lib/env.sh
 check_script lib/guard.sh
@ -213,13 +197,12 @@ check_script lib/issue-lifecycle.sh   lib/secret-scan.sh
 # Still checked for function resolution against LIB_FUNS + own definitions.
 check_script lib/ci-debug.sh
 check_script lib/parse-deps.sh
 check_script lib/sprint-filer.sh
 # Agent scripts — list cross-sourced files where function scope flows across files.
 check_script dev/dev-agent.sh
 check_script dev/dev-poll.sh
 check_script dev/phase-test.sh
-check_script gardener/gardener-run.sh    lib/formula-session.sh
+check_script gardener/gardener-run.sh
 check_script review/review-pr.sh         lib/agent-sdk.sh
 check_script review/review-poll.sh
 check_script planner/planner-run.sh      lib/formula-session.sh
--- a/.woodpecker/detect-duplicates.py
+++ b/.woodpecker/detect-duplicates.py
@ -292,8 +292,6 @@ def main() -> int:
        "21aec56a99d5252b23fb9a38b895e8e8": "Verification helper: check body for Decomposed from pattern",
        "60ea98b3604557d539193b2a6624e232": "Verification helper: append sub-issue number",
        "9f6ae8e7811575b964279d8820494eb0": "Verification helper: for loop done pattern",
        # Standard lib source block shared across formula-driven agent run scripts
        "330e5809a00b95ade1a5fce2d749b94b": "Standard lib source block (env.sh, formula-session.sh, worktree.sh, guard.sh, agent-sdk.sh)",
    }
    if not sh_files:
--- a/.woodpecker/publish-images.yml
+++ b/.woodpecker/publish-images.yml
@ -1,64 +0,0 @@
 # .woodpecker/publish-images.yml — Build and push versioned container images
 # Triggered on tag pushes (e.g. v1.2.3). Builds and pushes:
 #   - ghcr.io/disinto/agents:<tag>
 #   - ghcr.io/disinto/reproduce:<tag>
 #   - ghcr.io/disinto/edge:<tag>
 #
 # Requires GHCR_TOKEN secret configured in Woodpecker with push access
 # to ghcr.io/disinto.
 when:
  event: tag
  ref: refs/tags/v*
 clone:
  git:
    image: alpine/git
    commands:
      - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
      - git clone --depth 1 "$AUTH_URL" .
      - git fetch --depth 1 origin "$CI_COMMIT_REF"
      - git checkout FETCH_HEAD
 steps:
  - name: build-and-push-agents
    image: plugins/docker
    settings:
      repo: ghcr.io/disinto/agents
      registry: ghcr.io
      dockerfile: docker/agents/Dockerfile
      context: .
      tags:
        - ${CI_COMMIT_TAG}
        - latest
      username: disinto
      password:
        from_secret: GHCR_TOKEN
  - name: build-and-push-reproduce
    image: plugins/docker
    settings:
      repo: ghcr.io/disinto/reproduce
      registry: ghcr.io
      dockerfile: docker/reproduce/Dockerfile
      context: .
      tags:
        - ${CI_COMMIT_TAG}
        - latest
      username: disinto
      password:
        from_secret: GHCR_TOKEN
  - name: build-and-push-edge
    image: plugins/docker
    settings:
      repo: ghcr.io/disinto/edge
      registry: ghcr.io
      dockerfile: docker/edge/Dockerfile
      context: docker/edge
      tags:
        - ${CI_COMMIT_TAG}
        - latest
      username: disinto
      password:
        from_secret: GHCR_TOKEN
--- a/.woodpecker/run-secret-scan.sh
+++ b/.woodpecker/run-secret-scan.sh
@ -1,68 +0,0 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # run-secret-scan.sh — CI wrapper for lib/secret-scan.sh
 #
 # Scans files changed in this PR for plaintext secrets.
 # Exits non-zero if any secret is detected.
 #
 # Environment:
 #   CI_COMMIT_TARGET_BRANCH — PR target branch; the diff is taken as
 #                             origin/<branch>...HEAD. Because of `set -u`,
 #                             the script aborts if it is unset.
 # The lib path below is relative to the working directory, so this script
 # must be started from a directory that contains lib/secret-scan.sh.
 # shellcheck source=../lib/secret-scan.sh
 source lib/secret-scan.sh
 # Path patterns considered secret-adjacent. They are extended regexes and are
 # matched unanchored, so a hit anywhere in a changed path selects that file.
 SECRET_PATH_PATTERNS=(
   '\.env'
   'tools/vault-.*\.sh'
   'nomad/'
   'vault/'
   'action-vault/'
   'lib/hvault\.sh'
   'lib/action-vault\.sh'
 )
 # Build a single alternation regex from the patterns (drop the trailing '|').
 path_regex=$(printf '%s|' "${SECRET_PATH_PATTERNS[@]}")
 path_regex="${path_regex%|}"
 # Get files changed in this PR vs target branch.
 # Note: shallow clone (depth 50) may lack the merge base for very large PRs,
 # causing git diff to fail — || true means the gate skips rather than blocks.
 changed_files=$(git diff --name-only --diff-filter=ACMR "origin/${CI_COMMIT_TARGET_BRANCH}...HEAD" || true)
 if [ -z "$changed_files" ]; then
   echo "secret-scan: no changed files found, skipping"
   exit 0
 fi
 # Filter to secret-adjacent paths only. grep exits 1 when nothing matches;
 # || true keeps that from tripping set -e so the empty case is handled below.
 target_files=$(grep -E -- "$path_regex" <<< "$changed_files" || true)
 if [ -z "$target_files" ]; then
   echo "secret-scan: no secret-adjacent files changed, skipping"
   exit 0
 fi
 # Split the newline-separated list into an array so the count, the listing
 # and the scan loop all work per path. Passing the whole list to printf as a
 # single argument would indent only the first path.
 mapfile -t targets <<< "$target_files"
 echo "secret-scan: scanning ${#targets[@]} file(s):"
 printf '  %s\n' "${targets[@]}"
 failures=0
 for file in "${targets[@]}"; do
   # Skip deleted files / non-existent
   [ -f "$file" ] || continue
   # Skip binary files
   if file -b --mime-encoding "$file" 2>/dev/null | grep -q binary; then
     continue
   fi
   content=$(<"$file")
   # scan_for_secrets comes from lib/secret-scan.sh; a non-zero status is
   # treated here as "secret detected".
   if ! scan_for_secrets "$content"; then
     echo "FAIL: secret detected in $file"
     failures=$((failures + 1))
   fi
 done
 if [ "$failures" -gt 0 ]; then
   echo ""
   echo "secret-scan: $failures file(s) contain potential secrets — merge blocked"
   echo "If these are false positives, verify patterns in lib/secret-scan.sh"
   exit 1
 fi
 echo "secret-scan: all files clean"
--- a/.woodpecker/secret-scan.yml
+++ b/.woodpecker/secret-scan.yml
@ -1,32 +0,0 @@
 # .woodpecker/secret-scan.yml — Block PRs that leak plaintext secrets
 #
 # Triggers on pull requests touching secret-adjacent paths.
 # Sources lib/secret-scan.sh and scans each changed file's content.
 # Exits non-zero if any potential secret is detected.
 when:
  - event: pull_request
    path:
      - ".env*"
      - "tools/vault-*.sh"
      - "nomad/**/*"
      - "vault/**/*"
      - "action-vault/**/*"
      - "lib/hvault.sh"
      - "lib/action-vault.sh"
 clone:
  git:
    image: alpine/git
    commands:
      - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
      - git clone --depth 50 "$AUTH_URL" .
      - git fetch --depth 50 origin "$CI_COMMIT_REF" "$CI_COMMIT_TARGET_BRANCH"
      - git checkout FETCH_HEAD
 steps:
  - name: secret-scan
    image: alpine:3
    commands:
      - apk add --no-cache bash git grep file
      - bash .woodpecker/run-secret-scan.sh
--- a/AGENTS.md
+++ b/AGENTS.md
@ -1,4 +1,4 @@
-<!-- last-reviewed: c363ee0aea2ae447daab28c2c850d6abefc8c6b5 -->
+<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
 # Disinto — Agent Instructions
 ## What this repo is
@ -31,19 +31,19 @@ disinto/                 (code repo)
 ├── supervisor/    supervisor-run.sh — formula-driven health monitoring (polling-loop executor)
 │                  preflight.sh — pre-flight data collection for supervisor formula
 ├── architect/     architect-run.sh — strategic decomposition of vision into sprints
-├── action-vault/  vault-env.sh — shared env setup (vault redesign in progress, see #73-#77)
+├── vault/         vault-env.sh — shared env setup (vault redesign in progress, see #73-#77)
 │                  SCHEMA.md — vault item schema documentation
 │                  validate.sh — vault item validator
 │                  examples/ — example vault action TOMLs (promote, publish, release, webhook-call)
-├── lib/           env.sh, agent-sdk.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py, branch-protection.sh, secret-scan.sh, tea-helpers.sh, action-vault.sh, ci-log-reader.py, git-creds.sh, sprint-filer.sh, hvault.sh
+├── lib/           env.sh, agent-sdk.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py,
 │                  branch-protection.sh, secret-scan.sh, tea-helpers.sh, vault.sh, ci-log-reader.py
 │                  hooks/ — Claude Code session hooks (on-compact-reinject, on-idle-stop, on-phase-change, on-pretooluse-guard, on-session-end, on-stop-failure)
 ├── projects/      *.toml.example — templates; *.toml — local per-box config (gitignored)
 ├── formulas/      Issue templates (TOML specs for multi-step agent tasks)
-├── docker/        Dockerfiles and entrypoints: reproduce, triage, edge dispatcher, chat (server.py, entrypoint-chat.sh, Dockerfile, ui/)
+├── docker/        Dockerfiles and entrypoints for reproduce, triage, and edge dispatcher agents
 ├── tools/         Operational tools: edge-control/ (register.sh, install.sh, verify-chat-sandbox.sh)
 ├── docs/          Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md)
 ├── site/          disinto.ai website content
-├── tests/         Test files (mock-forgejo.py, smoke-init.sh, lib-hvault.bats)
+├── tests/         Test files (mock-forgejo.py, smoke-init.sh)
 ├── templates/     Issue templates
 ├── bin/           The `disinto` CLI script
 ├── disinto-factory/  Setup documentation and skill
@ -86,7 +86,7 @@ Each agent has a `.profile` repository on Forgejo storing `knowledge/lessons-lea
 - All scripts start with `#!/usr/bin/env bash` and `set -euo pipefail`
 - Source shared environment: `source "$(dirname "$0")/../lib/env.sh"`
 - Log to `$LOGFILE` using the `log()` function from env.sh or defined locally
- Never hardcode secrets — agent secrets come from `.env.enc`, vault secrets from `secrets/<NAME>.enc` (age-encrypted, one file per key)
+- Never hardcode secrets — agent secrets come from `.env.enc`, vault secrets from `.env.vault.enc` (or `.env`/`.env.vault` fallback)
 - Never embed secrets in issue bodies, PR descriptions, or comments — use env var references (e.g. `$BASE_RPC_URL`)
 - ShellCheck must pass (CI runs `shellcheck` on all `.sh` files)
 - Avoid duplicate code — shared helpers go in `lib/`
@ -113,13 +113,10 @@ bash dev/phase-test.sh
 | Supervisor | `supervisor/` | Health monitoring | [supervisor/AGENTS.md](supervisor/AGENTS.md) |
 | Planner | `planner/` | Strategic planning | [planner/AGENTS.md](planner/AGENTS.md) |
 | Predictor | `predictor/` | Infrastructure pattern detection | [predictor/AGENTS.md](predictor/AGENTS.md) |
-| Architect | `architect/` | Strategic decomposition (read-only on project repo) | [architect/AGENTS.md](architect/AGENTS.md) |
+| Architect | `architect/` | Strategic decomposition | [architect/AGENTS.md](architect/AGENTS.md) |
 | Filer | `lib/sprint-filer.sh` | Sub-issue filing from merged sprint PRs | ops repo pipeline (deferred, see #779) |
 | Reproduce | `docker/reproduce/` | Bug reproduction using Playwright MCP | `formulas/reproduce.toml` |
 | Triage | `docker/reproduce/` | Deep root cause analysis | `formulas/triage.toml` |
 | Edge dispatcher | `docker/edge/` | Polls ops repo for vault actions, executes via Claude sessions | `docker/edge/dispatcher.sh` |
 | agents-llama | `docker/agents/` (same image) | Local-Qwen dev agent (`AGENT_ROLES=dev`), gated on `ENABLE_LLAMA_AGENT=1` | [docs/agents-llama.md](docs/agents-llama.md) |
 | agents-llama-all | `docker/agents/` (same image) | Local-Qwen all-roles agent (all 7 roles), profile `agents-llama-all` | [docs/agents-llama.md](docs/agents-llama.md) |
 > **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77).
 > See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow details.
@ -138,7 +135,7 @@ Issues flow: `backlog` → `in-progress` → PR → CI → review → merge →
 |---|---|---|
 | `backlog` | Issue is queued for implementation. Dev-poll picks the first ready one. | Planner, gardener, humans |
 | `priority` | Queue tier above plain backlog. Issues with both `priority` and `backlog` are picked before plain `backlog` issues. FIFO within each tier. | Planner, humans |
-| `in-progress` | Dev-agent is actively working on this issue. Only one issue per project is in-progress at a time. Also set on vision issues by filer-bot when sub-issues are filed (#764). | dev-agent.sh (claims issue), filer-bot (vision issues) |
+| `in-progress` | Dev-agent is actively working on this issue. Only one issue per project is in-progress at a time. | dev-agent.sh (claims issue) |
 | `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, dev-poll.sh (on failure) |
 | `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) |
 | `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) |
@ -177,17 +174,19 @@ Humans write these. Agents read and enforce them.
 | ID | Decision | Rationale |
 |---|---|---|
 | AD-001 | Nervous system runs from a polling loop (`docker/agents/entrypoint.sh`), not PR-based actions. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) |
-| AD-002 | **Concurrency is bounded per LLM backend, not per project.** One concurrent Claude session per OAuth credential pool; one concurrent session per llama-server instance. Containers with disjoint backends may run in parallel. | The single-thread invariant is about *backends*, not pipelines. **(a) Anthropic OAuth credentials race on token refresh** — each container uses a per-session `CLAUDE_CONFIG_DIR`, so Claude Code's native lockfile-based OAuth refresh handles contention automatically without external serialization. (Legacy: set `CLAUDE_EXTERNAL_LOCK=1` to re-enable the old `flock session.lock` wrapper for rollback.) **(b) llama-server has finite VRAM and one KV cache** — parallel inference thrashes the cache and risks OOM. All llama-backed agents serialize on the same lock. **(c) Disjoint backends are free to parallelize.** Today `disinto-agents` (Anthropic OAuth, runs `review,gardener`) runs concurrently with `disinto-agents-llama` (llama, runs `dev`) on the same project — they share neither OAuth state nor llama VRAM. **(d) Per-project work-conflict safety** (no duplicate dev work, no merge conflicts on the same branch) is enforced by `issue_claim` (assignee + `in-progress` label) and per-issue worktrees — that's a separate guard that does NOT depend on this AD. |
+| AD-002 | **Concurrency is bounded per LLM backend, not per project.** One concurrent Claude session per OAuth credential pool; one concurrent session per llama-server instance. Containers with disjoint backends may run in parallel. | The single-thread invariant is about *backends*, not pipelines. **(a) Anthropic OAuth credentials race on token refresh** — two sessions sharing one mounted `~/.claude` will trip over each other during rotation and 401. All agents inside an OAuth-mounted container serialize on `flock session.lock`. **(b) llama-server has finite VRAM and one KV cache** — parallel inference thrashes the cache and risks OOM. All llama-backed agents serialize on the same lock. **(c) Disjoint backends are free to parallelize.** Today `disinto-agents` (Anthropic OAuth, runs `review,gardener`) runs concurrently with `disinto-agents-llama` (llama, runs `dev`) on the same project — they share neither OAuth state nor llama VRAM. **(d) Per-project work-conflict safety** (no duplicate dev work, no merge conflicts on the same branch) is enforced by `issue_claim` (assignee + `in-progress` label) and per-issue worktrees — that's a separate guard that does NOT depend on this AD. |
 | AD-003 | The runtime creates and destroys, the formula preserves. | Runtime manages worktrees/sessions/temp. Formulas commit knowledge to git before signaling done. |
 | AD-004 | Event-driven > polling > fixed delays. | Never `waitForTimeout` or hardcoded sleep. Use phase files, webhooks, or poll loops with backoff. |
-| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc` (SOPS-encrypted), vault secrets in `secrets/<NAME>.enc` (age-encrypted, one file per key). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. |
+| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (SOPS-encrypted when available; plaintext `.env`/`.env.vault` fallback supported). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. |
-| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `secrets/<NAME>.enc` and are decrypted into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. (Vault redesign in progress: PR-based approval on ops repo, see #73-#77) |
+| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. (Vault redesign in progress: PR-based approval on ops repo, see #73-#77) |
 **Who enforces what:**
 - **Gardener** checks open backlog issues against ADs during grooming; closes violations with a comment referencing the AD number.
 - **Planner** plans within the architecture; does not create issues that violate ADs.
 - **Dev-agent** reads AGENTS.md before implementing; refuses work that violates ADs.
- **AD-002 is a runtime invariant; nothing for the gardener to check at issue-groom time.** OAuth concurrency is handled by per-session `CLAUDE_CONFIG_DIR` isolation (with `CLAUDE_EXTERNAL_LOCK` as a rollback flag). Per-issue work is enforced by `issue_claim`. A violation manifests as a 401 or VRAM OOM in agent logs, not as a malformed issue.
+- **AD-002 is a runtime invariant; nothing for the gardener to check at issue-groom time.** Concurrency is enforced by `flock session.lock` within each container and by `issue_claim` for per-issue work. A violation manifests as a 401 or VRAM OOM in agent logs, not as a malformed issue.
 ---
 ## Phase-Signaling Protocol
@ -197,4 +196,6 @@ at each phase boundary by writing to a phase file (e.g.
 Key phases: `PHASE:awaiting_ci` → `PHASE:awaiting_review` → `PHASE:done`.
 Also: `PHASE:escalate` (needs human input), `PHASE:failed`.
-See [docs/PHASE-PROTOCOL.md](docs/PHASE-PROTOCOL.md) for the complete spec, orchestrator reaction matrix, sequence diagram, and crash recovery.
+
 See [docs/PHASE-PROTOCOL.md](docs/PHASE-PROTOCOL.md) for the complete spec
 including the orchestrator reaction matrix, sequence diagram, and crash recovery.
--- a/README.md
+++ b/README.md
@ -72,8 +72,6 @@ cd disinto
 disinto init https://github.com/yourorg/yourproject
 ```
 This will generate a `docker-compose.yml` file.
 Or configure manually — edit `.env` with your values:
 ```bash
@ -99,7 +97,7 @@ CLAUDE_TIMEOUT=7200         # max seconds per Claude invocation (default: 2h)
 docker compose up -d
 # 4. Verify the entrypoint loop is running
-docker exec disinto-agents tail -f /home/agent/data/agent-entrypoint.log
+docker exec disinto-agents-1 tail -f /home/agent/data/agent-entrypoint.log
 ```
 ## Directory Structure
--- a/architect/AGENTS.md
+++ b/architect/AGENTS.md
@ -1,4 +1,4 @@
-<!-- last-reviewed: c363ee0aea2ae447daab28c2c850d6abefc8c6b5 -->
+<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
 # Architect — Agent Instructions
 ## What this agent is
@ -10,9 +10,9 @@ converses with humans through PR comments.
 ## Role
 - **Input**: Vision issues from VISION.md, prerequisite tree from ops repo
- **Output**: Sprint proposals as PRs on the ops repo (with embedded `## Sub-issues` blocks)
+- **Output**: Sprint proposals as PRs on the ops repo, sub-issue files
 - **Mechanism**: Bash-driven orchestration in `architect-run.sh`, pitching formula via `formulas/run-architect.toml`
- **Identity**: `architect-bot` on Forgejo (READ-ONLY on project repo, write on ops repo only — #764)
+- **Identity**: `architect-bot` on Forgejo
 ## Responsibilities
@ -24,66 +24,40 @@ converses with humans through PR comments.
   acceptance criteria and dependencies
 4. **Human conversation**: Respond to PR comments, refine sprint proposals based
   on human feedback
-5. **Sub-issue definition**: Define concrete sub-issues in the `## Sub-issues`
+5. **Sub-issue filing**: After design forks are resolved, file concrete sub-issues
-   block of the sprint spec. Filing is handled by `filer-bot` after sprint PR
+   for implementation
   merge (#764)
 ## Formula
 The architect pitching is driven by `formulas/run-architect.toml`. This formula defines
 the steps for:
 - Research: analyzing vision items and prerequisite tree
- Pitch: creating structured sprint PRs with embedded `## Sub-issues` blocks
+- Pitch: creating structured sprint PRs
- Design Q&A: refining the sprint via PR comments after human ACCEPT
+- Sub-issue filing: creating concrete implementation issues
 ## Bash-driven orchestration
 Bash in `architect-run.sh` handles state detection and orchestration:
 - **Deterministic state detection**: Bash reads the Forgejo reviews API to detect
-  ACCEPT/REJECT decisions — checks both formal APPROVED reviews and PR comments, not just comments (#718)
+  ACCEPT/REJECT decisions — no model-dependent API parsing
 - **Human guidance injection**: Review body text from ACCEPT reviews is injected
  directly into the research prompt as context
 - **Response processing**: When ACCEPT/REJECT responses are detected, bash invokes
  the agent with appropriate context (session resumed for questions phase)
 - **Pitch capture**: `pitch_output` is written to a temp file instead of captured via `$()` subshell, because `agent_run` writes to side-channels (`SID_FILE`, `LOGFILE`) that subshell capture would suppress (#716)
 - **PR URL construction**: existing-PR check uses `${FORGE_API}/pulls` directly (not `${FORGE_API}/repos/…`) — the base URL already includes the repos segment (#717)
 ### State transitions
 ```
 New vision issue → pitch PR (model generates pitch, bash creates PR)
  ↓
-APPROVED review → start design questions (model posts Q1:, adds Design forks section)
+ACCEPT review → research + questions (model, session saved to $SID_FILE)
  ↓
-Answers received → continue Q&A (model processes answers, posts follow-ups)
+Answers received → sub-issue filing (model, session resumed via --resume)
  ↓
 All forks resolved → finalize ## Sub-issues section in sprint spec
  ↓
 Sprint PR merged → filer-bot files sub-issues on project repo (#764)
  ↓
 REJECT review → close PR + journal (model processes rejection, bash merges PR)
 ```
 ### Vision issue lifecycle
 Vision issues decompose into sprint sub-issues. Sub-issues are defined in the
 `## Sub-issues` block of the sprint spec (between `<!-- filer:begin -->` and
 `<!-- filer:end -->` markers) and filed by `filer-bot` after the sprint PR merges
 on the ops repo (#764).
 Each filer-created sub-issue carries a `<!-- decomposed-from: #<vision>, sprint: <slug>, id: <id> -->`
 marker in its body for idempotency and traceability.
 The filer-bot (via `lib/sprint-filer.sh`) handles vision lifecycle:
 1. After filing sub-issues, adds `in-progress` label to the vision issue
 2. On each run, checks if all sub-issues for a vision are closed
 3. If all closed, posts a summary comment and closes the vision issue
 The architect no longer writes to the project repo — it is read-only (#764).
 All project-repo writes (issue filing, label management, vision closure) are
 handled by filer-bot with its narrowly-scoped `FORGE_FILER_TOKEN`.
 ### Session management
 The agent maintains a global session file at `/tmp/architect-session-{project}.sid`.
@ -96,7 +70,6 @@ Run via `architect/architect-run.sh`, which:
 - Acquires a poll-loop lock (via `acquire_lock`) and checks available memory
 - Cleans up per-issue scratch files from previous runs (`/tmp/architect-{project}-scratch-*.md`)
 - Sources shared libraries (env.sh, formula-session.sh)
 - Exports `FORGE_TOKEN_OVERRIDE="${FORGE_ARCHITECT_TOKEN}"` BEFORE sourcing env.sh, ensuring architect-bot identity survives re-sourcing (#762)
 - Uses FORGE_ARCHITECT_TOKEN for authentication
 - Processes existing architect PRs via bash-driven design phase
 - Loads the formula and builds context from VISION.md, AGENTS.md, and ops repo
@ -106,9 +79,7 @@ Run via `architect/architect-run.sh`, which:
  - Selects up to `pitch_budget` (3 - open architect PRs) remaining vision issues
  - For each selected issue, invokes stateless `claude -p` with issue body + context
  - Creates PRs directly from pitch content (no scratch files)
- Agent is invoked for stateless pitch generation and response processing (ACCEPT/REJECT handling)
+- Agent is invoked only for response processing (ACCEPT/REJECT handling)
 - NOTE: architect-bot is read-only on the project repo (#764) — sub-issue filing
  and in-progress label management are handled by filer-bot after sprint PR merge
 **Multi-sprint pitching**: The architect pitches up to 3 sprints per run. Bash handles all state management:
 - Fetches Forgejo API data (vision issues, open PRs, merged PRs)
@ -133,5 +104,4 @@ empty file not created, just document it).
 - #100: Architect formula — research + design fork identification
 - #101: Architect formula — sprint PR creation with questions
 - #102: Architect formula — answer parsing + sub-issue filing
 - #764: Permission scoping — architect read-only on project repo, filer-bot files sub-issues
 - #491: Refactor — bash-driven design phase with stateful session resumption
--- a/architect/architect-run.sh
+++ b/architect/architect-run.sh
@ -34,11 +34,10 @@ FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
 # Accept project config from argument; default to disinto
 export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}"
 # Set override BEFORE sourcing env.sh so it survives any later re-source of
 # env.sh from nested shells / claude -p tools (#762, #747)
 export FORGE_TOKEN_OVERRIDE="${FORGE_ARCHITECT_TOKEN:-}"
 # shellcheck source=../lib/env.sh
 source "$FACTORY_ROOT/lib/env.sh"
 # Override FORGE_TOKEN with architect-bot's token (#747)
 FORGE_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}"
 # shellcheck source=../lib/formula-session.sh
 source "$FACTORY_ROOT/lib/formula-session.sh"
 # shellcheck source=../lib/worktree.sh
@ -79,9 +78,6 @@ memory_guard 2000
 log "--- Architect run start ---"
 # ── Resolve forge remote for git operations ─────────────────────────────
 # Run git operations from the project checkout, not the baked code dir
 cd "$PROJECT_REPO_ROOT"
 resolve_forge_remote
 # ── Resolve agent identity for .profile repo ────────────────────────────
@ -117,8 +113,8 @@ build_architect_prompt() {
 You are the architect agent for ${FORGE_REPO}. Work through the formula below.
 Your role: strategic decomposition of vision issues into development sprints.
-Propose sprints via PRs on the ops repo, converse with humans through PR comments.
+Propose sprints via PRs on the ops repo, converse with humans through PR comments,
-You are READ-ONLY on the project repo — sub-issues are filed by filer-bot after sprint PR merge (#764).
+and file sub-issues after design forks are resolved.
 ## Project context
 ${CONTEXT_BLOCK}
@ -133,88 +129,7 @@ ${PROMPT_FOOTER}
 _PROMPT_EOF_
 }
-# ── Build prompt for specific session mode ───────────────────────────────
+PROMPT=$(build_architect_prompt)
 # Build the architect prompt text for a given session mode.
 # Args: session_mode (pitch / questions_phase / start_questions)
 # Returns: prompt text via stdout
 # Globals read: FORGE_REPO, CONTEXT_BLOCK, GRAPH_SECTION, SCRATCH_CONTEXT,
 #   FORMULA_CONTENT, SCRATCH_INSTRUCTION, PROMPT_FOOTER
 # Calls: formula_lessons_block, build_architect_prompt (for the default mode)
 # The heredocs use an unquoted delimiter so that ${...} and $(...) expand.
 # Literal backticks in the prompt text must therefore be escaped (\`),
 # otherwise bash runs them as command substitution and the enclosed
 # "## Design forks" text (a shell comment) expands to nothing.
 build_architect_prompt_for_mode() {
  local session_mode="$1"
  case "$session_mode" in
    "start_questions")
      # Pitch was approved; the design Q&A has not started yet
      cat <<_PROMPT_EOF_
 You are the architect agent for ${FORGE_REPO}. Work through the formula below.
 Your role: strategic decomposition of vision issues into development sprints.
 Propose sprints via PRs on the ops repo, converse with humans through PR comments.
 You are READ-ONLY on the project repo — sub-issues are filed by filer-bot after sprint PR merge (#764).
 ## CURRENT STATE: Approved PR awaiting initial design questions
 A sprint pitch PR has been approved by the human (via APPROVED review), but the
 design conversation has not yet started. Your task is to:
 1. Read the approved sprint pitch from the PR body
 2. Identify the key design decisions that need human input
 3. Post initial design questions (Q1:, Q2:, etc.) as comments on the PR
 4. Add a \`## Design forks\` section to the PR body documenting the design decisions
 5. Update the ## Sub-issues section in the sprint spec if design decisions affect decomposition
 This is NOT a pitch phase — the pitch is already approved. This is the START
 of the design Q&A phase. Sub-issues are filed by filer-bot after sprint PR merge (#764).
 ## Project context
 ${CONTEXT_BLOCK}
 ${GRAPH_SECTION}
 ${SCRATCH_CONTEXT}
 $(formula_lessons_block)
 ## Formula
 ${FORMULA_CONTENT}
 ${SCRATCH_INSTRUCTION}
 ${PROMPT_FOOTER}
 _PROMPT_EOF_
      ;;
    "questions_phase")
      # Design Q&A already running; process answers and follow up
      cat <<_PROMPT_EOF_
 You are the architect agent for ${FORGE_REPO}. Work through the formula below.
 Your role: strategic decomposition of vision issues into development sprints.
 Propose sprints via PRs on the ops repo, converse with humans through PR comments.
 You are READ-ONLY on the project repo — sub-issues are filed by filer-bot after sprint PR merge (#764).
 ## CURRENT STATE: Design Q&A in progress
 A sprint pitch PR is in the questions phase:
 - The PR has a \`## Design forks\` section
 - Initial questions (Q1:, Q2:, etc.) have been posted
 - Humans may have posted answers or follow-up questions
 Your task is to:
 1. Read the existing questions and the PR body
 2. Read human answers from PR comments
 3. Parse the answers and determine next steps
 4. Post follow-up questions if needed (Q3:, Q4:, etc.)
 5. If all design forks are resolved, finalize the ## Sub-issues section in the sprint spec
 6. Update the \`## Design forks\` section as you progress
 ## Project context
 ${CONTEXT_BLOCK}
 ${GRAPH_SECTION}
 ${SCRATCH_CONTEXT}
 $(formula_lessons_block)
 ## Formula
 ${FORMULA_CONTENT}
 ${SCRATCH_INSTRUCTION}
 ${PROMPT_FOOTER}
 _PROMPT_EOF_
      ;;
    "pitch"|*)
      # Default: pitch new sprints (original behavior)
      build_architect_prompt
      ;;
  esac
 }
 # ── Create worktree ──────────────────────────────────────────────────────
 formula_worktree_setup "$WORKTREE"
@ -236,7 +151,7 @@ detect_questions_phase() {
  # Use Forgejo API to find open architect PRs
  local response
  response=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls?state=open" 2>/dev/null) || return 1
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open" 2>/dev/null) || return 1
  # Check each open PR for architect markers
  pr_number=$(printf '%s' "$response" | jq -r '.[] | select(.title | contains("architect:")) | .number' 2>/dev/null | head -1) || return 1
@ -247,7 +162,7 @@ detect_questions_phase() {
  # Fetch PR body
  pr_body=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls/${pr_number}" 2>/dev/null | jq -r '.body // empty') || return 1
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_number}" 2>/dev/null | jq -r '.body // empty') || return 1
  # Check for `## Design forks` section (added by #101 after ACCEPT)
  if ! printf '%s' "$pr_body" | grep -q "## Design forks"; then
@ -258,7 +173,7 @@ detect_questions_phase() {
  # Use jq to extract body text before grepping (handles JSON escaping properly)
  local comments
  comments=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" 2>/dev/null) || return 1
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" 2>/dev/null) || return 1
  if ! printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qE 'Q[0-9]+:'; then
    return 1
@ -269,71 +184,6 @@ detect_questions_phase() {
  return 0
 }
 # ── Detect if PR is approved and awaiting initial design questions ────────
 # A PR is in this state when:
 # - It's an open architect PR on ops repo
 # - It has an APPROVED review (from human acceptance)
 # - It has NO `## Design forks` section yet
 # - It has NO Q1:, Q2:, etc. comments yet
 # This means the human accepted the pitch and we need to start the design
 # conversation by posting initial questions and adding the Design forks section.
 # Args: none
 # Globals read: OPS_REPO_ROOT (defaults to /home/agent/data/ops), FORGE_TOKEN,
 #   FORGE_API_BASE, FORGE_OPS_REPO
 # Returns: 0 when the first open architect PR matches the state above (and logs
 #   the PR number); 1 otherwise, including when the ops checkout is missing or
 #   a guarded API request fails.
 detect_approved_pending_questions() {
  local pr_number=""
  local pr_body=""
  # Get open architect PRs on ops repo
  # (bail out early when there is no local ops repo checkout)
  local ops_repo="${OPS_REPO_ROOT:-/home/agent/data/ops}"
  if [ ! -d "${ops_repo}/.git" ]; then
    return 1
  fi
  # Use Forgejo API to find open architect PRs
  local response
  response=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls?state=open" 2>/dev/null) || return 1
  # Check each open PR for architect markers
  # (title contains "architect:"; `head -1` keeps only the first match, so
  # only that single PR is inspected below)
  pr_number=$(printf '%s' "$response" | jq -r '.[] | select(.title | contains("architect:")) | .number' 2>/dev/null | head -1) || return 1
  if [ -z "$pr_number" ]; then
    return 1
  fi
  # Fetch PR body
  pr_body=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls/${pr_number}" 2>/dev/null | jq -r '.body // empty') || return 1
  # Check for APPROVED review
  local reviews
  reviews=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/reviews" 2>/dev/null) || return 1
  # jq -e exits non-zero when no review has state APPROVED
  if ! printf '%s' "$reviews" | jq -e '.[] | select(.state == "APPROVED")' >/dev/null 2>&1; then
    return 1
  fi
  # Check that PR does NOT have `## Design forks` section yet
  # (we're in the "start questions" phase, not "process answers" phase)
  if printf '%s' "$pr_body" | grep -q "## Design forks"; then
    # Has design forks section — this is either in questions phase or past it
    return 1
  fi
  # Check that PR has NO question comments yet (Q1:, Q2:, etc.)
  # Comment bodies are extracted with jq first so grep sees plain text
  local comments
  comments=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" 2>/dev/null) || return 1
  if printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qE 'Q[0-9]+:'; then
    # Has question comments — this is either in questions phase or past it
    return 1
  fi
  # PR is approved and awaiting initial design questions
  log "Detected PR #${pr_number} approved and awaiting initial design questions"
  return 0
 }
 # ── Sub-issue existence check ────────────────────────────────────────────
 # Check if a vision issue already has sub-issues filed from it.
 # Returns 0 if sub-issues exist and are open, 1 otherwise.
@ -372,7 +222,7 @@ has_merged_sprint_pr() {
  # Get closed PRs from ops repo
  local prs_json
  prs_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls?state=closed&limit=100" 2>/dev/null) || return 1
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=closed&limit=100" 2>/dev/null) || return 1
  # Check each closed PR for architect markers and vision issue reference
  local pr_numbers
@ -385,7 +235,7 @@ has_merged_sprint_pr() {
    # Get PR details including merged status
    local pr_details
    pr_details=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
-      "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}" 2>/dev/null) || continue
+      "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}" 2>/dev/null) || continue
    # Check if PR is actually merged (not just closed)
    local is_merged
@ -418,16 +268,11 @@ fetch_vision_issues() {
    "${FORGE_API}/issues?labels=vision&state=open&limit=100" 2>/dev/null || echo '[]'
 }
 # NOTE: get_vision_subissues, all_subissues_closed, close_vision_issue,
 # check_and_close_completed_visions removed (#764) — architect-bot is read-only
 # on the project repo. Vision lifecycle (closing completed visions, adding
 # in-progress labels) is now handled by filer-bot via lib/sprint-filer.sh.
 # ── Helper: Fetch open architect PRs from ops repo Forgejo API ───────────
 # Returns: JSON array of architect PR objects
 fetch_open_architect_prs() {
  curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null || echo '[]'
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null || echo '[]'
 }
 # ── Helper: Get vision issue body by number ──────────────────────────────
@ -513,23 +358,7 @@ Instructions:
 ## Recommendation
 <architect's assessment: worth it / defer / alternative approach>
 ## Sub-issues
 <!-- filer:begin -->
 - id: <kebab-case-id>
  title: \"vision(#${issue_num}): <concise sub-issue title>\"
  labels: [backlog]
  depends_on: []
  body: |
    ## Goal
    <what this sub-issue accomplishes>
    ## Acceptance criteria
    - [ ] <criterion>
 <!-- filer:end -->
 IMPORTANT: Do NOT include design forks or questions. This is a go/no-go pitch.
 The ## Sub-issues block is parsed by the filer-bot pipeline after sprint PR merge.
 Each sub-issue between filer:begin/end markers becomes a Forgejo issue.
 ---
@ -537,11 +366,12 @@ ${pitch_context}
 "
  # Execute stateless claude -p call
-  agent_run "$pitch_prompt" 2>>"$LOGFILE" || true
+  local pitch_output
  pitch_output=$(agent_run -p "$pitch_prompt" --output-format json --dangerously-skip-permissions --max-turns 200 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") || true
  # Extract pitch content from JSON response
  local pitch
-  pitch=$(printf '%s' "$_AGENT_LAST_OUTPUT" | jq -r '.result // empty' 2>/dev/null) || pitch=""
+  pitch=$(printf '%s' "$pitch_output" | jq -r '.content // empty' 2>/dev/null) || pitch=""
  if [ -z "$pitch" ]; then
    log "WARNING: empty pitch generated for vision issue #${issue_num}"
@ -564,7 +394,7 @@ create_sprint_pr() {
  if ! curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/branches" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/branches" \
    -d "{\"new_branch_name\": \"${branch_name}\", \"old_branch_name\": \"${PRIMARY_BRANCH:-main}\"}" >/dev/null 2>&1; then
    log "WARNING: failed to create branch ${branch_name}"
    return 1
@ -589,7 +419,7 @@ ${sprint_body}
  if ! curl -sf -X PUT \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/contents/sprints/${sprint_slug}.md" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/contents/sprints/${sprint_slug}.md" \
    -d "{\"message\": \"sprint: add ${sprint_slug}.md\", \"content\": \"${sprint_spec_b64}\", \"branch\": \"${branch_name}\"}" >/dev/null 2>&1; then
    log "WARNING: failed to write sprint spec file"
    return 1
@ -608,7 +438,7 @@ ${sprint_body}
  pr_response=$(curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls" \
    -d "$pr_payload" 2>/dev/null) || return 1
  # Extract PR number
@ -628,7 +458,7 @@ post_pr_footer() {
  if curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" \
    -d "{\"body\": \"${footer}\"}" >/dev/null 2>&1; then
    log "Posted footer comment on PR #${pr_number}"
    return 0
@ -638,8 +468,37 @@ post_pr_footer() {
  fi
 }
-# NOTE: add_inprogress_label removed (#764) — architect-bot is read-only on
+# ── Helper: Add in-progress label to vision issue ────────────────────────
-# project repo. in-progress label is now added by filer-bot via sprint-filer.sh.
+# Args: vision_issue_number
 # Add the 'in-progress' label to a vision issue on the project repo.
 # Args: $1 - vision issue number
 # Globals read: FORGE_TOKEN, FORGE_API
 # Returns: 0 when the label was added; 1 when the label list cannot be
 #   fetched, the label does not exist, or the POST fails.
 # FORGE_API is used as the repo-scoped base for both requests (the label
 # lookup already relies on that), so the issue URL must not repeat the
 # /repos/<owner>/<repo> segment.
 add_inprogress_label() {
  local issue_num="$1"
  # Get label ID for 'in-progress'
  local labels_json
  labels_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API}/labels" 2>/dev/null) || return 1
  local inprogress_label_id
  # Best-effort parse: a jq failure leaves the ID empty, handled just below
  inprogress_label_id=$(printf '%s' "$labels_json" | jq -r --arg label "in-progress" '.[] | select(.name == $label) | .id' 2>/dev/null) || true
  if [ -z "$inprogress_label_id" ]; then
    log "WARNING: in-progress label not found"
    return 1
  fi
  # Add label to issue
  if curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API}/issues/${issue_num}/labels" \
    -d "{\"labels\": [${inprogress_label_id}]}" >/dev/null 2>&1; then
    log "Added in-progress label to vision issue #${issue_num}"
    return 0
  else
    log "WARNING: failed to add in-progress label to vision issue #${issue_num}"
    return 1
  fi
 }
 # ── Precondition checks in bash before invoking the model ─────────────────
@ -649,7 +508,7 @@ vision_count=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
 if [ "${vision_count:-0}" -eq 0 ]; then
  # Check for open architect PRs that need handling (ACCEPT/REJECT responses)
  open_arch_prs=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=10" 2>/dev/null | jq '[.[] | select(.title | startswith("architect:"))] | length') || open_arch_prs=0
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=10" 2>/dev/null | jq '[.[] | select(.title | startswith("architect:"))] | length') || open_arch_prs=0
  if [ "${open_arch_prs:-0}" -eq 0 ]; then
    log "no vision issues and no open architect PRs — skipping"
    exit 0
@ -660,18 +519,10 @@ fi
 # This ensures responses are processed regardless of open_arch_prs count
 has_responses_to_process=false
 pr_numbers=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
-  "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq -r '.[] | select(.title | startswith("architect:")) | .number') || pr_numbers=""
+  "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq -r '.[] | select(.title | startswith("architect:")) | .number') || pr_numbers=""
 for pr_num in $pr_numbers; do
  # Check formal reviews first (Forgejo green check via review API)
  reviews=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}/reviews" 2>/dev/null) || reviews="[]"
  if printf '%s' "$reviews" | jq -e '.[] | select(.state == "APPROVED" or .state == "REQUEST_CHANGES")' >/dev/null 2>&1; then
    has_responses_to_process=true
    break
  fi
  # Then check ACCEPT/REJECT in comments (legacy / human-typed)
  comments=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || continue
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || continue
  if printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qE '(ACCEPT|REJECT):'; then
    has_responses_to_process=true
    break
@ -680,7 +531,7 @@ done
 # Check 2 (continued): Skip if already at max open pitches (3), unless there are responses to process
 open_arch_prs=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
-  "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq '[.[] | select(.title | startswith("architect:"))] | length') || open_arch_prs=0
+  "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq '[.[] | select(.title | startswith("architect:"))] | length') || open_arch_prs=0
 if [ "${open_arch_prs:-0}" -ge 3 ]; then
  if [ "$has_responses_to_process" = false ]; then
    log "already 3 open architect PRs with no responses to process — skipping"
@ -689,8 +540,6 @@ if [ "${open_arch_prs:-0}" -ge 3 ]; then
  log "3 open architect PRs found but responses detected — processing"
 fi
 # NOTE: Vision lifecycle check (close completed visions) moved to filer-bot (#764)
 # ── Bash-driven state management: Select vision issues for pitching ───────
 # This logic is also documented in formulas/run-architect.toml preflight step
@ -703,7 +552,7 @@ declare -A _arch_vision_issues_with_open_prs
 while IFS= read -r pr_num; do
  [ -z "$pr_num" ] && continue
  pr_body=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}" 2>/dev/null | jq -r '.body // ""') || continue
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}" 2>/dev/null | jq -r '.body // ""') || continue
  # Extract vision issue numbers referenced in PR body (e.g., "refs #419" or "#419")
  while IFS= read -r ref_issue; do
    [ -z "$ref_issue" ] && continue
@ -825,7 +674,8 @@ for vision_issue in "${ARCHITECT_TARGET_ISSUES[@]}"; do
  # Post footer comment
  post_pr_footer "$pr_number"
-  # NOTE: in-progress label is added by filer-bot after sprint PR merge (#764)
+  # Add in-progress label to vision issue
  add_inprogress_label "$vision_issue"
  pitch_count=$((pitch_count + 1))
  log "Completed pitch for vision issue #${vision_issue} — PR #${pr_number}"
@ -841,16 +691,16 @@ if [ "${has_responses_to_process:-false}" = "true" ]; then
  # Check if any PRs have responses that need agent handling
  needs_agent=false
  pr_numbers=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
-    "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq -r '.[] | select(.title | startswith("architect:")) | .number') || pr_numbers=""
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq -r '.[] | select(.title | startswith("architect:")) | .number') || pr_numbers=""
  for pr_num in $pr_numbers; do
    # Check for ACCEPT/REJECT in comments
    comments=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
-      "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || continue
+      "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || continue
    # Check for review decisions (higher precedence)
    reviews=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
-      "${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}/reviews" 2>/dev/null) || reviews=""
+      "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}/reviews" 2>/dev/null) || reviews=""
    # Check for ACCEPT (APPROVED review or ACCEPT comment)
    if printf '%s' "$reviews" | jq -e '.[] | select(.state == "APPROVED")' >/dev/null 2>&1; then
@ -867,32 +717,19 @@ if [ "${has_responses_to_process:-false}" = "true" ]; then
  # Run agent only if there are responses to process
  if [ "$needs_agent" = "true" ]; then
-    # Determine session handling based on PR state
+    # Determine whether to resume session
    RESUME_ARGS=()
-    SESSION_MODE="fresh"
+    if detect_questions_phase && [ -f "$SID_FILE" ]; then
-
+      RESUME_SESSION=$(cat "$SID_FILE")
-    if detect_questions_phase; then
+      RESUME_ARGS=(--resume "$RESUME_SESSION")
-      # PR is in questions-awaiting-answers phase — resume from that session
+      log "Resuming session from questions phase run: ${RESUME_SESSION:0:12}..."
-      if [ -f "$SID_FILE" ]; then
+    elif ! detect_questions_phase; then
        RESUME_SESSION=$(cat "$SID_FILE")
        RESUME_ARGS=(--resume "$RESUME_SESSION")
        SESSION_MODE="questions_phase"
        log "PR in questions-awaiting-answers phase — resuming session: ${RESUME_SESSION:0:12}..."
      else
        log "PR in questions phase but no session file — starting fresh session"
      fi
    elif detect_approved_pending_questions; then
      # PR is approved but awaiting initial design questions — start fresh with special prompt
      SESSION_MODE="start_questions"
      log "PR approved and awaiting initial design questions — starting fresh session"
    else
      log "PR not in questions phase — starting fresh session"
    elif [ ! -f "$SID_FILE" ]; then
      log "No session ID found for questions phase — starting fresh session"
    fi
-    # Build prompt with appropriate mode
+    agent_run "${RESUME_ARGS[@]}" --worktree "$WORKTREE" "$PROMPT"
    PROMPT_FOR_MODE=$(build_architect_prompt_for_mode "$SESSION_MODE")
    agent_run "${RESUME_ARGS[@]}" --worktree "$WORKTREE" "$PROMPT_FOR_MODE"
    log "agent_run complete"
  fi
 fi
--- a/bin/disinto
+++ b/bin/disinto
--- a/dev/AGENTS.md
+++ b/dev/AGENTS.md
@ -1,4 +1,4 @@
-<!-- last-reviewed: c363ee0aea2ae447daab28c2c850d6abefc8c6b5 -->
+<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
 # Dev Agent
 **Role**: Implement issues autonomously — write code, push branches, address
@ -29,16 +29,12 @@ stale checks (vision issues are managed by the architect). If the issue is assig
 `REQUEST_CHANGES`, spawns the dev-agent to address it before setting `BLOCKED_BY_INPROGRESS=true`;
 otherwise just sets blocked. If assigned to another agent, logs and falls through (does not
 block). If no assignee, no open PR, and no agent lock file — removes `in-progress`, adds
-`blocked` with a human-triage comment. **Post-crash self-assigned recovery (#749)**: when the
+`blocked` with a human-triage comment. **Per-agent open-PR gate**: before starting new work,
 issue is self-assigned (this bot) but there is no open PR, dev-poll now checks for a lock
 file (`/tmp/dev-impl-summary-$PROJECT_NAME-$ISSUE_NUM.txt`) AND a remote branch
 (`fix/issue-$ISSUE_NUM`) before declaring "my thread is busy". If neither exists after a cold
 boot, it spawns a fresh dev-agent for recovery instead of looping forever. **Per-agent open-PR gate**: before starting new work,
 filters open waiting PRs to only those assigned to this agent (`$BOT_USER`). Other agents'
 PRs do not block this agent's pipeline (#358, #369). **Pre-lock merge scan own-PRs only**:
 the direct-merge scan only merges PRs whose linked issue is assigned to this agent — skips
 PRs owned by other bot users (#374).
- `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval. **Launched as a subshell** (`("${SCRIPT_DIR}/dev-agent.sh" ...) &`) — not via `nohup` — to avoid deadlocking the polling loop and review-poll when running in the same container (#693).
+- `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval
 - `dev/phase-test.sh` — Integration test for the phase protocol
 **Environment variables consumed** (via `lib/env.sh` + project TOML):
@ -55,12 +51,6 @@ PRs owned by other bot users (#374).
 **Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h.
 **Polling loop isolation (#753)**: `docker/agents/entrypoint.sh` now tracks fast-poll PIDs
 (`FAST_PIDS`) and calls `wait "${FAST_PIDS[@]}"` instead of `wait` (no-args). This means
 long-running dev-agent sessions no longer block the loop from launching the next iteration's
 fast polls — the loop only waits for review-poll and dev-poll (the fast agents), never for
 the dev-agent subprocess itself.
 **Lifecycle**: dev-poll.sh (invoked by polling loop, `check_active dev`) → dev-agent.sh →
 tmux session → phase file drives CI/review loop → merge + `mirror_push()` → close issue.
 On respawn after `PHASE:escalate`, the stale phase file is cleared first so the session
--- a/dev/dev-agent.sh
+++ b/dev/dev-agent.sh
@ -268,22 +268,8 @@ log "forge remote: ${FORGE_REMOTE}"
 # First attempt: fix/issue-N, subsequent: fix/issue-N-1, fix/issue-N-2, etc.
 if [ "$RECOVERY_MODE" = false ]; then
  # Count only branches matching fix/issue-N, fix/issue-N-1, fix/issue-N-2, etc. (exact prefix match)
-  # Use explicit error handling to avoid silent failure from set -e + pipefail when git ls-remote fails.
+  ATTEMPT=$(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}" 2>/dev/null | grep -c "refs/heads/fix/issue-${ISSUE}$" || echo 0)
-  if _lr1=$(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}" 2>&1); then
+  ATTEMPT=$((ATTEMPT + $(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}-*" 2>/dev/null | wc -l)))
    ATTEMPT=$(printf '%s\n' "$_lr1" | grep -c "refs/heads/fix/issue-${ISSUE}$" || true)
  else
    log "WARNING: git ls-remote failed for attempt counting: $_lr1"
    ATTEMPT=0
  fi
  ATTEMPT="${ATTEMPT:-0}"
  if _lr2=$(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}-*" 2>&1); then
    # Guard on empty to avoid off-by-one: command substitution strips trailing newlines,
    # so wc -l undercounts by 1 when output exists. Re-add newline only if non-empty.
    ATTEMPT=$((ATTEMPT + $( [ -z "$_lr2" ] && echo 0 || printf '%s\n' "$_lr2" | wc -l )))
  else
    log "WARNING: git ls-remote failed for suffix counting: $_lr2"
  fi
  if [ "$ATTEMPT" -gt 0 ]; then
    BRANCH="fix/issue-${ISSUE}-${ATTEMPT}"
  fi
--- a/dev/dev-poll.sh
+++ b/dev/dev-poll.sh
@ -115,10 +115,11 @@ in_progress_recently_added() {
  now=$(date +%s)
  # Query issue timeline for the most recent in-progress label event.
-  # Forgejo 11.x API returns type as string "label", not integer 7.
+  # Forgejo serializes CommentType as an integer, not a string —
  # CommentTypeLabel is 7 in the Gitea/Forgejo enum.
  label_ts=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/issues/${issue}/timeline" | \
-    jq -r '[.[] | select(.type == "label") | select(.label.name == "in-progress")] | last | .created_at // empty') || true
+    jq -r '[.[] | select(.type == 7) | select(.label.name == "in-progress")] | last | .created_at // empty') || true
  if [ -z "$label_ts" ]; then
    return 1  # no label event found — not recently added
@ -426,7 +427,6 @@ ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
 ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length')
 BLOCKED_BY_INPROGRESS=false
 OTHER_AGENT_INPROGRESS=false
 if [ "$ORPHAN_COUNT" -gt 0 ]; then
  ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number')
@ -439,14 +439,12 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
    OPEN_PR=true
  fi
-  # Skip issues owned by non-dev agents (bug-report, vision, prediction, etc.)
+  # Skip vision-labeled issues — they are managed by architect agent, not dev-poll
  # See issue #608: dev-poll must only touch issues it could actually claim.
  issue_labels=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/issues/${ISSUE_NUM}" | jq -r '[.labels[].name] | join(",")')
-  if ! issue_is_dev_claimable "$issue_labels"; then
+  if echo "$issue_labels" | grep -q "vision"; then
-    log "issue #${ISSUE_NUM} has non-dev label(s) [${issue_labels}] — skipping (owned by another agent)"
+    log "issue #${ISSUE_NUM} has 'vision' label — skipping stale detection (managed by architect)"
-    BLOCKED_BY_INPROGRESS=false
+    BLOCKED_BY_INPROGRESS=true
    OTHER_AGENT_INPROGRESS=true
  fi
  # Check if issue has an assignee — only block on issues assigned to this agent
@ -468,7 +466,7 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
        if [ "${HAS_CHANGES:-0}" -gt 0 ]; then
          log "issue #${ISSUE_NUM} has review feedback — spawning agent"
-          ("${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1) &
+          nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
          log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
          BLOCKED_BY_INPROGRESS=true
        else
@ -476,29 +474,18 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
          BLOCKED_BY_INPROGRESS=true
        fi
      else
-        # No open PR — check if a thread is actually alive (lock file or remote branch)
+        log "issue #${ISSUE_NUM} assigned to me — my thread is busy"
-        LOCK_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE_NUM}.txt"
+        BLOCKED_BY_INPROGRESS=true
        REMOTE_BRANCH_EXISTS=$(git ls-remote --exit-code origin "fix/issue-${ISSUE_NUM}" >/dev/null 2>&1 && echo yes || echo no)
        if [ -f "$LOCK_FILE" ] || [ "$REMOTE_BRANCH_EXISTS" = "yes" ]; then
          log "issue #${ISSUE_NUM} assigned to me — my thread is busy (lock=$([ -f "$LOCK_FILE" ] && echo y || echo n) remote_branch=$REMOTE_BRANCH_EXISTS)"
          BLOCKED_BY_INPROGRESS=true
        else
          log "issue #${ISSUE_NUM} self-assigned but orphaned (no lock, no branch, no PR) — recovering"
          nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
          log "started dev-agent PID $! for issue #${ISSUE_NUM} (post-crash recovery)"
          BLOCKED_BY_INPROGRESS=true
        fi
      fi
    else
      log "issue #${ISSUE_NUM} assigned to ${assignee} — their thread, not blocking"
-      OTHER_AGENT_INPROGRESS=true
+      BLOCKED_BY_INPROGRESS=true
-      # Issue assigned to another agent — skip stale checks but fall through to backlog
+      # Issue assigned to another agent — don't block, fall through to backlog
    fi
  fi
-  # Only proceed with in-progress checks if not blocked by this agent's own work
+  # Only proceed with in-progress checks if not blocked by another agent
-  if [ "$BLOCKED_BY_INPROGRESS" = false ] && [ "$OTHER_AGENT_INPROGRESS" = false ]; then
+  if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
    # Check for dev-agent lock file (agent may be running in another container)
    LOCK_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE_NUM}.txt"
    if [ -f "$LOCK_FILE" ]; then
@ -518,6 +505,20 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
      fi
    fi
    # Formula guard: formula-labeled issues should not be worked on by dev-agent.
    # Remove in-progress label and skip to prevent infinite respawn cycle (#115).
    if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
      ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true
      SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true
      if [ -n "$SKIP_LABEL" ]; then
        log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping"
        IP_ID=$(_ilc_in_progress_id)
        curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
          "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
        BLOCKED_BY_INPROGRESS=true
      fi
    fi
    # Check if there's already an open PR for this issue
    if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
      HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
@ -571,7 +572,7 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
            else
              # Direct merge failed (conflicts?) — fall back to dev-agent
              log "falling back to dev-agent for PR #${HAS_PR} merge"
-              ("${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1) &
+              nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
              log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)"
              BLOCKED_BY_INPROGRESS=true
            fi
@ -589,7 +590,7 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
              BLOCKED_BY_INPROGRESS=false
            else
              log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent"
-              ("${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1) &
+              nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
              log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
              BLOCKED_BY_INPROGRESS=true
            fi
@ -604,7 +605,7 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
                BLOCKED_BY_INPROGRESS=true  # exhausted between check and launch
              else
                log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)"
-                ("${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1) &
+                nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
                log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)"
                BLOCKED_BY_INPROGRESS=true
              fi
@ -630,7 +631,7 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
          # Don't block — fall through to backlog
        else
          log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})"
-          ("${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1) &
+          nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
          log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)"
          BLOCKED_BY_INPROGRESS=true
        fi
@ -697,7 +698,7 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do
    fi
    # Direct merge failed (conflicts?) — fall back to dev-agent
    log "falling back to dev-agent for PR #${PR_NUM} merge"
-    ("${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1) &
+    nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 &
    log "started dev-agent PID $! for stuck PR #${PR_NUM} (agent-merge)"
    exit 0
  fi
@ -718,7 +719,7 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do
      continue  # skip this PR, check next stuck PR or fall through to backlog
    fi
    log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) has REQUEST_CHANGES — fixing first"
-    ("${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1) &
+    nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 &
    log "started dev-agent PID $! for stuck PR #${PR_NUM}"
    exit 0
  elif ci_failed "$CI_STATE"; then
@ -730,7 +731,7 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do
      continue  # exhausted between check and launch
    fi
    log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI failed — fixing (attempt ${CI_FIX_ATTEMPTS}/3)"
-    ("${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1) &
+    nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 &
    log "started dev-agent PID $! for stuck PR #${PR_NUM}"
    exit 0
  fi
@ -851,7 +852,7 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
      fi
      # Direct merge failed (conflicts?) — fall back to dev-agent
      log "falling back to dev-agent for PR #${EXISTING_PR} merge"
-      ("${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1) &
+      nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
      log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)"
      exit 0
@ -929,5 +930,5 @@ if [ -n "${READY_PR_FOR_INCREMENT:-}" ]; then
 fi
 log "launching dev-agent for #${READY_ISSUE}"
-("${SCRIPT_DIR}/dev-agent.sh" "$READY_ISSUE" >> "$LOGFILE" 2>&1) &
+nohup "${SCRIPT_DIR}/dev-agent.sh" "$READY_ISSUE" >> "$LOGFILE" 2>&1 &
 log "started dev-agent PID $! for issue #${READY_ISSUE}"
--- a/disinto-factory/SKILL.md
+++ b/disinto-factory/SKILL.md
@ -11,6 +11,7 @@ You are helping the user set up and operate a **disinto autonomous code factory*
 - **[Setup guide](setup.md)** — First-time factory setup: environment, init, verification, backlog seeding
 - **[Operations guide](operations.md)** — Day-to-day: status checks, CI debugging, unsticking issues, Forgejo access
 - **[Lessons learned](lessons-learned.md)** — Patterns for writing issues, debugging CI, retrying failures, vault operations, breaking down features
 ## Important context
--- a/disinto-factory/lessons-learned.md
+++ b/disinto-factory/lessons-learned.md
@ -0,0 +1,35 @@
 # Lessons learned
 ## Remediation & deployment
 **Escalate gradually.** Cheapest fix first, re-measure, escalate only if it persists. Single-shot fixes are either too weak or cause collateral damage.
 **Parameterize deployment boundaries.** Entrypoint references to a specific project name are config values waiting to escape. `${VAR:-default}` preserves compat and unlocks reuse.
 **Fail loudly over silent defaults.** A fatal error with a clear message beats a wrong default that appears to work.
 **Audit the whole file when fixing one value.** Hardcoded assumptions cluster. Fixing one while leaving siblings produces multi-commit churn.
 ## Documentation
 **Per-context rewrites, not batch replacement.** Each doc mention sits in a different narrative. Blanket substitution produces awkward text.
 **Search for implicit references too.** After keyword matches, check for instructions that assume the old mechanism without naming it.
 ## Code review
 **Approval means "safe to ship," not "how I'd write it."** Distinguish "wrong" from "different" — only the former blocks.
 **Scale scrutiny to blast radius.** A targeted fix warrants less ceremony than a cross-cutting refactor.
 **Be specific; separate blockers from preferences.** Concrete observations invite fixes; vague concerns invite debate.
 **Read diffs top-down: intent, behavior, edge cases.** Verify the change matches its stated goal before examining lines.
 ## Issue authoring & retry
 **Self-contained issue bodies.** The agent reads the body, not comments. On retry, update the body with the exact error and fix guidance.
 **Clean stale branches before retry.** Old branches trigger recovery on stale code. Close PR, delete branch, relabel.
 **Diagnose CI failures externally.** The agent sees pass/fail, not logs. After repeated failures, read logs yourself and put findings in the issue.
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -7,62 +7,29 @@ services:
      dockerfile: docker/agents/Dockerfile
    image: disinto/agents:latest
    container_name: disinto-agents
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
-      - agent-data:/home/agent/data
+      - ./data/agents:/home/agent/data
-      - project-repos:/home/agent/repos
+      - ./disinto:/home/agent/disinto:ro
-      - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
+      - /usr/local/bin/claude:/usr/local/bin/claude:ro
      - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro
      - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
      - ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro
      - ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro
      - woodpecker-data:/woodpecker-data:ro
    environment:
      - FORGE_URL=http://forgejo:3000
      - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
      - FORGE_TOKEN=${FORGE_TOKEN:-}
      - FORGE_REVIEW_TOKEN=${FORGE_REVIEW_TOKEN:-}
      - FORGE_PLANNER_TOKEN=${FORGE_PLANNER_TOKEN:-}
      - FORGE_GARDENER_TOKEN=${FORGE_GARDENER_TOKEN:-}
      - FORGE_VAULT_TOKEN=${FORGE_VAULT_TOKEN:-}
      - FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-}
      - FORGE_PREDICTOR_TOKEN=${FORGE_PREDICTOR_TOKEN:-}
      - FORGE_ARCHITECT_TOKEN=${FORGE_ARCHITECT_TOKEN:-}
-      - FORGE_FILER_TOKEN=${FORGE_FILER_TOKEN:-}
+      - FORGE_VAULT_TOKEN=${FORGE_VAULT_TOKEN:-}
      - FORGE_PLANNER_TOKEN=${FORGE_PLANNER_TOKEN:-}
      - FORGE_BOT_USERNAMES=${FORGE_BOT_USERNAMES:-}
      - WOODPECKER_TOKEN=${WOODPECKER_TOKEN:-}
      - CLAUDE_TIMEOUT=${CLAUDE_TIMEOUT:-7200}
      - CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - FORGE_PASS=${FORGE_PASS:-}
      - FORGE_ADMIN_PASS=${FORGE_ADMIN_PASS:-}
-      - FACTORY_REPO=${FORGE_REPO:-disinto-admin/disinto}
+      - DISINTO_AGENTS=review,gardener
      - DISINTO_CONTAINER=1
      - PROJECT_NAME=${PROJECT_NAME:-project}
      - PROJECT_REPO_ROOT=/home/agent/repos/${PROJECT_NAME:-project}
      - WOODPECKER_DATA_DIR=/woodpecker-data
      - WOODPECKER_REPO_ID=${WOODPECKER_REPO_ID:-}
      - CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
      - POLL_INTERVAL=${POLL_INTERVAL:-300}
      - GARDENER_INTERVAL=${GARDENER_INTERVAL:-21600}
      - ARCHITECT_INTERVAL=${ARCHITECT_INTERVAL:-21600}
      - PLANNER_INTERVAL=${PLANNER_INTERVAL:-43200}
      - SUPERVISOR_INTERVAL=${SUPERVISOR_INTERVAL:-1200}
    healthcheck:
      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
      interval: 60s
      timeout: 5s
      retries: 3
      start_period: 30s
    depends_on:
-      forgejo:
+      - forgejo
        condition: service_healthy
      woodpecker:
        condition: service_started
    networks:
      - disinto-net
  agents-llama:
    build:
@ -70,23 +37,13 @@ services:
      dockerfile: docker/agents/Dockerfile
    image: disinto/agents-llama:latest
    container_name: disinto-agents-llama
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
-      - agent-data:/home/agent/data
+      - ./data/llama:/home/agent/data
-      - project-repos:/home/agent/repos
+      - ./disinto:/home/agent/disinto:ro
-      - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
+      - /usr/local/bin/claude:/usr/local/bin/claude:ro
      - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro
      - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
      - ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro
      - ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro
      - woodpecker-data:/woodpecker-data:ro
    environment:
      - FORGE_URL=http://forgejo:3000
-      - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
+      - FORGE_TOKEN=${FORGE_TOKEN_DEVQWEN:-}
      - FORGE_TOKEN=${FORGE_TOKEN_LLAMA:-}
      - FORGE_PASS=${FORGE_PASS_LLAMA:-}
      - FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-}
      - FORGE_PREDICTOR_TOKEN=${FORGE_PREDICTOR_TOKEN:-}
      - FORGE_ARCHITECT_TOKEN=${FORGE_ARCHITECT_TOKEN:-}
@ -101,96 +58,34 @@ services:
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-}
      - FORGE_ADMIN_PASS=${FORGE_ADMIN_PASS:-}
-      - DISINTO_CONTAINER=1
+      - DISINTO_AGENTS=dev
      - PROJECT_TOML=projects/disinto.toml
-      - PROJECT_NAME=${PROJECT_NAME:-project}
+      - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
      - PROJECT_REPO_ROOT=/home/agent/repos/${PROJECT_NAME:-project}
      - WOODPECKER_DATA_DIR=/woodpecker-data
      - WOODPECKER_REPO_ID=${WOODPECKER_REPO_ID:-}
      - CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
      - POLL_INTERVAL=${POLL_INTERVAL:-300}
      - AGENT_ROLES=dev
    healthcheck:
      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
      interval: 60s
      timeout: 5s
      retries: 3
      start_period: 30s
    depends_on:
-      forgejo:
+      - forgejo
        condition: service_healthy
      woodpecker:
        condition: service_started
    networks:
      - disinto-net
-  agents-llama-all:
+  runner:
-    build:
+    image: disinto/agents:latest
-      context: .
+    profiles: ["runner"]
      dockerfile: docker/agents/Dockerfile
    image: disinto/agents-llama:latest
    container_name: disinto-agents-llama-all
    restart: unless-stopped
    profiles: ["agents-llama-all"]
    security_opt:
      - apparmor=unconfined
    volumes:
-      - agent-data:/home/agent/data
+      - /var/run/docker.sock:/var/run/docker.sock
-      - project-repos:/home/agent/repos
+      - /usr/local/bin/claude:/usr/local/bin/claude:ro
-      - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
+      - ${HOME}/.claude:/home/agent/.claude
-      - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro
+      - ${HOME}/.claude.json:/home/agent/.claude.json:ro
-      - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
+    entrypoint: ["bash", "/home/agent/disinto/docker/runner/entrypoint-runner.sh"]
      - ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro
      - ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro
      - woodpecker-data:/woodpecker-data:ro
    environment:
      - FORGE_URL=http://forgejo:3000
      - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
      - FORGE_TOKEN=${FORGE_TOKEN_LLAMA:-}
      - FORGE_PASS=${FORGE_PASS_LLAMA:-}
      - FORGE_REVIEW_TOKEN=${FORGE_REVIEW_TOKEN:-}
      - FORGE_PLANNER_TOKEN=${FORGE_PLANNER_TOKEN:-}
      - FORGE_GARDENER_TOKEN=${FORGE_GARDENER_TOKEN:-}
      - FORGE_VAULT_TOKEN=${FORGE_VAULT_TOKEN:-}
      - FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-}
      - FORGE_PREDICTOR_TOKEN=${FORGE_PREDICTOR_TOKEN:-}
      - FORGE_ARCHITECT_TOKEN=${FORGE_ARCHITECT_TOKEN:-}
      - FORGE_FILER_TOKEN=${FORGE_FILER_TOKEN:-}
      - FORGE_BOT_USERNAMES=${FORGE_BOT_USERNAMES:-}
      - WOODPECKER_TOKEN=${WOODPECKER_TOKEN:-}
      - CLAUDE_TIMEOUT=${CLAUDE_TIMEOUT:-7200}
      - CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
      - CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=60
      - CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-}
      - FORGE_ADMIN_PASS=${FORGE_ADMIN_PASS:-}
      - DISINTO_CONTAINER=1
-      - PROJECT_TOML=projects/disinto.toml
+      - FORGE_URL=${FORGE_URL:-}
-      - PROJECT_NAME=${PROJECT_NAME:-project}
+      - FORGE_TOKEN=${FORGE_TOKEN:-}
-      - PROJECT_REPO_ROOT=/home/agent/repos/${PROJECT_NAME:-project}
+      - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
-      - WOODPECKER_DATA_DIR=/woodpecker-data
+      - FORGE_OPS_REPO=${FORGE_OPS_REPO:-}
-      - WOODPECKER_REPO_ID=${WOODPECKER_REPO_ID:-}
+      - PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}
-      - CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
-      - POLL_INTERVAL=${POLL_INTERVAL:-300}
+      - CLAUDE_MODEL=${CLAUDE_MODEL:-}
      - GARDENER_INTERVAL=${GARDENER_INTERVAL:-21600}
      - ARCHITECT_INTERVAL=${ARCHITECT_INTERVAL:-21600}
      - PLANNER_INTERVAL=${PLANNER_INTERVAL:-43200}
      - SUPERVISOR_INTERVAL=${SUPERVISOR_INTERVAL:-1200}
      - AGENT_ROLES=review,dev,gardener,architect,planner,predictor,supervisor
    healthcheck:
      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
      interval: 60s
      timeout: 5s
      retries: 3
      start_period: 30s
    depends_on:
      forgejo:
        condition: service_healthy
      woodpecker:
        condition: service_started
    networks:
-      - disinto-net
+      - default
  reproduce:
    build:
@ -203,9 +98,9 @@ services:
      - /var/run/docker.sock:/var/run/docker.sock
      - agent-data:/home/agent/data
      - project-repos:/home/agent/repos
-      - ${CLAUDE_DIR:-${HOME}/.claude}:/home/agent/.claude
+      - ${HOME}/.claude:/home/agent/.claude
-      - ${CLAUDE_BIN_DIR:-/usr/local/bin/claude}:/usr/local/bin/claude:ro
+      - /usr/local/bin/claude:/usr/local/bin/claude:ro
-      - ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro
+      - ${HOME}/.ssh:/home/agent/.ssh:ro
    env_file:
      - .env
@ -215,72 +110,42 @@ services:
      dockerfile: Dockerfile
    image: disinto/edge:latest
    container_name: disinto-edge
    security_opt:
      - apparmor=unconfined
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
-      - ${CLAUDE_BIN_DIR:-/usr/local/bin/claude}:/usr/local/bin/claude:ro
+      - /usr/local/bin/claude:/usr/local/bin/claude:ro
-      - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/root/.claude.json:ro
+      - ${HOME}/.claude:/home/agent/.claude
-      - ${CLAUDE_DIR:-${HOME}/.claude}:/root/.claude:ro
+      - ${HOME}/.claude.json:/home/agent/.claude.json:ro
      - disinto-logs:/opt/disinto-logs
      - ./docker-compose.yml:/opt/docker-compose.yml:ro
      - ./projects:/opt/disinto-projects:ro
    environment:
      - FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - CLAUDE_MODEL=claude-sonnet-4-6
      - FORGE_TOKEN=${FORGE_TOKEN:-}
      - FORGE_URL=http://forgejo:3000
      - FORGE_REPO=disinto-admin/disinto
      - FORGE_OPS_REPO=disinto-admin/disinto-ops
      - PRIMARY_BRANCH=main
      - DISINTO_CONTAINER=1
-      - FORGE_ADMIN_USERS=disinto-admin,vault-bot,admin
+      - HOST_PROJECT_DIR=${HOST_PROJECT_DIR:-.}
      - PROJECTS_DIR=/opt/disinto-projects
    ports:
      - "80:80"
      - "443:443"
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost:2019/config/"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 15s
    depends_on:
      - forgejo
    networks:
      - disinto-net
  forgejo:
-    image: codeberg.org/forgejo/forgejo:11.0
+    image: codeberg.org/forgejo/forgejo:1
    container_name: disinto-forgejo
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
-      - forgejo-data:/data
+      - ./data/forgejo:/data
    environment:
      - FORGEJO__database__DB_TYPE=sqlite3
-      - FORGEJO__server__ROOT_URL=http://forgejo:3000/
+      - FORGEJO__service__REGISTER_EMAIL_CONFIRMATION=false
-      - FORGEJO__server__HTTP_PORT=3000
+      - FORGEJO__service__ENABLE_NOTIFY_MAIL=false
      - FORGEJO__security__INSTALL_LOCK=true
      - FORGEJO__service__DISABLE_REGISTRATION=true
-      - FORGEJO__webhook__ALLOWED_HOST_LIST=private
+      - FORGEJO__service__REQUIRE_SIGNIN_VIEW=true
    healthcheck:
      test: ["CMD", "curl", "-sf", "http://localhost:3000/api/v1/version"]
      interval: 5s
      timeout: 3s
      retries: 30
      start_period: 30s
    ports:
      - "3000:3000"
    networks:
      - disinto-net
 volumes:
  disinto-logs:
  agent-data:
  project-repos:
  woodpecker-data:
  forgejo-data:
 networks:
  disinto-net:
    driver: bridge
--- a/docker/agents/Dockerfile
+++ b/docker/agents/Dockerfile
@ -28,9 +28,6 @@ RUN chmod +x /entrypoint.sh
 # Entrypoint runs polling loop directly, dropping to agent user via gosu.
 # All scripts execute as the agent user (UID 1000) while preserving env vars.
 VOLUME /home/agent/data
 VOLUME /home/agent/repos
 WORKDIR /home/agent/disinto
 ENTRYPOINT ["/entrypoint.sh"]
--- a/docker/agents/entrypoint.sh
+++ b/docker/agents/entrypoint.sh
@ -7,24 +7,18 @@ set -euo pipefail
 # poll scripts.  All Docker Compose env vars are inherited (PATH, FORGE_TOKEN,
 # ANTHROPIC_API_KEY, etc.).
 #
-# AGENT_ROLES env var controls which scripts run: "review,dev,gardener,architect,planner,predictor,supervisor"
+# AGENT_ROLES env var controls which scripts run: "review,dev,gardener,architect,planner,predictor"
-# (default: all seven). Uses while-true loop with staggered intervals:
+# (default: all six). Uses while-true loop with staggered intervals:
 #   - review-poll: every 5 minutes (offset by 0s)
 #   - dev-poll: every 5 minutes (offset by 2 minutes)
-#   - gardener: every GARDENER_INTERVAL seconds (default: 21600 = 6 hours)
+#   - gardener: every 6 hours by default (72 iterations * 5 min), configurable via GARDENER_INTERVAL
-#   - architect: every ARCHITECT_INTERVAL seconds (default: 21600 = 6 hours)
+#   - architect: every 6 hours by default (same as gardener), configurable via ARCHITECT_INTERVAL
-#   - planner: every PLANNER_INTERVAL seconds (default: 43200 = 12 hours)
+#   - planner: every 12 hours (144 iterations * 5 min)
 #   - predictor: every 24 hours (288 iterations * 5 min)
 #   - supervisor: every SUPERVISOR_INTERVAL seconds (default: 1200 = 20 min)
-DISINTO_BAKED="/home/agent/disinto"
+DISINTO_DIR="/home/agent/disinto"
 DISINTO_LIVE="/home/agent/repos/_factory"
 DISINTO_DIR="$DISINTO_BAKED"  # start with baked copy; switched to live checkout after bootstrap
 LOGFILE="/home/agent/data/agent-entrypoint.log"
-
+mkdir -p /home/agent/data/logs
 # Create all expected log subdirectories and set ownership as root before dropping to agent.
 # This handles both fresh volumes and stale root-owned dirs from prior container runs.
 mkdir -p /home/agent/data/logs/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher}
 chown -R agent:agent /home/agent/data
 log() {
@ -43,46 +37,42 @@ init_state_dir() {
  log "Initialized state directory"
 }
-# Source shared git credential helper library (#604).
+# Configure git credential helper for password-based HTTP auth.
-# shellcheck source=lib/git-creds.sh
+# Forgejo 11.x rejects API tokens for git push (#361); password auth works.
-source "${DISINTO_BAKED}/lib/git-creds.sh"
+# This ensures all git operations (clone, fetch, push) from worktrees use
-
+# password auth without needing tokens embedded in remote URLs.
-# Wrapper that calls the shared configure_git_creds with agent-specific paths,
+configure_git_creds() {
 # then repairs any legacy baked-credential URLs in existing clones.
 _setup_git_creds() {
  _GIT_CREDS_LOG_FN=log configure_git_creds "/home/agent" "gosu agent"
  if [ -n "${FORGE_PASS:-}" ] && [ -n "${FORGE_URL:-}" ]; then
-    log "Git credential helper configured (password auth)"
+    _forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||')
    _forge_proto=$(printf '%s' "$FORGE_URL" | sed 's|://.*||')
    # Determine the bot username from FORGE_TOKEN identity (or default to dev-bot)
    _bot_user=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
      "${FORGE_URL}/api/v1/user" 2>/dev/null | jq -r '.login // empty') || _bot_user=""
    _bot_user="${_bot_user:-dev-bot}"
    # Write a static credential helper script (git credential protocol)
    cat > /home/agent/.git-credentials-helper <<CREDEOF
 #!/bin/sh
 # Auto-generated git credential helper for Forgejo password auth (#361)
 # Only respond to "get" action; ignore "store" and "erase".
 [ "\$1" = "get" ] || exit 0
 # Read and discard stdin (git sends protocol/host info)
 cat >/dev/null
 echo "protocol=${_forge_proto}"
 echo "host=${_forge_host}"
 echo "username=${_bot_user}"
 echo "password=${FORGE_PASS}"
 CREDEOF
    chmod 755 /home/agent/.git-credentials-helper
    chown agent:agent /home/agent/.git-credentials-helper
    gosu agent bash -c "git config --global credential.helper '/home/agent/.git-credentials-helper'"
    log "Git credential helper configured for ${_bot_user}@${_forge_host} (password auth)"
  fi
-  # Repair legacy clones with baked-in stale credentials (#604).
+  # Set safe.directory to work around dubious ownership after container restart
-  _GIT_CREDS_LOG_FN=log repair_baked_cred_urls --as "gosu agent" /home/agent/repos
+  # (https://github.com/disinto-admin/disinto/issues/517)
-}
+  gosu agent bash -c "git config --global --add safe.directory '*'"
 # Configure git author identity for commits made by this container.
 # Derives identity from the resolved bot user (BOT_USER) to ensure commits
 # are visibly attributable to the correct bot in the forge timeline.
 # BOT_USER is normally set by configure_git_creds() (#741); this function
 # only falls back to its own API call if BOT_USER was not already resolved.
 configure_git_identity() {
  # Sets the global git user.name / user.email for the `agent` user from
  # BOT_USER. Reads BOT_USER, FORGE_TOKEN and FORGE_URL; writes BOT_USER.
  #
  # Resolve BOT_USER from FORGE_TOKEN if not already set (configure_git_creds
  # exports BOT_USER on success, so this is a fallback for edge cases only).
  if [ -z "${BOT_USER:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then
    # Ask the forge which account owns the token. The request is capped at
    # 10 seconds, and `jq` prints nothing when `.login` is absent. The trailing
    # `|| true` keeps a failed lookup from aborting the script (it runs under
    # `set -euo pipefail`); BOT_USER is simply left empty in that case.
    BOT_USER=$(curl -sf --max-time 10 \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty') || true
  fi
  # Still unresolved (no token, or the lookup failed/returned no login):
  # warn and fall back to the generic "agent" identity.
  if [ -z "${BOT_USER:-}" ]; then
    log "WARNING: Could not resolve bot username for git identity — commits will use fallback"
    BOT_USER="agent"
  fi
  # Configure git identity for all repositories
  # (written to the agent user's --global config via gosu).
  gosu agent git config --global user.name "${BOT_USER}"
  gosu agent git config --global user.email "${BOT_USER}@disinto.local"
  log "Git identity configured: ${BOT_USER} <${BOT_USER}@disinto.local>"
 }
 # Configure tea CLI login for forge operations (runs as agent user).
@ -107,16 +97,8 @@ configure_tea_login() {
 log "Agent container starting"
-# Set USER and HOME for scripts that source lib/env.sh.
+# Set USER for scripts that source lib/env.sh (e.g., OPS_REPO_ROOT default)
 # These are preconditions required by lib/env.sh's surface contract.
 # gosu agent inherits the parent's env, so exports here propagate to all children.
 export USER=agent
 export HOME=/home/agent
 # Source lib/env.sh to get DISINTO_LOG_DIR and other shared environment.
 # This must happen after USER/HOME are set (env.sh preconditions).
 # shellcheck source=lib/env.sh
 source "${DISINTO_BAKED}/lib/env.sh"
 # Verify Claude CLI is available (expected via volume mount from host).
 if ! command -v claude &>/dev/null; then
@ -133,216 +115,29 @@ log "Claude CLI: $(claude --version 2>&1 || true)"
 # auth method is active so operators can debug 401s.
 if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
  log "Auth: ANTHROPIC_API_KEY is set — using API key (no OAuth rotation)"
-elif [ -f "${CLAUDE_CONFIG_DIR:-/home/agent/.claude}/.credentials.json" ]; then
+elif [ -f /home/agent/.claude/credentials.json ]; then
-  log "Auth: OAuth credentials mounted from host (${CLAUDE_CONFIG_DIR:-~/.claude})"
+  log "Auth: OAuth credentials mounted from host (~/.claude)"
 else
  log "WARNING: No ANTHROPIC_API_KEY and no OAuth credentials found."
  log "Run 'claude auth login' on the host, or set ANTHROPIC_API_KEY in .env"
 fi
 # Bootstrap ops repos for each project TOML (#586).
 # In compose mode the ops repo lives on a Docker named volume at
 # /home/agent/repos/<project>-ops.  If init ran migrate_ops_repo on the host
 # the container never saw those changes.  This function clones from forgejo
 # when the repo is missing, or configures the remote and pulls when it exists
 # but has no remote (orphaned local-only checkout).
 bootstrap_ops_repos() {
  # Clone or refresh the ops repo of every project TOML under
  # ${DISINTO_DIR}/projects into /home/agent/repos/<project>-ops.
  # Globals:  DISINTO_DIR, FORGE_URL (read)
  # Outputs:  progress messages via log
  # Notes:    every git command runs as the agent user via gosu; values taken
  #           from the TOML are handed to `bash -c` as positional parameters
  #           rather than spliced into the command string, so a quote inside
  #           a project name or branch cannot break or alter the command.
  local repos_dir="/home/agent/repos"
  local toml _toml_vals project_name ops_slug repo_slug primary_branch
  local ops_root remote_url current_remote
  mkdir -p "$repos_dir"
  chown agent:agent "$repos_dir"
  for toml in "${DISINTO_DIR}"/projects/*.toml; do
    [ -f "$toml" ] || continue
    # Extract project name, ops repo slug, repo slug, and primary branch from TOML
    # (one value per output line; a parse failure yields empty output).
    _toml_vals=$(python3 -c "
 import tomllib, sys
 with open(sys.argv[1], 'rb') as f:
    cfg = tomllib.load(f)
 print(cfg.get('name', ''))
 print(cfg.get('ops_repo', ''))
 print(cfg.get('repo', ''))
 print(cfg.get('primary_branch', 'main'))
 " "$toml" 2>/dev/null || true)
    project_name=$(sed -n '1p' <<< "$_toml_vals")
    [ -n "$project_name" ] || continue
    ops_slug=$(sed -n '2p' <<< "$_toml_vals")
    repo_slug=$(sed -n '3p' <<< "$_toml_vals")
    primary_branch=$(sed -n '4p' <<< "$_toml_vals")
    primary_branch="${primary_branch:-main}"
    # Fall back to convention if ops_repo not in TOML
    if [ -z "$ops_slug" ]; then
      if [ -n "$repo_slug" ]; then
        ops_slug="${repo_slug}-ops"
      else
        ops_slug="disinto-admin/${project_name}-ops"
      fi
    fi
    ops_root="${repos_dir}/${project_name}-ops"
    remote_url="${FORGE_URL}/${ops_slug}.git"
    if [ ! -d "${ops_root}/.git" ]; then
      # Clone ops repo from forgejo
      log "Ops bootstrap: cloning ${ops_slug} -> ${ops_root}"
      if gosu agent git clone --quiet "$remote_url" "$ops_root" 2>/dev/null; then
        log "Ops bootstrap: ${ops_slug} cloned successfully"
      else
        # Remote may not exist yet (first run before init); create empty repo
        log "Ops bootstrap: clone failed for ${ops_slug} — initializing empty repo"
        # shellcheck disable=SC2016  # $1..$3 are expanded by the inner bash
        gosu agent bash -c '
          mkdir -p "$1" && \
          git -C "$1" init --initial-branch="$2" -q && \
          git -C "$1" remote add origin "$3"
        ' _ "$ops_root" "$primary_branch" "$remote_url"
      fi
    else
      # Repo exists — ensure remote is configured and pull latest.
      # The lookup runs as the agent user like every other git call here;
      # run as root it can fail on the agent-owned checkout and be mistaken
      # for "no remote configured", which makes the `remote add` below fail.
      current_remote=$(gosu agent git -C "$ops_root" remote get-url origin 2>/dev/null || true)
      if [ -z "$current_remote" ]; then
        log "Ops bootstrap: adding missing remote to ${ops_root}"
        gosu agent git -C "$ops_root" remote add origin "$remote_url"
      elif [ "$current_remote" != "$remote_url" ]; then
        log "Ops bootstrap: fixing remote URL in ${ops_root}"
        gosu agent git -C "$ops_root" remote set-url origin "$remote_url"
      fi
      # Pull latest from forgejo to pick up any host-side migrations
      log "Ops bootstrap: pulling latest for ${project_name}-ops"
      # shellcheck disable=SC2016  # $1/$2 are expanded by the inner bash
      gosu agent bash -c '
        cd "$1" && \
        git fetch origin "$2" --quiet 2>/dev/null && \
        git reset --hard "origin/$2" --quiet 2>/dev/null
      ' _ "$ops_root" "$primary_branch" \
        || log "Ops bootstrap: pull failed for ${ops_slug} (remote may not exist yet)"
    fi
  done
 }
 # Bootstrap the factory (disinto) repo from Forgejo into the project-repos
 # volume so the entrypoint runs from a live git checkout that receives
 # updates via `git pull`, not the stale baked copy from `COPY .` (#593).
 bootstrap_factory_repo() {
  # Clone or refresh the factory repo at ${DISINTO_LIVE} and, when the
  # checkout contains lib/env.sh, point DISINTO_DIR at it.
  # Globals: FACTORY_REPO, FORGE_URL, PRIMARY_BRANCH, DISINTO_LIVE,
  #          DISINTO_BAKED (read); DISINTO_DIR (written on success).
  # Returns: 0 when FACTORY_REPO is unset or the clone fails (the baked copy
  #          stays in use).
  local factory_slug="${FACTORY_REPO:-}"
  if [ -z "$factory_slug" ]; then
    log "Factory bootstrap: FACTORY_REPO not set — running from baked copy"
    return 0
  fi
  local branch="${PRIMARY_BRANCH:-main}"
  local origin_url="${FORGE_URL}/${factory_slug}.git"
  if [ -d "${DISINTO_LIVE}/.git" ]; then
    # Existing checkout: hard-reset it to the remote branch tip.
    log "Factory bootstrap: pulling latest ${factory_slug}"
    if ! gosu agent bash -c "
      cd '${DISINTO_LIVE}' && \
      git fetch origin '${branch}' --quiet 2>/dev/null && \
      git reset --hard 'origin/${branch}' --quiet 2>/dev/null
    "; then
      log "Factory bootstrap: pull failed — using existing checkout"
    fi
  else
    # No checkout yet: clone the primary branch.
    log "Factory bootstrap: cloning ${factory_slug} -> ${DISINTO_LIVE}"
    if ! gosu agent git clone --quiet --branch "$branch" "$origin_url" "$DISINTO_LIVE" 2>&1; then
      log "Factory bootstrap: clone failed — running from baked copy"
      return 0
    fi
    log "Factory bootstrap: cloned successfully"
  fi
  # Project TOMLs are gitignored and docker-ignored, so the clone will not
  # normally contain them; carry over any that exist in the baked directory.
  if compgen -G "${DISINTO_BAKED}/projects/*.toml" >/dev/null 2>&1; then
    mkdir -p "${DISINTO_LIVE}/projects"
    cp "${DISINTO_BAKED}"/projects/*.toml "${DISINTO_LIVE}/projects/"
    chown -R agent:agent "${DISINTO_LIVE}/projects"
    log "Factory bootstrap: copied project TOMLs to live checkout"
  fi
  # Only switch over when the checkout has the expected structure.
  if [ ! -f "${DISINTO_LIVE}/lib/env.sh" ]; then
    log "Factory bootstrap: live checkout missing expected files — falling back to baked copy"
  else
    DISINTO_DIR="$DISINTO_LIVE"
    log "Factory bootstrap: DISINTO_DIR switched to live checkout at ${DISINTO_LIVE}"
  fi
 }
 # Ensure the project repo is cloned on first run (#589).
 # The agents container uses a named volume (project-repos) at /home/agent/repos.
 # On first startup, if the project repo is missing, clone it from FORGE_URL/FORGE_REPO.
 # This makes the agents container self-healing and independent of init's host clone.
 ensure_project_clone() {
  # Clone ${FORGE_URL}/${FORGE_REPO}.git into /home/agent/repos/${PROJECT_NAME}
  # unless a checkout (a .git directory) is already there.
  # Globals: PROJECT_NAME, FORGE_REPO, FORGE_URL (read)
  # Returns: 0 when the repo is present or was cloned; 1 when the settings
  #          are missing or the clone fails.
  # shellcheck disable=SC2153
  local checkout="/home/agent/repos/${PROJECT_NAME}"
  local parent clone_url
  if [ -d "${checkout}/.git" ]; then
    log "Project repo present at ${checkout}"
    return 0
  fi
  if [ -z "${FORGE_REPO:-}" ] || [ -z "${FORGE_URL:-}" ]; then
    log "Cannot clone project repo: FORGE_REPO or FORGE_URL unset"
    return 1
  fi
  clone_url="${FORGE_URL}/${FORGE_REPO}.git"
  log "Cloning ${clone_url} -> ${checkout} (first run)"
  # The parent directory is created as the current user, then handed to the
  # agent user so the clone below can write into it.
  parent=$(dirname "$checkout")
  mkdir -p "$parent"
  chown -R agent:agent "$parent"
  if ! gosu agent git clone --quiet "$clone_url" "$checkout"; then
    log "Project repo clone failed — agents may fail until manually fixed"
    return 1
  fi
  log "Project repo cloned"
 }
 # Pull latest factory code at the start of each poll iteration (#593).
 # Runs as the agent user; failures are non-fatal (stale code still works).
 pull_factory_repo() {
  # Hard-reset the live factory checkout to origin/<primary branch>.
  # Does nothing unless DISINTO_DIR currently points at DISINTO_LIVE.
  # Globals: DISINTO_DIR, DISINTO_LIVE, PRIMARY_BRANCH (read)
  # A failed fetch/reset is only logged, never propagated as an error.
  if [ "$DISINTO_DIR" != "$DISINTO_LIVE" ]; then
    return 0
  fi
  local branch="${PRIMARY_BRANCH:-main}"
  if ! gosu agent bash -c "
    cd '${DISINTO_LIVE}' && \
    git fetch origin '${branch}' --quiet 2>/dev/null && \
    git reset --hard 'origin/${branch}' --quiet 2>/dev/null
  "; then
    log "Factory pull failed — continuing with current checkout"
  fi
 }
 # Configure git and tea once at startup (as root, then drop to agent)
-_setup_git_creds
+configure_git_creds
 configure_git_identity
 configure_tea_login
 # Clone project repo on first run (makes agents self-healing, #589)
 ensure_project_clone
 # Bootstrap ops repos from forgejo into container volumes (#586)
 bootstrap_ops_repos
 # Bootstrap factory repo — switch DISINTO_DIR to live checkout (#593)
 bootstrap_factory_repo
 # Initialize state directory for check_active guards
 init_state_dir
 # Parse AGENT_ROLES env var (default: all agents)
 # Expected format: comma-separated list like "review,dev,gardener"
-AGENT_ROLES="${AGENT_ROLES:-review,dev,gardener,architect,planner,predictor,supervisor}"
+AGENT_ROLES="${AGENT_ROLES:-review,dev,gardener,architect,planner,predictor}"
 log "Agent roles configured: ${AGENT_ROLES}"
 # Poll interval in seconds (5 minutes default)
 POLL_INTERVAL="${POLL_INTERVAL:-300}"
 # Gardener and architect intervals (default 6 hours = 21600 seconds)
 GARDENER_INTERVAL="${GARDENER_INTERVAL:-21600}"
 ARCHITECT_INTERVAL="${ARCHITECT_INTERVAL:-21600}"
 PLANNER_INTERVAL="${PLANNER_INTERVAL:-43200}"
 SUPERVISOR_INTERVAL="${SUPERVISOR_INTERVAL:-1200}"
 log "Entering polling loop (interval: ${POLL_INTERVAL}s, roles: ${AGENT_ROLES})"
 log "Gardener interval: ${GARDENER_INTERVAL}s, Architect interval: ${ARCHITECT_INTERVAL}s, Planner interval: ${PLANNER_INTERVAL}s, Supervisor interval: ${SUPERVISOR_INTERVAL}s"
 # Main polling loop using iteration counter for gardener scheduling
 iteration=0
@ -350,9 +145,6 @@ while true; do
  iteration=$((iteration + 1))
  now=$(date +%s)
  # Pull latest factory code so poll scripts stay current (#593)
  pull_factory_repo
  # Stale .sid cleanup — needed for agents that don't support --resume
  # Run this as the agent user
  gosu agent bash -c "rm -f /tmp/dev-session-*.sid /tmp/review-session-*.sid 2>/dev/null || true"
@ -361,39 +153,17 @@ while true; do
  # Fast agents (review-poll, dev-poll) run in background so they don't block
  # each other.  Slow agents (gardener, architect, planner, predictor) also run
  # in background but are guarded by pgrep so only one instance runs at a time.
-  # Per-session CLAUDE_CONFIG_DIR isolation handles OAuth concurrency natively.
+  # The flock on session.lock already serializes claude -p calls.
  # Set CLAUDE_EXTERNAL_LOCK=1 to re-enable the legacy flock serialization.
  for toml in "${DISINTO_DIR}"/projects/*.toml; do
    [ -f "$toml" ] || continue
    # Parse project name and primary branch from TOML so env.sh preconditions
    # are satisfied when agent scripts source it (#674).
    _toml_vals=$(python3 -c "
 import tomllib, sys
 with open(sys.argv[1], 'rb') as f:
    cfg = tomllib.load(f)
 print(cfg.get('name', ''))
 print(cfg.get('primary_branch', 'main'))
 " "$toml" 2>/dev/null || true)
    _pname=$(sed -n '1p' <<< "$_toml_vals")
    _pbranch=$(sed -n '2p' <<< "$_toml_vals")
    [ -n "$_pname" ] || { log "WARNING: could not parse project name from ${toml} — skipping"; continue; }
    export PROJECT_NAME="$_pname"
    export PROJECT_REPO_ROOT="/home/agent/repos/${_pname}"
    export OPS_REPO_ROOT="/home/agent/repos/${_pname}-ops"
    export PRIMARY_BRANCH="${_pbranch:-main}"
    log "Processing project TOML: ${toml}"
    # --- Fast agents: run in background, wait before slow agents ---
    FAST_PIDS=()
    # Review poll (every iteration)
    if [[ ",${AGENT_ROLES}," == *",review,"* ]]; then
      log "Running review-poll (iteration ${iteration}) for ${toml}"
-      gosu agent bash -c "cd ${DISINTO_DIR} && bash review/review-poll.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/review-poll.log" 2>&1 &
+      gosu agent bash -c "cd ${DISINTO_DIR} && bash review/review-poll.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/review-poll.log" 2>&1 &
      FAST_PIDS+=($!)
    fi
    sleep 2  # stagger fast polls
@ -401,51 +171,50 @@ print(cfg.get('primary_branch', 'main'))
    # Dev poll (every iteration)
    if [[ ",${AGENT_ROLES}," == *",dev,"* ]]; then
      log "Running dev-poll (iteration ${iteration}) for ${toml}"
-      gosu agent bash -c "cd ${DISINTO_DIR} && bash dev/dev-poll.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/dev-poll.log" 2>&1 &
+      gosu agent bash -c "cd ${DISINTO_DIR} && bash dev/dev-poll.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/dev-poll.log" 2>&1 &
      FAST_PIDS+=($!)
    fi
-    # Wait only for THIS iteration's fast polls — long-running gardener/dev-agent
+    # Wait for fast polls to finish before launching slow agents
-    # from prior iterations must not block us.
+    wait
    if [ ${#FAST_PIDS[@]} -gt 0 ]; then
      wait "${FAST_PIDS[@]}"
    fi
    # --- Slow agents: run in background with pgrep guard ---
-    # Gardener (interval configurable via GARDENER_INTERVAL env var)
+    # Gardener (default 6 hours = 21600 seconds)
    if [[ ",${AGENT_ROLES}," == *",gardener,"* ]]; then
      gardener_iteration=$((iteration * POLL_INTERVAL))
-      if [ $((gardener_iteration % GARDENER_INTERVAL)) -eq 0 ] && [ "$now" -ge "$gardener_iteration" ]; then
+      gardener_interval="${GARDENER_INTERVAL:-21600}"  # default 6h, override via env var
      if [ $((gardener_iteration % gardener_interval)) -eq 0 ] && [ "$now" -ge "$gardener_iteration" ]; then
        if ! pgrep -f "gardener-run.sh" >/dev/null; then
-          log "Running gardener (iteration ${iteration}, ${GARDENER_INTERVAL}s interval) for ${toml}"
+          log "Running gardener (iteration ${iteration}, ${gardener_interval}s interval) for ${toml}"
-          gosu agent bash -c "cd ${DISINTO_DIR} && bash gardener/gardener-run.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/gardener.log" 2>&1 &
+          gosu agent bash -c "cd ${DISINTO_DIR} && bash gardener/gardener-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/gardener.log" 2>&1 &
        else
          log "Skipping gardener — already running"
        fi
      fi
    fi
-    # Architect (interval configurable via ARCHITECT_INTERVAL env var)
+    # Architect (default 6 hours, same schedule as gardener)
    if [[ ",${AGENT_ROLES}," == *",architect,"* ]]; then
      architect_iteration=$((iteration * POLL_INTERVAL))
-      if [ $((architect_iteration % ARCHITECT_INTERVAL)) -eq 0 ] && [ "$now" -ge "$architect_iteration" ]; then
+      architect_interval="${ARCHITECT_INTERVAL:-21600}"  # default 6h, override via env var
      if [ $((architect_iteration % architect_interval)) -eq 0 ] && [ "$now" -ge "$architect_iteration" ]; then
        if ! pgrep -f "architect-run.sh" >/dev/null; then
-          log "Running architect (iteration ${iteration}, ${ARCHITECT_INTERVAL}s interval) for ${toml}"
+          log "Running architect (iteration ${iteration}, ${architect_interval}s interval) for ${toml}"
-          gosu agent bash -c "cd ${DISINTO_DIR} && bash architect/architect-run.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/architect.log" 2>&1 &
+          gosu agent bash -c "cd ${DISINTO_DIR} && bash architect/architect-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/architect.log" 2>&1 &
        else
          log "Skipping architect — already running"
        fi
      fi
    fi
-    # Planner (interval configurable via PLANNER_INTERVAL env var)
+    # Planner (every 12 hours = 144 iterations * 5 min = 43200 seconds)
    if [[ ",${AGENT_ROLES}," == *",planner,"* ]]; then
      planner_iteration=$((iteration * POLL_INTERVAL))
-      if [ $((planner_iteration % PLANNER_INTERVAL)) -eq 0 ] && [ "$now" -ge "$planner_iteration" ]; then
+      planner_interval=$((12 * 60 * 60))  # 12 hours in seconds
      if [ $((planner_iteration % planner_interval)) -eq 0 ] && [ "$now" -ge "$planner_iteration" ]; then
        if ! pgrep -f "planner-run.sh" >/dev/null; then
-          log "Running planner (iteration ${iteration}, ${PLANNER_INTERVAL}s interval) for ${toml}"
+          log "Running planner (iteration ${iteration}, 12-hour interval) for ${toml}"
-          gosu agent bash -c "cd ${DISINTO_DIR} && bash planner/planner-run.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/planner.log" 2>&1 &
+          gosu agent bash -c "cd ${DISINTO_DIR} && bash planner/planner-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/planner.log" 2>&1 &
        else
          log "Skipping planner — already running"
        fi
@ -459,25 +228,12 @@ print(cfg.get('primary_branch', 'main'))
      if [ $((predictor_iteration % predictor_interval)) -eq 0 ] && [ "$now" -ge "$predictor_iteration" ]; then
        if ! pgrep -f "predictor-run.sh" >/dev/null; then
          log "Running predictor (iteration ${iteration}, 24-hour interval) for ${toml}"
-          gosu agent bash -c "cd ${DISINTO_DIR} && bash predictor/predictor-run.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/predictor.log" 2>&1 &
+          gosu agent bash -c "cd ${DISINTO_DIR} && bash predictor/predictor-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/predictor.log" 2>&1 &
        else
          log "Skipping predictor — already running"
        fi
      fi
    fi
    # Supervisor (interval configurable via SUPERVISOR_INTERVAL env var, default 20 min)
    if [[ ",${AGENT_ROLES}," == *",supervisor,"* ]]; then
      supervisor_iteration=$((iteration * POLL_INTERVAL))
      if [ $((supervisor_iteration % SUPERVISOR_INTERVAL)) -eq 0 ] && [ "$now" -ge "$supervisor_iteration" ]; then
        if ! pgrep -f "supervisor-run.sh" >/dev/null; then
          log "Running supervisor (iteration ${iteration}, ${SUPERVISOR_INTERVAL}s interval) for ${toml}"
          gosu agent bash -c "cd ${DISINTO_DIR} && bash supervisor/supervisor-run.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/supervisor.log" 2>&1 &
        else
          log "Skipping supervisor — already running"
        fi
      fi
    fi
  done
  sleep "${POLL_INTERVAL}"
--- a/docker/chat/Dockerfile
+++ b/docker/chat/Dockerfile
@ -1,35 +0,0 @@
 # disinto-chat — minimal HTTP backend for Claude chat UI
 #
 # Small Debian slim base with Python runtime.
 # Chosen for simplicity and small image size (~100MB).
 #
 # Image size: ~100MB (well under the 200MB ceiling)
 #
 # The claude binary is mounted from the host at runtime via docker-compose,
 # not baked into the image — same pattern as the agents container.
 FROM debian:bookworm-slim
 # Install Python (no build-time network access needed)
 RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 \
    && rm -rf /var/lib/apt/lists/*
 # Non-root user — fixed UID 10001 for sandbox hardening (#706)
 RUN useradd -m -u 10001 -s /bin/bash chat
 # Copy application files
 COPY server.py /usr/local/bin/server.py
 COPY entrypoint-chat.sh /entrypoint-chat.sh
 COPY ui/ /var/chat/ui/
 RUN chmod +x /entrypoint-chat.sh /usr/local/bin/server.py
 USER chat
 WORKDIR /var/chat
 EXPOSE 8080
 HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
  CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')" || exit 1
 ENTRYPOINT ["/entrypoint-chat.sh"]
--- a/docker/chat/entrypoint-chat.sh
+++ b/docker/chat/entrypoint-chat.sh
@ -1,37 +0,0 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # entrypoint-chat.sh — Start the disinto-chat backend server
 #
 # Exec-replace pattern: this script is the container entrypoint and execs
 # the server directly (no wrapper needed). Log lines go to stdout (for
 # `docker logs`) and are also appended to /tmp/chat.log.
 LOGFILE="/tmp/chat.log"
 log() {
    # Print a UTC-timestamped line to stdout and append the same line to
    # the file named by $LOGFILE.
    local stamp
    stamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
    printf '[%s] %s\n' "$stamp" "$*" | tee -a "$LOGFILE"
 }
 # Sandbox sanity checks (#706) — fail fast if isolation is broken
 if [ -e /var/run/docker.sock ]; then
    log "FATAL: /var/run/docker.sock is accessible — sandbox violation"
    exit 1
 fi
 if [ "$(id -u)" = "0" ]; then
    log "FATAL: running as root (uid 0) — sandbox violation"
    exit 1
 fi
 # Verify Claude CLI is available (expected via volume mount from host).
 if ! command -v claude &>/dev/null; then
    log "FATAL: claude CLI not found in PATH"
    log "Mount the host binary into the container, e.g.:"
    log "  volumes:"
    log "    - /usr/local/bin/claude:/usr/local/bin/claude:ro"
    exit 1
 fi
 log "Claude CLI: $(claude --version 2>&1 || true)"
 # Start the Python server (exec-replace so signals propagate correctly)
 log "Starting disinto-chat server on port 8080..."
 exec python3 /usr/local/bin/server.py
--- a/docker/chat/server.py
+++ b/docker/chat/server.py
@ -1,957 +0,0 @@
 #!/usr/bin/env python3
 """
 disinto-chat server — minimal HTTP backend for Claude chat UI.
 Routes:
    GET /chat/auth/verify    -> Caddy forward_auth callback (returns 200+X-Forwarded-User or 401)
    GET /chat/login          -> 302 to Forgejo OAuth authorize
    GET /chat/oauth/callback -> exchange code for token, validate user, set session
    GET /chat/               -> serves index.html (session required)
    GET /chat/static/*       -> serves static assets (session required)
    POST /chat               -> spawns `claude --print` with user message (session required)
    GET /ws                  -> reserved for future streaming upgrade (returns 501)
 OAuth flow:
    1. User hits any /chat/* route without a valid session cookie -> 302 /chat/login
    2. /chat/login redirects to Forgejo /login/oauth/authorize
    3. Forgejo redirects back to /chat/oauth/callback with ?code=...&state=...
    4. Server exchanges code for access token, fetches /api/v1/user
    5. Asserts user is in allowlist, sets HttpOnly session cookie
    6. Redirects to /chat/
 The claude binary is expected to be mounted from the host at /usr/local/bin/claude.
 """
 import datetime
 import json
 import os
 import re
 import secrets
 import subprocess
 import sys
 import time
 from http.server import HTTPServer, BaseHTTPRequestHandler
 from urllib.parse import urlparse, parse_qs, urlencode
 # Configuration
 HOST = os.environ.get("CHAT_HOST", "0.0.0.0")
 PORT = int(os.environ.get("CHAT_PORT", 8080))
 UI_DIR = "/var/chat/ui"
 STATIC_DIR = os.path.join(UI_DIR, "static")
 CLAUDE_BIN = "/usr/local/bin/claude"
 # OAuth configuration
 FORGE_URL = os.environ.get("FORGE_URL", "http://localhost:3000")
 CHAT_OAUTH_CLIENT_ID = os.environ.get("CHAT_OAUTH_CLIENT_ID", "")
 CHAT_OAUTH_CLIENT_SECRET = os.environ.get("CHAT_OAUTH_CLIENT_SECRET", "")
 EDGE_TUNNEL_FQDN = os.environ.get("EDGE_TUNNEL_FQDN", "")
 # Shared secret for Caddy forward_auth verify endpoint (#709).
 # When set, only requests carrying this value in X-Forward-Auth-Secret are
 # allowed to call /chat/auth/verify.  When empty the endpoint is unrestricted
 # (acceptable during local dev; production MUST set this).
 FORWARD_AUTH_SECRET = os.environ.get("FORWARD_AUTH_SECRET", "")
 # Rate limiting / cost caps (#711)
 CHAT_MAX_REQUESTS_PER_HOUR = int(os.environ.get("CHAT_MAX_REQUESTS_PER_HOUR", 60))
 CHAT_MAX_REQUESTS_PER_DAY = int(os.environ.get("CHAT_MAX_REQUESTS_PER_DAY", 500))
 CHAT_MAX_TOKENS_PER_DAY = int(os.environ.get("CHAT_MAX_TOKENS_PER_DAY", 1000000))
 # Allowed users - disinto-admin always allowed; CSV allowlist extends it
 _allowed_csv = os.environ.get("DISINTO_CHAT_ALLOWED_USERS", "")
 ALLOWED_USERS = {"disinto-admin"}
 if _allowed_csv:
    ALLOWED_USERS.update(u.strip() for u in _allowed_csv.split(",") if u.strip())
 # Session cookie name
 SESSION_COOKIE = "disinto_chat_session"
 # Session TTL: 24 hours
 SESSION_TTL = 24 * 60 * 60
 # Chat history directory (bind-mounted from host)
 CHAT_HISTORY_DIR = os.environ.get("CHAT_HISTORY_DIR", "/var/lib/chat/history")
 # Regex for valid conversation_id (12-char hex, no slashes)
 CONVERSATION_ID_PATTERN = re.compile(r"^[0-9a-f]{12}$")
 # In-memory session store: token -> {"user": str, "expires": float}
 _sessions = {}
 # Pending OAuth state tokens: state -> expires (float)
 _oauth_states = {}
 # Per-user rate limiting state (#711)
 # user -> list of request timestamps (for sliding-window hourly/daily caps)
 _request_log = {}
 # user -> {"tokens": int, "date": "YYYY-MM-DD"}
 _daily_tokens = {}
 # MIME types for static files
 MIME_TYPES = {
    ".html": "text/html; charset=utf-8",
    ".js": "application/javascript; charset=utf-8",
    ".css": "text/css; charset=utf-8",
    ".json": "application/json; charset=utf-8",
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".svg": "image/svg+xml",
    ".ico": "image/x-icon",
 }
 def _build_callback_uri():
    """Return the OAuth redirect URI for this deployment.

    With EDGE_TUNNEL_FQDN set, the URI is an https URL on that host;
    otherwise it is the plain-http localhost URL.
    """
    if not EDGE_TUNNEL_FQDN:
        return "http://localhost/chat/oauth/callback"
    return f"https://{EDGE_TUNNEL_FQDN}/chat/oauth/callback"
 def _session_cookie_flags():
    """Return the attribute string appended to the session cookie.

    The Secure attribute is added only when EDGE_TUNNEL_FQDN is set.
    """
    base_flags = "HttpOnly; SameSite=Lax; Path=/chat"
    if not EDGE_TUNNEL_FQDN:
        return base_flags
    return base_flags + "; Secure"
 def _validate_session(cookie_header):
    """Return the username bound to the session cookie, or None.

    Only the first cookie named SESSION_COOKIE in the header is considered.
    If its token is unknown or expired, the token is dropped from the
    session store and None is returned.
    """
    if not cookie_header:
        return None
    prefix = SESSION_COOKIE + "="
    for raw_piece in cookie_header.split(";"):
        piece = raw_piece.strip()
        if not piece.startswith(prefix):
            continue
        token = piece[len(prefix):]
        entry = _sessions.get(token)
        if entry and entry["expires"] > time.time():
            return entry["user"]
        # Unknown or expired token: forget it and stop looking.
        _sessions.pop(token, None)
        return None
    return None
 def _gc_sessions():
    """Drop expired entries from the session and OAuth-state stores."""
    cutoff = time.time()
    # Collect keys first so the dicts are not mutated while iterating.
    stale_tokens = [tok for tok, sess in _sessions.items() if sess["expires"] <= cutoff]
    for tok in stale_tokens:
        del _sessions[tok]
    stale_states = [state for state, expiry in _oauth_states.items() if expiry <= cutoff]
    for state in stale_states:
        del _oauth_states[state]
 def _exchange_code_for_token(code):
    """POST an authorization code to Forgejo's token endpoint.

    Returns the decoded JSON response, or None when the request or the
    JSON decoding fails (the error is printed to stderr).
    """
    import urllib.request
    import urllib.error
    form_fields = {
        "grant_type": "authorization_code",
        "code": code,
        "client_id": CHAT_OAUTH_CLIENT_ID,
        "client_secret": CHAT_OAUTH_CLIENT_SECRET,
        "redirect_uri": _build_callback_uri(),
    }
    request_headers = {
        "Accept": "application/json",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    token_request = urllib.request.Request(
        f"{FORGE_URL}/login/oauth/access_token",
        data=urlencode(form_fields).encode(),
        headers=request_headers,
        method="POST",
    )
    try:
        with urllib.request.urlopen(token_request, timeout=10) as resp:
            raw_body = resp.read()
        return json.loads(raw_body.decode())
    except (urllib.error.URLError, json.JSONDecodeError, OSError) as e:
        print(f"OAuth token exchange failed: {e}", file=sys.stderr)
        return None
 def _fetch_user(access_token):
    """GET /api/v1/user from Forgejo using the given access token.

    Returns the decoded JSON response, or None when the request or the
    JSON decoding fails (the error is printed to stderr).
    """
    import urllib.request
    import urllib.error
    auth_headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/json",
    }
    user_request = urllib.request.Request(f"{FORGE_URL}/api/v1/user", headers=auth_headers)
    try:
        with urllib.request.urlopen(user_request, timeout=10) as resp:
            raw_body = resp.read()
        return json.loads(raw_body.decode())
    except (urllib.error.URLError, json.JSONDecodeError, OSError) as e:
        print(f"User fetch failed: {e}", file=sys.stderr)
        return None
 # =============================================================================
 # Rate Limiting Functions (#711)
 # =============================================================================
 def _check_rate_limit(user):
    """Check per-user rate limits. Returns (allowed, retry_after, reason) (#711).

    Limits are checked in this order: hourly request cap (sliding 3600 s
    window), daily request cap (since local midnight), daily token cap.
    The first limit hit decides the result; retry_after is at least 1.
    As a side effect, request timestamps older than 24 h are pruned.
    """
    now = time.time()
    hour_floor = now - 3600
    today = datetime.date.today().isoformat()

    def _seconds_until(moment):
        # Whole seconds until `moment`, rounded up past it, never below 1.
        return max(int(moment - now) + 1, 1)

    # Keep only the last 24 h of request timestamps for this user.
    recent = [stamp for stamp in _request_log.get(user, []) if stamp > now - 86400]
    _request_log[user] = recent
    # Hourly request cap
    in_last_hour = [stamp for stamp in recent if stamp > hour_floor]
    if len(in_last_hour) >= CHAT_MAX_REQUESTS_PER_HOUR:
        return False, _seconds_until(min(in_last_hour) + 3600), "hourly request limit"
    # Daily request cap
    midnight = time.mktime(datetime.date.today().timetuple())
    since_midnight = [stamp for stamp in recent if stamp >= midnight]
    if len(since_midnight) >= CHAT_MAX_REQUESTS_PER_DAY:
        return False, _seconds_until(midnight + 86400), "daily request limit"
    # Daily token cap; a counter recorded on an earlier date is reset.
    usage = _daily_tokens.get(user, {"tokens": 0, "date": today})
    if usage["date"] != today:
        usage = {"tokens": 0, "date": today}
        _daily_tokens[user] = usage
    if usage["tokens"] >= CHAT_MAX_TOKENS_PER_DAY:
        return False, _seconds_until(midnight + 86400), "daily token limit"
    return True, 0, ""
 def _record_request(user):
    """Append the current time to the user's request log (#711)."""
    stamps = _request_log.setdefault(user, [])
    stamps.append(time.time())
 def _record_tokens(user, tokens):
    """Add `tokens` to the user's counter for today (#711).

    A counter recorded on an earlier date is replaced by a fresh one.
    """
    today = datetime.date.today().isoformat()
    entry = _daily_tokens.get(user)
    if entry is None or entry["date"] != today:
        entry = {"tokens": 0, "date": today}
    entry["tokens"] += tokens
    _daily_tokens[user] = entry
 def _parse_stream_json(output):
    """Parse stream-json output from claude --print (#711).
    Returns (text_content, total_tokens).  Falls back gracefully if the
    usage event is absent or malformed.
    """
    text_parts = []
    total_tokens = 0
    for line in output.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue
        etype = event.get("type", "")
        # Collect assistant text
        if etype == "content_block_delta":
            delta = event.get("delta", {})
            if delta.get("type") == "text_delta":
                text_parts.append(delta.get("text", ""))
        elif etype == "assistant":
            # Full assistant message (non-streaming)
            content = event.get("content", "")
            if isinstance(content, str) and content:
                text_parts.append(content)
            elif isinstance(content, list):
                for block in content:
                    if isinstance(block, dict) and block.get("text"):
                        text_parts.append(block["text"])
        # Parse usage from result event
        if etype == "result":
            usage = event.get("usage", {})
            total_tokens = usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
        elif "usage" in event:
            usage = event["usage"]
            if isinstance(usage, dict):
                total_tokens = usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
    return "".join(text_parts), total_tokens
 # =============================================================================
 # Conversation History Functions (#710)
 # =============================================================================
 def _generate_conversation_id():
    """Generate a new conversation ID (12-char hex string)."""
    return secrets.token_hex(6)
 def _validate_conversation_id(conv_id):
    """Return True only if conv_id is a string that is exactly 12 hex digits.

    Non-string values yield False instead of raising. `fullmatch` is used
    because `match` with a `$` anchor also accepts a trailing newline
    ("0123456789ab\\n"), which would otherwise end up in a file name.
    """
    if not isinstance(conv_id, str):
        return False
    return CONVERSATION_ID_PATTERN.fullmatch(conv_id) is not None
 def _get_user_history_dir(user):
    """Return CHAT_HISTORY_DIR/<user>; `user` is joined as given, unvalidated."""
    history_root = CHAT_HISTORY_DIR
    return os.path.join(history_root, user)
 def _get_conversation_path(user, conv_id):
    """Return the path of the user's <conv_id>.ndjson conversation file."""
    file_name = f"{conv_id}.ndjson"
    return os.path.join(_get_user_history_dir(user), file_name)
 def _ensure_user_dir(user):
    """Create the user's history directory when it is missing and return its path."""
    history_dir = _get_user_history_dir(user)
    # exist_ok makes repeated calls harmless.
    os.makedirs(history_dir, exist_ok=True)
    return history_dir
 def _write_message(user, conv_id, role, content):
    """Append one message record to the conversation's NDJSON file.

    The record holds a UTC timestamp ("ts"), the user, the role and the
    content, and is written as a single JSON line.
    """
    _ensure_user_dir(user)
    target = _get_conversation_path(user, conv_id)
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    entry = dict(ts=stamp, user=user, role=role, content=content)
    with open(target, "a", encoding="utf-8") as out:
        out.write(json.dumps(entry, ensure_ascii=False) + "\n")
 def _read_conversation(user, conv_id):
    """Load every parseable message from a conversation file.

    Returns the decoded JSON records in file order (blank and malformed
    lines are skipped), or None when the file does not exist or cannot be
    read.
    """
    path = _get_conversation_path(user, conv_id)
    if not os.path.exists(path):
        return None
    records = []
    try:
        with open(path, "r", encoding="utf-8") as handle:
            for raw in handle:
                stripped = raw.strip()
                if not stripped:
                    continue
                try:
                    record = json.loads(stripped)
                except json.JSONDecodeError:
                    # Malformed line: ignore it and keep reading.
                    continue
                records.append(record)
    except IOError:
        return None
    return records
 def _list_user_conversations(user):
    """List the user's conversations, newest first, with a first-message preview.

    Scans the user's history directory for ``<id>.ndjson`` files whose id
    passes _validate_conversation_id. Each entry is a dict with the keys
    "id", "created_at" (the first message's "ts"), "preview" (the first 50
    characters of the first message, with "..." appended when truncated) and
    "message_count". Files without readable messages are reported with the
    preview "(empty)".

    Returns an empty list when the directory does not exist. An OSError
    while listing the directory ends the scan and returns what was
    collected so far.
    """
    user_dir = _get_user_history_dir(user)
    conversations = []
    if not os.path.exists(user_dir):
        return conversations
    suffix = ".ndjson"
    try:
        for filename in os.listdir(user_dir):
            if not filename.endswith(suffix):
                continue
            conv_id = filename[:-len(suffix)]
            if not _validate_conversation_id(conv_id):
                continue
            messages = _read_conversation(user, conv_id)
            if messages:
                first_msg = messages[0]
                # Any valid JSON line is accepted when reading, so the first
                # record may not be an object; treat it as having no fields
                # instead of letting .get() raise.
                if not isinstance(first_msg, dict):
                    first_msg = {}
                content = first_msg.get("content", "")
                if content is None:
                    content = ""
                elif not isinstance(content, str):
                    content = str(content)
                created_at = first_msg.get("ts", "")
                # Keep the sort key homogeneous so sorting cannot raise.
                if not isinstance(created_at, str):
                    created_at = ""
                preview = content[:50]
                if len(content) > 50:
                    preview += "..."
                conversations.append({
                    "id": conv_id,
                    "created_at": created_at,
                    "preview": preview,
                    "message_count": len(messages),
                })
            else:
                # Empty (or unreadable) conversation file
                conversations.append({
                    "id": conv_id,
                    "created_at": "",
                    "preview": "(empty)",
                    "message_count": 0,
                })
    except OSError:
        pass
    # Sort by created_at descending
    conversations.sort(key=lambda entry: entry["created_at"], reverse=True)
    return conversations
 def _delete_conversation(user, conv_id):
    """Delete a conversation file.

    Returns True when the file was removed and False when it did not exist.
    """
    conv_path = _get_conversation_path(user, conv_id)
    # Remove directly instead of checking first: a file that disappears
    # between an exists() check and remove() would otherwise raise.
    try:
        os.remove(conv_path)
    except FileNotFoundError:
        return False
    return True
 class ChatHandler(BaseHTTPRequestHandler):
    """HTTP request handler for disinto-chat with Forgejo OAuth."""
    def log_message(self, format, *args):
        """Log to stderr."""
        print(f"[{self.log_date_time_string()}] {format % args}", file=sys.stderr)
    def send_error_page(self, code, message=None):
        """Custom error response."""
        self.send_response(code)
        self.send_header("Content-Type", "text/plain; charset=utf-8")
        self.end_headers()
        if message:
            self.wfile.write(message.encode("utf-8"))
    def _require_session(self):
        """Return the username of a valid session, or redirect to /chat/login and return None."""
        username = _validate_session(self.headers.get("Cookie"))
        if not username:
            # No usable session: send the browser to the login route.
            self.send_response(302)
            self.send_header("Location", "/chat/login")
            self.end_headers()
            return None
        return username
    def _check_forwarded_user(self, session_user):
        """Defense-in-depth: verify X-Forwarded-User matches session user (#709).
        Returns True if the request may proceed, False if a 403 was sent.
        When X-Forwarded-User is absent (forward_auth removed from Caddy),
        the request is rejected - fail-closed by design.
        """
        forwarded = self.headers.get("X-Forwarded-User")
        if not forwarded:
            rid = self.headers.get("X-Request-Id", "-")
            print(
                f"WARN: missing X-Forwarded-User for session_user={session_user} "
                f"req_id={rid} - fail-closed (#709)",
                file=sys.stderr,
            )
            self.send_error_page(403, "Forbidden: missing forwarded-user header")
            return False
        if forwarded != session_user:
            rid = self.headers.get("X-Request-Id", "-")
            print(
                f"WARN: X-Forwarded-User mismatch: header={forwarded} "
                f"session={session_user} req_id={rid} (#709)",
                file=sys.stderr,
            )
            self.send_error_page(403, "Forbidden: user identity mismatch")
            return False
        return True
    def do_GET(self):
        """Handle GET requests."""
        parsed = urlparse(self.path)
        path = parsed.path
        # Health endpoint (no auth required) — used by Docker healthcheck
        if path == "/health":
            self.send_response(200)
            self.send_header("Content-Type", "text/plain")
            self.end_headers()
            self.wfile.write(b"ok\n")
            return
        # Verify endpoint for Caddy forward_auth (#709)
        if path == "/chat/auth/verify":
            self.handle_auth_verify()
            return
        # OAuth routes (no session required)
        if path == "/chat/login":
            self.handle_login()
            return
        if path == "/chat/oauth/callback":
            self.handle_oauth_callback(parsed.query)
            return
        # Conversation list endpoint: GET /chat/history
        if path == "/chat/history":
            user = self._require_session()
            if not user:
                return
            if not self._check_forwarded_user(user):
                return
            self.handle_conversation_list(user)
            return
        # Single conversation endpoint: GET /chat/history/<id>
        if path.startswith("/chat/history/"):
            user = self._require_session()
            if not user:
                return
            if not self._check_forwarded_user(user):
                return
            conv_id = path[len("/chat/history/"):]
            self.handle_conversation_get(user, conv_id)
            return
        # Serve index.html at root
        if path in ("/", "/chat", "/chat/"):
            user = self._require_session()
            if not user:
                return
            if not self._check_forwarded_user(user):
                return
            self.serve_index()
            return
        # Serve static files
        if path.startswith("/chat/static/") or path.startswith("/static/"):
            user = self._require_session()
            if not user:
                return
            if not self._check_forwarded_user(user):
                return
            self.serve_static(path)
            return
        # Reserved WebSocket endpoint (future use)
        if path == "/ws" or path.startswith("/ws"):
            self.send_error_page(501, "WebSocket upgrade not yet implemented")
            return
        # 404 for unknown paths
        self.send_error_page(404, "Not found")
    def do_POST(self):
        """Handle POST requests."""
        parsed = urlparse(self.path)
        path = parsed.path
        # New conversation endpoint (session required)
        if path == "/chat/new":
            user = self._require_session()
            if not user:
                return
            if not self._check_forwarded_user(user):
                return
            self.handle_new_conversation(user)
            return
        # Chat endpoint (session required)
        if path in ("/chat", "/chat/"):
            user = self._require_session()
            if not user:
                return
            if not self._check_forwarded_user(user):
                return
            self.handle_chat(user)
            return
        # 404 for unknown paths
        self.send_error_page(404, "Not found")
    def handle_auth_verify(self):
        """Caddy forward_auth callback - validate session and return X-Forwarded-User (#709).

        Caddy calls this endpoint for every /chat/* request.  If the session
        cookie is valid the endpoint returns 200 with the X-Forwarded-User
        header set to the session username.  Otherwise it returns 401 so Caddy
        knows the request is unauthenticated.

        Access control: when FORWARD_AUTH_SECRET is configured, the request must
        carry a matching X-Forward-Auth-Secret header (shared secret between
        Caddy and the chat backend); a mismatch answers 403.
        """
        # Shared-secret gate
        if FORWARD_AUTH_SECRET:
            provided = self.headers.get("X-Forward-Auth-Secret", "")
            # Compare as bytes: compare_digest() raises TypeError when a str
            # argument contains non-ASCII characters, and this header value is
            # supplied by the client.
            if not secrets.compare_digest(
                provided.encode("utf-8"), FORWARD_AUTH_SECRET.encode("utf-8")
            ):
                self.send_error_page(403, "Forbidden: invalid forward-auth secret")
                return
        user = _validate_session(self.headers.get("Cookie"))
        if not user:
            self.send_error_page(401, "Unauthorized: no valid session")
            return
        self.send_response(200)
        self.send_header("X-Forwarded-User", user)
        self.send_header("Content-Type", "text/plain; charset=utf-8")
        self.end_headers()
        self.wfile.write(b"ok")
    def handle_login(self):
        """Start the OAuth flow by redirecting the browser to Forgejo's authorize endpoint.

        Answers 500 when CHAT_OAUTH_CLIENT_ID is not configured.
        """
        _gc_sessions()
        if not CHAT_OAUTH_CLIENT_ID:
            self.send_error_page(500, "Chat OAuth not configured (CHAT_OAUTH_CLIENT_ID missing)")
            return

        state = secrets.token_urlsafe(32)
        # Remember the state together with its expiry, 10 minutes from now.
        _oauth_states[state] = time.time() + 600

        query = urlencode([
            ("client_id", CHAT_OAUTH_CLIENT_ID),
            ("redirect_uri", _build_callback_uri()),
            ("response_type", "code"),
            ("state", state),
        ])
        authorize_url = f"{FORGE_URL}/login/oauth/authorize?{query}"
        self.send_response(302)
        self.send_header("Location", authorize_url)
        self.end_headers()
    def handle_oauth_callback(self, query_string):
        """Finish the OAuth flow: check state, trade the code for a token, start a session.

        Answers 400 for an unknown or expired state or a missing code, 502
        when Forgejo yields no access token or no user login, and 403 when
        the user is not in ALLOWED_USERS. On success a session cookie is set
        and the browser is redirected to /chat/.
        """
        query = parse_qs(query_string)
        code = query.get("code", [""])[0]
        state = query.get("state", [""])[0]

        # Validate state; the stored entry is removed as it is looked up.
        expiry = _oauth_states.pop(state, None) if state else None
        if not expiry or expiry < time.time():
            self.send_error_page(400, "Invalid or expired OAuth state")
            return
        if not code:
            self.send_error_page(400, "Missing authorization code")
            return

        # Exchange code for access token
        token_resp = _exchange_code_for_token(code)
        if not token_resp or "access_token" not in token_resp:
            self.send_error_page(502, "Failed to obtain access token from Forgejo")
            return

        # Fetch user info
        user_info = _fetch_user(token_resp["access_token"])
        if not user_info or "login" not in user_info:
            self.send_error_page(502, "Failed to fetch user info from Forgejo")
            return
        username = user_info["login"]

        # Check allowlist
        if username not in ALLOWED_USERS:
            self.send_error_page(
                403,
                f"Not authorised: user '{username}' is not in the allowed users list.\n",
            )
            return

        # Create session
        session_token = secrets.token_urlsafe(48)
        _sessions[session_token] = dict(user=username, expires=time.time() + SESSION_TTL)
        cookie = f"{SESSION_COOKIE}={session_token}; {_session_cookie_flags()}"
        self.send_response(302)
        self.send_header("Set-Cookie", cookie)
        self.send_header("Location", "/chat/")
        self.end_headers()
    def serve_index(self):
        """Send UI_DIR/index.html as the response, or a 500 when it is missing or unreadable."""
        index_path = os.path.join(UI_DIR, "index.html")
        if not os.path.exists(index_path):
            self.send_error_page(500, "UI not found")
            return
        try:
            with open(index_path, "r", encoding="utf-8") as page:
                payload = page.read().encode("utf-8")
            self.send_response(200)
            self.send_header("Content-Type", MIME_TYPES[".html"])
            self.send_header("Content-Length", len(payload))
            self.end_headers()
            self.wfile.write(payload)
        except IOError as e:
            self.send_error_page(500, f"Error reading index.html: {e}")
    def serve_static(self, path):
        """Serve a file from STATIC_DIR for a /chat/static/... or /static/... path.

        Answers 403 when the remainder of the path contains ".." or starts
        with "/", 404 when the target is not an existing file, and 500 when
        reading it fails. The Content-Type is looked up in MIME_TYPES by
        file extension and defaults to application/octet-stream.
        """
        # Strip /chat/static/ or /static/ prefix
        if path.startswith("/chat/static/"):
            relative_path = path[len("/chat/static/"):]
        else:
            relative_path = path[len("/static/"):]
        if ".." in relative_path or relative_path.startswith("/"):
            self.send_error_page(403, "Forbidden")
            return
        file_path = os.path.join(STATIC_DIR, relative_path)
        # isfile() rather than exists(): a directory (including the bare
        # prefix, which maps to STATIC_DIR itself) is not servable and should
        # be a 404 instead of failing in open() with a 500.
        if not os.path.isfile(file_path):
            self.send_error_page(404, "Not found")
            return
        # Determine MIME type
        _, ext = os.path.splitext(file_path)
        content_type = MIME_TYPES.get(ext.lower(), "application/octet-stream")
        try:
            with open(file_path, "rb") as f:
                content = f.read()
            self.send_response(200)
            self.send_header("Content-Type", content_type)
            self.send_header("Content-Length", len(content))
            self.end_headers()
            self.wfile.write(content)
        except IOError as e:
            self.send_error_page(500, f"Error reading file: {e}")
    def _send_rate_limit_response(self, retry_after, reason):
        """Send a 429 response with Retry-After header and HTMX fragment (#711)."""
        body = (
            f'<div class="rate-limit-error">'
            f"Rate limit exceeded: {reason}. "
            f"Please try again in {retry_after} seconds."
            f"</div>"
        )
        self.send_response(429)
        self.send_header("Retry-After", str(retry_after))
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(body.encode("utf-8"))))
        self.end_headers()
        self.wfile.write(body.encode("utf-8"))
    def handle_chat(self, user):
        """
        Handle chat requests by spawning `claude --print` with the user message.
        Enforces per-user rate limits and tracks token usage (#711).

        The request body is form-encoded with a `message` field and an
        optional `conversation_id`; a missing or invalid ID is replaced by a
        newly generated one. On success the reply is JSON with the keys
        "response" and "conversation_id", and both the user message and the
        assistant reply are appended to the conversation history.

        Error responses: 429 when rate limited, 400 for a missing, malformed
        or empty body, 401 without a valid session, 500 when the CLI is
        missing, exits non-zero or anything else fails.
        """
        # Check rate limits before processing (#711)
        allowed, retry_after, reason = _check_rate_limit(user)
        if not allowed:
            self._send_rate_limit_response(retry_after, reason)
            return
        # Read request body. A non-numeric Content-Length must not raise out
        # of the handler, and a negative one would make rfile.read() wait for
        # end of stream.
        try:
            content_length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            self.send_error_page(400, "Invalid Content-Length")
            return
        if content_length <= 0:
            self.send_error_page(400, "No message provided")
            return
        body = self.rfile.read(content_length)
        try:
            # Parse form-encoded body
            body_str = body.decode("utf-8")
            params = parse_qs(body_str)
            message = params.get("message", [""])[0]
            conv_id = params.get("conversation_id", [None])[0]
        except (UnicodeDecodeError, KeyError):
            self.send_error_page(400, "Invalid message format")
            return
        if not message:
            self.send_error_page(400, "Empty message")
            return
        # Get user from session
        user = _validate_session(self.headers.get("Cookie"))
        if not user:
            self.send_error_page(401, "Unauthorized")
            return
        # Validate Claude binary exists
        if not os.path.exists(CLAUDE_BIN):
            self.send_error_page(500, "Claude CLI not found")
            return
        # Generate new conversation ID if not provided
        if not conv_id or not _validate_conversation_id(conv_id):
            conv_id = _generate_conversation_id()
        # Record request for rate limiting (#711)
        _record_request(user)
        try:
            # Save user message to history
            _write_message(user, conv_id, "user", message)
            # Spawn claude --print with stream-json for token tracking (#711)
            # NOTE(review): the user message is passed as a bare argv entry; a
            # message starting with "-" may be parsed by the CLI as an option.
            # Confirm whether the CLI accepts "--" or stdin for the prompt.
            proc = subprocess.Popen(
                [CLAUDE_BIN, "--print", "--output-format", "stream-json", message],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            # communicate() drains stdout and stderr together and waits for
            # exit; reading one pipe to EOF before the other can deadlock
            # once the child fills the unread pipe's buffer.
            raw_output, error_output = proc.communicate()
            if error_output:
                print(f"Claude stderr: {error_output}", file=sys.stderr)
            if proc.returncode != 0:
                self.send_error_page(500, f"Claude CLI failed with exit code {proc.returncode}")
                return
            # Parse stream-json for text and token usage (#711)
            response, total_tokens = _parse_stream_json(raw_output)
            # Track token usage - does not block *this* request (#711)
            if total_tokens > 0:
                _record_tokens(user, total_tokens)
                print(
                    f"Token usage: user={user} tokens={total_tokens}",
                    file=sys.stderr,
                )
            # Fall back to raw output if stream-json parsing yielded no text
            if not response:
                response = raw_output
            # Save assistant response to history
            _write_message(user, conv_id, "assistant", response)
            self.send_response(200)
            self.send_header("Content-Type", "application/json; charset=utf-8")
            self.end_headers()
            self.wfile.write(json.dumps({
                "response": response,
                "conversation_id": conv_id,
            }, ensure_ascii=False).encode("utf-8"))
        except FileNotFoundError:
            self.send_error_page(500, "Claude CLI not found")
        except Exception as e:
            self.send_error_page(500, f"Error: {e}")
    # =======================================================================
    # Conversation History Handlers
    # =======================================================================
    def handle_conversation_list(self, user):
        """Respond with the user's conversation summaries as a JSON array."""
        summaries = _list_user_conversations(user)
        payload = json.dumps(summaries, ensure_ascii=False).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.end_headers()
        self.wfile.write(payload)
    def handle_conversation_get(self, user, conv_id):
        """Respond with one conversation's messages as JSON.

        Answers 400 for an ID that fails validation and 404 when the
        conversation cannot be read.
        """
        if not _validate_conversation_id(conv_id):
            self.send_error_page(400, "Invalid conversation ID")
            return

        history = _read_conversation(user, conv_id)
        if history is None:
            self.send_error_page(404, "Conversation not found")
            return

        payload = json.dumps(history, ensure_ascii=False).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.end_headers()
        self.wfile.write(payload)
    def handle_conversation_delete(self, user, conv_id):
        """Delete one of the user's conversations.

        Answers 400 for an ID that fails validation, 404 when nothing was
        deleted, and 204 (no body) on success.
        """
        if not _validate_conversation_id(conv_id):
            self.send_error_page(400, "Invalid conversation ID")
            return
        if not _delete_conversation(user, conv_id):
            self.send_error_page(404, "Conversation not found")
            return
        self.send_response(204)  # No Content
        self.end_headers()
    def handle_new_conversation(self, user):
        """Respond with a newly generated conversation ID as JSON."""
        reply = {"conversation_id": _generate_conversation_id()}
        payload = json.dumps(reply, ensure_ascii=False).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.end_headers()
        self.wfile.write(payload)
    def do_DELETE(self):
        """Handle DELETE requests."""
        parsed = urlparse(self.path)
        path = parsed.path
        # Delete conversation endpoint
        if path.startswith("/chat/history/"):
            user = self._require_session()
            if not user:
                return
            if not self._check_forwarded_user(user):
                return
            conv_id = path[len("/chat/history/"):]
            self.handle_conversation_delete(user, conv_id)
            return
        # 404 for unknown paths
        self.send_error_page(404, "Not found")
 def main():
    """Create the HTTP server, log the active configuration to stderr and serve forever."""
    def log(text):
        print(text, file=sys.stderr)

    httpd = HTTPServer((HOST, PORT), ChatHandler)
    log(f"Starting disinto-chat server on {HOST}:{PORT}")
    log(f"UI available at http://localhost:{PORT}/chat/")

    if not CHAT_OAUTH_CLIENT_ID:
        log("WARNING: CHAT_OAUTH_CLIENT_ID not set - OAuth disabled")
    else:
        # Only the first 8 characters of the client ID are logged.
        log(f"OAuth enabled (client_id={CHAT_OAUTH_CLIENT_ID[:8]}...)")
        log(f"Allowed users: {', '.join(sorted(ALLOWED_USERS))}")

    if not FORWARD_AUTH_SECRET:
        log("WARNING: FORWARD_AUTH_SECRET not set - verify endpoint unrestricted")
    else:
        log("forward_auth secret configured (#709)")

    log(
        f"Rate limits (#711): {CHAT_MAX_REQUESTS_PER_HOUR}/hr, "
        f"{CHAT_MAX_REQUESTS_PER_DAY}/day, "
        f"{CHAT_MAX_TOKENS_PER_DAY} tokens/day"
    )
    httpd.serve_forever()
 # Start the server only when this file is run as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/docker/chat/ui/index.html
+++ b/docker/chat/ui/index.html
@ -1,521 +0,0 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>disinto-chat</title>
    <script src="/static/htmx.min.js"></script>
    <style>
        * {
            box-sizing: border-box;
            margin: 0;
            padding: 0;
        }
        body {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, sans-serif;
            background: #1a1a2e;
            color: #eaeaea;
            min-height: 100vh;
            display: flex;
        }
        /* Sidebar styles */
        .sidebar {
            width: 280px;
            background: #16213e;
            border-right: 1px solid #0f3460;
            display: flex;
            flex-direction: column;
            height: 100vh;
            position: fixed;
            left: 0;
            top: 0;
            z-index: 100;
        }
        .sidebar-header {
            padding: 1rem;
            border-bottom: 1px solid #0f3460;
        }
        .sidebar-header h1 {
            font-size: 1.25rem;
            font-weight: 600;
            margin-bottom: 0.5rem;
        }
        .new-chat-btn {
            width: 100%;
            background: #e94560;
            color: white;
            border: none;
            border-radius: 6px;
            padding: 0.75rem 1rem;
            font-size: 0.9rem;
            font-weight: 600;
            cursor: pointer;
            transition: background 0.2s;
        }
        .new-chat-btn:hover {
            background: #d63447;
        }
        .new-chat-btn:disabled {
            background: #555;
            cursor: not-allowed;
        }
        .conversations-list {
            flex: 1;
            overflow-y: auto;
            padding: 0.5rem;
        }
        .conversation-item {
            padding: 0.75rem 1rem;
            border-radius: 6px;
            cursor: pointer;
            margin-bottom: 0.25rem;
            transition: background 0.2s;
            border: 1px solid transparent;
        }
        .conversation-item:hover {
            background: #1a1a2e;
        }
        .conversation-item.active {
            background: #0f3460;
            border-color: #e94560;
        }
        .conversation-item .preview {
            font-size: 0.875rem;
            white-space: nowrap;
            overflow: hidden;
            text-overflow: ellipsis;
            opacity: 0.9;
        }
        .conversation-item .meta {
            font-size: 0.75rem;
            opacity: 0.6;
            margin-top: 0.25rem;
        }
        .conversation-item .message-count {
            float: right;
            font-size: 0.7rem;
            background: #0f3460;
            padding: 0.125rem 0.5rem;
            border-radius: 10px;
        }
        .main-content {
            margin-left: 280px;
            display: flex;
            flex-direction: column;
            width: 100%;
            height: 100vh;
        }
        header {
            background: #16213e;
            padding: 1rem 2rem;
            border-bottom: 1px solid #0f3460;
        }
        header h1 {
            font-size: 1.25rem;
            font-weight: 600;
        }
        main {
            flex: 1;
            display: flex;
            flex-direction: column;
            max-width: 900px;
            margin: 0 auto;
            width: 100%;
            padding: 1rem;
        }
        #messages {
            flex: 1;
            overflow-y: auto;
            padding: 1rem;
            background: #16213e;
            border-radius: 8px;
            margin-bottom: 1rem;
        }
        .message {
            margin-bottom: 1rem;
            padding: 0.75rem 1rem;
            border-radius: 8px;
            line-height: 1.5;
        }
        .message.user {
            background: #0f3460;
            margin-left: 2rem;
        }
        .message.assistant {
            background: #1a1a2e;
            margin-right: 2rem;
        }
        .message.system {
            background: #1a1a2e;
            font-style: italic;
            color: #888;
            text-align: center;
        }
        .message .role {
            font-weight: 600;
            font-size: 0.875rem;
            margin-bottom: 0.25rem;
            opacity: 0.8;
        }
        .message .content {
            white-space: pre-wrap;
            word-wrap: break-word;
        }
        .input-area {
            display: flex;
            gap: 0.5rem;
            padding: 1rem;
            background: #16213e;
            border-radius: 8px;
        }
        textarea {
            flex: 1;
            background: #1a1a2e;
            border: 1px solid #0f3460;
            border-radius: 6px;
            padding: 0.75rem;
            color: #eaeaea;
            font-family: inherit;
            font-size: 1rem;
            resize: none;
            min-height: 80px;
        }
        textarea:focus {
            outline: none;
            border-color: #e94560;
        }
        button {
            background: #e94560;
            color: white;
            border: none;
            border-radius: 6px;
            padding: 0.75rem 1.5rem;
            font-size: 1rem;
            font-weight: 600;
            cursor: pointer;
            transition: background 0.2s;
        }
        button:hover {
            background: #d63447;
        }
        button:disabled {
            background: #555;
            cursor: not-allowed;
        }
        .loading {
            opacity: 0.6;
        }
        .empty-state {
            display: flex;
            flex-direction: column;
            align-items: center;
            justify-content: center;
            height: 100%;
            color: #888;
            text-align: center;
        }
        .empty-state p {
            margin-top: 1rem;
        }
        /* Responsive sidebar toggle */
        .sidebar-toggle {
            display: none;
            position: fixed;
            top: 1rem;
            left: 1rem;
            z-index: 200;
            background: #e94560;
            color: white;
            border: none;
            border-radius: 6px;
            padding: 0.5rem;
            cursor: pointer;
        }
        @media (max-width: 768px) {
            .sidebar {
                transform: translateX(-100%);
                transition: transform 0.3s;
            }
            .sidebar.open {
                transform: translateX(0);
            }
            .sidebar-toggle {
                display: block;
            }
            .main-content {
                margin-left: 0;
            }
        }
    </style>
 </head>
 <body>
    <button class="sidebar-toggle" id="sidebar-toggle">☰</button>
    <aside class="sidebar" id="sidebar">
        <div class="sidebar-header">
            <h1>disinto-chat</h1>
            <button class="new-chat-btn" id="new-chat-btn">+ New Chat</button>
        </div>
        <div class="conversations-list" id="conversations-list">
            <!-- Conversations will be loaded here -->
        </div>
    </aside>
    <div class="main-content">
        <header>
            <h1>disinto-chat</h1>
        </header>
        <main>
            <div id="messages">
                <div class="message system">
                    <div class="role">system</div>
                    <div class="content">Welcome to disinto-chat. Type a message to start chatting with Claude.</div>
                </div>
            </div>
            <form class="input-area" id="chat-form">
                <textarea name="message" placeholder="Type your message..." required></textarea>
                <button type="submit" id="send-btn">Send</button>
            </form>
        </main>
    </div>
    <script>
        // State
        let currentConversationId = null;
        let conversations = [];
        // DOM elements
        const messagesDiv = document.getElementById('messages');
        const sendBtn = document.getElementById('send-btn');
        const textarea = document.querySelector('textarea');
        const conversationsList = document.getElementById('conversations-list');
        const newChatBtn = document.getElementById('new-chat-btn');
        const sidebar = document.getElementById('sidebar');
        const sidebarToggle = document.getElementById('sidebar-toggle');
        // Load conversations list
        async function loadConversations() {
            // Fetch the conversation summaries and redraw the sidebar.
            // A non-OK response leaves the current list untouched.
            try {
                const res = await fetch('/chat/history');
                if (!res.ok) {
                    return;
                }
                conversations = await res.json();
                renderConversationsList();
            } catch (err) {
                console.error('Failed to load conversations:', err);
            }
        }
        // Render conversations list
        function renderConversationsList() {
            // Rebuild the sidebar from the `conversations` array, marking the
            // entry that matches currentConversationId as active.
            conversationsList.innerHTML = '';
            if (conversations.length === 0) {
                conversationsList.innerHTML = '<div style="padding: 1rem; color: #888; text-align: center; font-size: 0.875rem;">No conversations yet</div>';
                return;
            }
            for (const conv of conversations) {
                const isActive = conv.id === currentConversationId;
                const entry = document.createElement('div');
                entry.className = isActive ? 'conversation-item active' : 'conversation-item';
                entry.dataset.conversationId = conv.id;

                const preview = document.createElement('div');
                preview.className = 'preview';
                preview.textContent = conv.preview || '(empty)';

                const meta = document.createElement('div');
                meta.className = 'meta';
                const date = conv.created_at ? new Date(conv.created_at).toLocaleDateString() : '';
                const count = conv.message_count || 0;
                const plural = conv.message_count !== 1 ? 's' : '';
                meta.innerHTML = `${date} <span class="message-count">${count} msg${plural}</span>`;

                entry.append(preview, meta);
                entry.addEventListener('click', () => loadConversation(conv.id));
                conversationsList.appendChild(entry);
            }
        }
        // Load a specific conversation into the message pane.
        //
        // convId: id of the conversation to show (as found in the `conversations`
        //         list). Does nothing when that conversation is already current.
        //
        // The pane is cleared and the sidebar highlight moved immediately; the
        // history is then fetched from /chat/history/<id>. If the user switches to
        // another conversation (or starts a new one) while the request is in
        // flight, the late response is discarded instead of being appended to the
        // wrong conversation.
        async function loadConversation(convId) {
            // Early-return if already showing this conversation
            if (convId === currentConversationId) {
                return;
            }
            // Clear messages
            messagesDiv.innerHTML = '';
            // Update active state in sidebar. Comparing dataset values avoids
            // building a CSS selector out of the id, which breaks on quotes.
            const wantedId = String(convId);
            document.querySelectorAll('.conversation-item').forEach(item => {
                item.classList.toggle('active', item.dataset.conversationId === wantedId);
            });
            currentConversationId = convId;
            // True once another conversation became current during an await.
            const isStale = () => currentConversationId !== convId;
            try {
                const response = await fetch(`/chat/history/${encodeURIComponent(convId)}`);
                if (isStale()) {
                    return;
                }
                if (response.ok) {
                    const messages = await response.json();
                    if (isStale()) {
                        return;
                    }
                    if (messages && messages.length > 0) {
                        messages.forEach(msg => {
                            addMessage(msg.role, msg.content);
                        });
                    } else {
                        addSystemMessage('This conversation is empty');
                    }
                } else {
                    addSystemMessage('Failed to load conversation');
                }
            } catch (error) {
                console.error('Failed to load conversation:', error);
                if (isStale()) {
                    return;
                }
                addSystemMessage('Error loading conversation');
            }
            // Close sidebar on mobile
            if (window.innerWidth <= 768) {
                sidebar.classList.remove('open');
            }
        }
        // Ask the server (POST /chat/new) for a fresh conversation and switch to it.
        // On success the message pane is reset, a system notice is shown and the
        // sidebar is reloaded. Failures are reported as system messages; this
        // function never rejects.
        async function createNewConversation() {
            try {
                const res = await fetch('/chat/new', { method: 'POST' });
                if (!res.ok) {
                    addSystemMessage('Failed to create new conversation');
                    return;
                }
                const payload = await res.json();
                currentConversationId = payload.conversation_id;
                messagesDiv.innerHTML = '';
                addSystemMessage('New conversation started');
                await loadConversations();
            } catch (err) {
                console.error('Failed to create new conversation:', err);
                addSystemMessage('Error creating new conversation');
            }
        }
        // Append a chat message to the message pane and scroll it into view.
        //
        // role:      label shown above the message and added as a CSS class
        //            (e.g. 'user', 'assistant'); it may come from server history,
        //            so it is HTML-escaped before being placed in the markup.
        // content:   message text; escaped, with newlines rendered as <br>.
        // streaming: when true the content element also gets the `streaming` class.
        // Returns the `.content` element of the new message.
        function addMessage(role, content, streaming = false) {
            const msgDiv = document.createElement('div');
            // Assigning className cannot inject markup, so role is used as-is here.
            msgDiv.className = `message ${role}`;
            msgDiv.innerHTML = `
                <div class="role">${escapeHtml(role)}</div>
                <div class="content${streaming ? ' streaming' : ''}">${escapeHtml(content)}</div>
            `;
            messagesDiv.appendChild(msgDiv);
            messagesDiv.scrollTop = messagesDiv.scrollHeight;
            return msgDiv.querySelector('.content');
        }
        // Append a notice with the fixed role "system" to the message pane and
        // scroll to the bottom. The text is HTML-escaped before insertion.
        function addSystemMessage(content) {
            const notice = document.createElement('div');
            notice.className = 'message system';
            const safeText = escapeHtml(content);
            notice.innerHTML = `
                <div class="role">system</div>
                <div class="content">${safeText}</div>
            `;
            messagesDiv.append(notice);
            messagesDiv.scrollTop = messagesDiv.scrollHeight;
        }
        // Turn plain text into HTML-safe markup: the browser serialises the text
        // through a detached element (escaping markup characters), then every
        // newline is replaced by a <br> tag.
        function escapeHtml(text) {
            const scratch = document.createElement('div');
            scratch.textContent = text;
            const escaped = scratch.innerHTML;
            return escaped.split('\n').join('<br>');
        }
        // Send message handler.
        //
        // Reads the trimmed textarea value, makes sure a conversation exists,
        // echoes the user's message, POSTs it to /chat as form-encoded data
        // (`message`, `conversation_id`) and renders `response` from the JSON
        // reply. Input controls are disabled while the request is in flight and
        // always re-enabled afterwards; the sidebar list is refreshed at the end.
        //
        // Empty / whitespace-only input is ignored.
        async function sendMessage() {
            const message = textarea.value.trim();
            if (!message) return;
            // Disable input
            textarea.disabled = true;
            sendBtn.disabled = true;
            sendBtn.textContent = 'Sending...';
            try {
                // Create the conversation BEFORE echoing the user's text:
                // createNewConversation() clears the message pane, which would
                // otherwise wipe the message that was just displayed.
                if (!currentConversationId) {
                    await createNewConversation();
                    if (!currentConversationId) {
                        // Creation failed and was already reported to the user.
                        // Keep the draft in the textarea and do not post with a
                        // missing conversation id.
                        return;
                    }
                }
                // Add user message
                addMessage('user', message);
                textarea.value = '';
                // Use fetch with URLSearchParams for application/x-www-form-urlencoded
                const params = new URLSearchParams();
                params.append('message', message);
                params.append('conversation_id', currentConversationId);
                const response = await fetch('/chat', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/x-www-form-urlencoded'
                    },
                    body: params
                });
                if (!response.ok) {
                    throw new Error(`HTTP ${response.status}`);
                }
                // Read the response as JSON (now returns JSON with response and conversation_id)
                const data = await response.json();
                addMessage('assistant', data.response);
            } catch (error) {
                addSystemMessage(`Error: ${error.message}`);
            } finally {
                textarea.disabled = false;
                sendBtn.disabled = false;
                sendBtn.textContent = 'Send';
                textarea.focus();
                messagesDiv.scrollTop = messagesDiv.scrollHeight;
                // Refresh conversations list
                await loadConversations();
            }
        }
        // Event listeners
        // Handle sending through the form's submit event rather than the button's
        // click: the Send button is type="submit", so without preventDefault() a
        // whitespace-only message (which sendMessage ignores, leaving the button
        // enabled) would fall through to a native form submission and reload the
        // page. Native `required` validation still runs before `submit` fires.
        document.getElementById('chat-form').addEventListener('submit', (e) => {
            e.preventDefault();
            sendMessage();
        });
        newChatBtn.addEventListener('click', createNewConversation);
        // Enter sends, Shift+Enter inserts a newline. Enter pressed while an IME
        // composition is active only confirms the composition and must not send.
        textarea.addEventListener('keydown', (e) => {
            if (e.key === 'Enter' && !e.shiftKey && !e.isComposing) {
                e.preventDefault();
                sendMessage();
            }
        });
        // The toggle button opens/closes the sidebar (used on narrow screens).
        sidebarToggle.addEventListener('click', () => sidebar.classList.toggle('open'));
        // On viewports up to 768px wide, any click that lands outside both the
        // sidebar and its toggle button closes the sidebar.
        document.addEventListener('click', (event) => {
            if (window.innerWidth > 768) {
                return;
            }
            const clickedInside = sidebar.contains(event.target) || sidebarToggle.contains(event.target);
            if (clickedInside) {
                return;
            }
            sidebar.classList.remove('open');
        });
        // Startup: put the cursor in the message box and populate the sidebar.
        textarea.focus();
        loadConversations();
    </script>
 </body>
 </html>
--- a/docker/chat/ui/static/htmx.min.js
+++ b/docker/chat/ui/static/htmx.min.js
--- a/docker/edge/Dockerfile
+++ b/docker/edge/Dockerfile
@ -1,7 +1,4 @@
 FROM caddy:latest
-RUN apk add --no-cache bash jq curl git docker-cli python3 openssh-client autossh
+RUN apk add --no-cache bash jq curl git docker-cli python3
 COPY entrypoint-edge.sh /usr/local/bin/entrypoint-edge.sh
 VOLUME /data
 ENTRYPOINT ["bash", "/usr/local/bin/entrypoint-edge.sh"]
--- a/docker/edge/dispatcher.sh
+++ b/docker/edge/dispatcher.sh
@ -8,8 +8,8 @@
 # 2. Scan vault/actions/ for TOML files without .result.json
 # 3. Verify TOML arrived via merged PR with admin merger (Forgejo API)
 # 4. Validate TOML using vault-env.sh validator
-# 5. Decrypt declared secrets via load_secret (lib/env.sh)
+# 5. Decrypt .env.vault.enc and extract only declared secrets
-# 6. Launch: delegate to _launch_runner_{docker,nomad} backend
+# 6. Launch: docker run --rm disinto/agents:latest <action-id>
 # 7. Write <action-id>.result.json with exit code, timestamp, logs summary
 #
 # Part of #76.
@ -19,7 +19,7 @@ set -euo pipefail
 # Resolve script root (parent of lib/)
 SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-# Source shared environment (provides load_secret, log helpers, etc.)
+# Source shared environment
 source "${SCRIPT_ROOT}/../lib/env.sh"
 # Project TOML location: prefer mounted path, fall back to cloned path
@ -27,18 +27,26 @@ source "${SCRIPT_ROOT}/../lib/env.sh"
 # the shallow clone only has .toml.example files.
 PROJECTS_DIR="${PROJECTS_DIR:-${FACTORY_ROOT:-/opt/disinto}-projects}"
-# -----------------------------------------------------------------------------
+# Load vault secrets after env.sh (env.sh unsets them for agent security)
-# Backend selection: DISPATCHER_BACKEND={docker,nomad}
+# Vault secrets must be available to the dispatcher
-# Default: docker.  nomad lands as a pure addition during migration Step 5.
+if [ -f "$FACTORY_ROOT/.env.vault.enc" ] && command -v sops &>/dev/null; then
-# -----------------------------------------------------------------------------
+  set -a
-DISPATCHER_BACKEND="${DISPATCHER_BACKEND:-docker}"
+  eval "$(sops -d --output-type dotenv "$FACTORY_ROOT/.env.vault.enc" 2>/dev/null)" \
    || echo "Warning: failed to decrypt .env.vault.enc — vault secrets not loaded" >&2
  set +a
 elif [ -f "$FACTORY_ROOT/.env.vault" ]; then
  set -a
  # shellcheck source=/dev/null
  source "$FACTORY_ROOT/.env.vault"
  set +a
 fi
 # Ops repo location (vault/actions directory)
 OPS_REPO_ROOT="${OPS_REPO_ROOT:-/home/debian/disinto-ops}"
 VAULT_ACTIONS_DIR="${OPS_REPO_ROOT}/vault/actions"
 # Vault action validation
-VAULT_ENV="${SCRIPT_ROOT}/../action-vault/vault-env.sh"
+VAULT_ENV="${SCRIPT_ROOT}/../vault/vault-env.sh"
 # Admin users who can merge vault PRs (from issue #77)
 # Comma-separated list of Forgejo usernames with admin role
@ -342,231 +350,33 @@ get_dispatch_mode() {
  fi
 }
-# Commit result.json to the ops repo via git push (portable, no bind-mount).
+# Write result file for an action
 #
 # Clones the ops repo into a scratch directory, writes the result file,
 # commits as vault-bot, and pushes to the primary branch.
 # Idempotent: skips if result.json already exists upstream.
 # Retries on push conflict with rebase-and-push (handles concurrent merges).
 #
 # Usage: commit_result_via_git <action_id> <exit_code> <logs>
 commit_result_via_git() {
  # Arguments:
  #   $1 - action id (names the result file vault/actions/<id>.result.json)
  #   $2 - runner exit code (passed to jq via --argjson, so it must be valid JSON,
  #        i.e. a number)
  #   $3 - captured runner output; only the last 1000 characters are stored
  # Globals read: FORGE_URL, FORGE_OPS_REPO, PRIMARY_BRANCH (defaults to "main")
  # Returns: 0 when the result is (or already was) present upstream,
  #          1 when the clone fails or the push still fails after 3 attempts.
  local action_id="$1"
  local exit_code="$2"
  local logs="$3"
  local result_relpath="vault/actions/${action_id}.result.json"
  local ops_clone_url="${FORGE_URL}/${FORGE_OPS_REPO}.git"
  local branch="${PRIMARY_BRANCH:-main}"
  local scratch_dir
  scratch_dir=$(mktemp -d /tmp/dispatcher-result-XXXXXX)
  # The double quotes are deliberate: the scratch path is expanded now, so the
  # cleanup command does not depend on the local variable when the trap runs.
  # shellcheck disable=SC2064
  trap "rm -rf '${scratch_dir}'" RETURN
  # Shallow clone of the ops repo — only the primary branch
  if ! git clone --depth 1 --branch "$branch" \
    "$ops_clone_url" "$scratch_dir" 2>/dev/null; then
    log "ERROR: Failed to clone ops repo for result commit (action ${action_id})"
    return 1
  fi
  # Idempotency: skip if result.json already exists upstream
  if [ -f "${scratch_dir}/${result_relpath}" ]; then
    log "Result already exists upstream for ${action_id} — skipping commit"
    return 0
  fi
  # Configure git identity as vault-bot
  git -C "$scratch_dir" config user.name "vault-bot"
  git -C "$scratch_dir" config user.email "vault-bot@disinto.local"
  # Truncate logs if too long (keep last 1000 chars)
  if [ ${#logs} -gt 1000 ]; then
    logs="${logs: -1000}"
  fi
  # Write result JSON via jq (never string-interpolate into JSON)
  mkdir -p "$(dirname "${scratch_dir}/${result_relpath}")"
  jq -n \
    --arg id "$action_id" \
    --argjson exit_code "$exit_code" \
    --arg timestamp "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" \
    --arg logs "$logs" \
    '{id: $id, exit_code: $exit_code, timestamp: $timestamp, logs: $logs}' \
    > "${scratch_dir}/${result_relpath}"
  git -C "$scratch_dir" add "$result_relpath"
  git -C "$scratch_dir" commit -q -m "vault: result for ${action_id}"
  # Push with retry on conflict (rebase-and-push pattern).
  # Common case: admin merges another action PR between our clone and push.
  # Any push failure is treated as a conflict here (stderr is discarded).
  # A rebase that succeeds falls through to the next iteration, which retries
  # the push; the rebase after the third failed push is not followed by a push.
  local attempt
  for attempt in 1 2 3; do
    if git -C "$scratch_dir" push origin "$branch" 2>/dev/null; then
      log "Result committed and pushed for ${action_id} (attempt ${attempt})"
      return 0
    fi
    log "Push conflict for ${action_id} (attempt ${attempt}/3) — rebasing"
    if ! git -C "$scratch_dir" pull --rebase origin "$branch" 2>/dev/null; then
      # Rebase conflict — check if result was pushed by another process
      git -C "$scratch_dir" rebase --abort 2>/dev/null || true
      # If the fetched upstream branch already contains the result file, the
      # work is done; otherwise the loop simply tries the push again.
      if git -C "$scratch_dir" fetch origin "$branch" 2>/dev/null && \
         git -C "$scratch_dir" show "origin/${branch}:${result_relpath}" >/dev/null 2>&1; then
        log "Result already exists upstream for ${action_id} (pushed by another process)"
        return 0
      fi
    fi
  done
  log "ERROR: Failed to push result for ${action_id} after 3 attempts"
  return 1
 }
 # Write result file for an action via git push to the ops repo.
 # Usage: write_result <action_id> <exit_code> <logs>
 write_result() {
  local action_id="$1"
  local exit_code="$2"
  local logs="$3"
-  commit_result_via_git "$action_id" "$exit_code" "$logs"
+  local result_file="${VAULT_ACTIONS_DIR}/${action_id}.result.json"
  # Truncate logs if too long (keep last 1000 chars)
  if [ ${#logs} -gt 1000 ]; then
    logs="${logs: -1000}"
  fi
  # Write result JSON
  jq -n \
    --arg id "$action_id" \
    --argjson exit_code "$exit_code" \
    --arg timestamp "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" \
    --arg logs "$logs" \
    '{id: $id, exit_code: $exit_code, timestamp: $timestamp, logs: $logs}' \
    > "$result_file"
  log "Result written: ${result_file}"
 }
-# -----------------------------------------------------------------------------
+# Launch runner for the given action
 # Pluggable launcher backends
 # -----------------------------------------------------------------------------
 # _launch_runner_docker ACTION_ID SECRETS_CSV MOUNTS_CSV
 #
 # Builds and executes a `docker run` command for the vault runner and records
 # the outcome through write_result.
 # Secrets are resolved via load_secret (lib/env.sh).
 #
 # Arguments:
 #   ACTION_ID    - action id; names the container (vault-runner-<id>) and is
 #                  passed to the runner entrypoint
 #   SECRETS_CSV  - comma-separated secret names; each is resolved with
 #                  load_secret and injected as a container env var (may be empty)
 #   MOUNTS_CSV   - comma-separated mount aliases: ssh, gpg, sops (may be empty)
 # Globals read: FORGE_URL, FORGE_TOKEN, FORGE_REPO, FORGE_OPS_REPO,
 #   PRIMARY_BRANCH, ANTHROPIC_API_KEY, CLAUDE_MODEL, CLAUDE_SHARED_DIR,
 #   CLAUDE_CONFIG_DIR, HOME, OPS_REPO_ROOT
 # Returns: exit code of the docker run, or 1 (after writing a failure result)
 #          when a secret cannot be resolved or a mount alias is unknown.
 #          Container stdout/stderr are captured in a temp file and handed to
 #          write_result; nothing is printed to stdout by this function itself.
 _launch_runner_docker() {
  local action_id="$1"
  local secrets_csv="$2"
  local mounts_csv="$3"
  local -a cmd=(docker run --rm
    --name "vault-runner-${action_id}"
    --network host
    --entrypoint bash
    -e DISINTO_CONTAINER=1
    -e "FORGE_URL=${FORGE_URL}"
    -e "FORGE_TOKEN=${FORGE_TOKEN}"
    -e "FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}"
    -e "FORGE_OPS_REPO=${FORGE_OPS_REPO:-}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
  )
  # Pass through optional env vars if set
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
  fi
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    cmd+=(-e "CLAUDE_MODEL=${CLAUDE_MODEL}")
  fi
  # Mount docker socket, claude binary, and claude config
  cmd+=(-v /var/run/docker.sock:/var/run/docker.sock)
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi
  local runtime_home="${HOME:-/home/debian}"
  if [ -d "${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}" ]; then
    cmd+=(-v "${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}")
    cmd+=(-e "CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}")
  fi
  if [ -f "${runtime_home}/.claude.json" ]; then
    cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
  fi
  # Add environment variables for secrets (resolved via load_secret)
  if [ -n "$secrets_csv" ]; then
    local secret
    for secret in $(echo "$secrets_csv" | tr ',' ' '); do
      secret=$(echo "$secret" | xargs)
      [ -n "$secret" ] || continue
      local secret_val
      # "|| true": a failed lookup is reported below instead of tripping errexit
      secret_val=$(load_secret "$secret") || true
      if [ -z "$secret_val" ]; then
        log "ERROR: Secret '${secret}' could not be resolved for action ${action_id}"
        write_result "$action_id" 1 "Secret not found: ${secret}"
        return 1
      fi
      cmd+=(-e "${secret}=${secret_val}")
    done
  fi
  # Add volume mounts for file-based credentials
  if [ -n "$mounts_csv" ]; then
    local mount_alias
    for mount_alias in $(echo "$mounts_csv" | tr ',' ' '); do
      mount_alias=$(echo "$mount_alias" | xargs)
      [ -n "$mount_alias" ] || continue
      case "$mount_alias" in
        ssh)
          cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
          ;;
        gpg)
          cmd+=(-v "${runtime_home}/.gnupg:/home/agent/.gnupg:ro")
          ;;
        sops)
          cmd+=(-v "${runtime_home}/.config/sops/age:/home/agent/.config/sops/age:ro")
          ;;
        *)
          log "ERROR: Unknown mount alias '${mount_alias}' for action ${action_id}"
          write_result "$action_id" 1 "Unknown mount alias: ${mount_alias}"
          return 1
          ;;
      esac
    done
  fi
  # Mount the ops repo so the runner entrypoint can read the action TOML
  cmd+=(-v "${OPS_REPO_ROOT}:/home/agent/ops:ro")
  # Image and entrypoint arguments: runner entrypoint + action-id
  cmd+=(disinto/agents:latest /home/agent/disinto/docker/runner/entrypoint-runner.sh "$action_id")
  # Only secret NAMES are logged here, never their values.
  log "Running: docker run --rm vault-runner-${action_id} (secrets: ${secrets_csv:-none}, mounts: ${mounts_csv:-none})"
  # Create temp file for logs
  local log_file
  log_file=$(mktemp /tmp/dispatcher-logs-XXXXXX)
  # Expand the path now (same pattern as commit_result_via_git) so the cleanup
  # command does not depend on the local variable when the trap runs.
  # shellcheck disable=SC2064
  trap "rm -f '${log_file}'" RETURN
  # Execute with array expansion (safe from shell injection).
  # Capture the status with "||" so a failing runner does not abort the whole
  # dispatcher under `set -e` before its result has been recorded.
  local exit_code=0
  "${cmd[@]}" > "$log_file" 2>&1 || exit_code=$?
  # Read logs summary
  local logs
  logs=$(cat "$log_file")
  # Write result file
  write_result "$action_id" "$exit_code" "$logs"
  if [ "$exit_code" -eq 0 ]; then
    log "Runner completed successfully for action: ${action_id}"
  else
    log "Runner failed for action: ${action_id} (exit code: ${exit_code})"
  fi
  return "$exit_code"
 }
 # _launch_runner_nomad ACTION_ID SECRETS_CSV MOUNTS_CSV
 #
 # Placeholder for the Nomad backend (planned for migration Step 5).
 # Ignores its arguments, reports on stderr that the backend is missing and
 # always returns 1.
 _launch_runner_nomad() {
  printf '%s\n' "nomad backend not yet implemented" >&2
  return 1
 }
 # Launch runner for the given action (backend-agnostic orchestrator)
 # Usage: launch_runner <toml_file>
 launch_runner() {
  local toml_file="$1"
@ -599,94 +409,123 @@ launch_runner() {
    log "Action ${action_id}: admin merge verified"
  fi
-  # Build CSV lists from validated action metadata
+  # Extract secrets from validated action
-  local secrets_csv=""
+  local secrets_array
-  if [ -n "${VAULT_ACTION_SECRETS:-}" ]; then
+  secrets_array="${VAULT_ACTION_SECRETS:-}"
    # Convert space-separated to comma-separated
    secrets_csv=$(echo "${VAULT_ACTION_SECRETS}" | xargs | tr ' ' ',')
  fi
  local mounts_csv=""
  if [ -n "${VAULT_ACTION_MOUNTS:-}" ]; then
    mounts_csv=$(echo "${VAULT_ACTION_MOUNTS}" | xargs | tr ' ' ',')
  fi
  # Delegate to the selected backend
  "_launch_runner_${DISPATCHER_BACKEND}" "$action_id" "$secrets_csv" "$mounts_csv"
 }
 # -----------------------------------------------------------------------------
 # Pluggable sidecar launcher (reproduce / triage / verify)
 # -----------------------------------------------------------------------------
 # _dispatch_sidecar_docker CONTAINER_NAME ISSUE_NUM PROJECT_TOML IMAGE [FORMULA]
 #
 # Launches a sidecar container via docker run (background, pid-tracked).
 # Prints the background PID to stdout.
 _dispatch_sidecar_docker() {
  local container_name="$1"
  local issue_number="$2"
  local project_toml="$3"
  local image="$4"
  local formula="${5:-}"
  # Build docker run command (self-contained, no compose context needed).
  # The edge container has the Docker socket but not the host's compose project,
  # so docker compose run would fail with exit 125. docker run is self-contained:
  # the dispatcher knows the image, network, env vars, and entrypoint.
  local -a cmd=(docker run --rm
-    --name "${container_name}"
+    --name "vault-runner-${action_id}"
    --network host
-    --security-opt apparmor=unconfined
+    --entrypoint bash
-    -v /var/run/docker.sock:/var/run/docker.sock
+    -e DISINTO_CONTAINER=1
    -v agent-data:/home/agent/data
    -v project-repos:/home/agent/repos
    -e "FORGE_URL=${FORGE_URL}"
    -e "FORGE_TOKEN=${FORGE_TOKEN}"
-    -e "FORGE_REPO=${FORGE_REPO}"
+    -e "FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}"
    -e "FORGE_OPS_REPO=${FORGE_OPS_REPO:-}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
    -e DISINTO_CONTAINER=1
  )
-  # Set formula if provided
+  # Pass through optional env vars if set
  if [ -n "$formula" ]; then
    cmd+=(-e "DISINTO_FORMULA=${formula}")
  fi
  # Pass through ANTHROPIC_API_KEY if set
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
  fi
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    cmd+=(-e "CLAUDE_MODEL=${CLAUDE_MODEL}")
  fi
-  # Mount shared Claude config dir and ~/.ssh from the runtime user's home
+  # Mount docker socket, claude binary, and claude config
  cmd+=(-v /var/run/docker.sock:/var/run/docker.sock)
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi
  local runtime_home="${HOME:-/home/debian}"
-  if [ -d "${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}" ]; then
+  if [ -d "${runtime_home}/.claude" ]; then
-    cmd+=(-v "${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}")
+    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
    cmd+=(-e "CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}")
  fi
  if [ -f "${runtime_home}/.claude.json" ]; then
    cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
  fi
-  if [ -d "${runtime_home}/.ssh" ]; then
+
-    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
+  # Add environment variables for secrets (if any declared)
-  fi
+  if [ -n "$secrets_array" ]; then
-  if [ -f /usr/local/bin/claude ]; then
+    for secret in $secrets_array; do
-    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
+      secret=$(echo "$secret" | xargs)
      if [ -n "$secret" ]; then
        # Verify secret exists in vault
        if [ -z "${!secret:-}" ]; then
          log "ERROR: Secret '${secret}' not found in vault for action ${action_id}"
          write_result "$action_id" 1 "Secret not found in vault: ${secret}"
          return 1
        fi
        cmd+=(-e "${secret}=${!secret}")
      fi
    done
  else
    log "Action ${action_id} has no secrets declared — runner will execute without extra env vars"
  fi
-  # Mount the project TOML into the container at a stable path
+  # Add volume mounts for file-based credentials (if any declared)
-  local container_toml="/home/agent/project.toml"
+  local mounts_array
-  cmd+=(-v "${project_toml}:${container_toml}:ro")
+  mounts_array="${VAULT_ACTION_MOUNTS:-}"
  if [ -n "$mounts_array" ]; then
    for mount_alias in $mounts_array; do
      mount_alias=$(echo "$mount_alias" | xargs)
      [ -n "$mount_alias" ] || continue
      case "$mount_alias" in
        ssh)
          cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
          ;;
        gpg)
          cmd+=(-v "${runtime_home}/.gnupg:/home/agent/.gnupg:ro")
          ;;
        sops)
          cmd+=(-v "${runtime_home}/.config/sops/age:/home/agent/.config/sops/age:ro")
          ;;
        *)
          log "ERROR: Unknown mount alias '${mount_alias}' for action ${action_id}"
          write_result "$action_id" 1 "Unknown mount alias: ${mount_alias}"
          return 1
          ;;
      esac
    done
  fi
-  cmd+=("${image}" "$container_toml" "$issue_number")
+  # Mount the ops repo so the runner entrypoint can read the action TOML
  cmd+=(-v "${OPS_REPO_ROOT}:/home/agent/ops:ro")
-  # Launch in background
+  # Image and entrypoint arguments: runner entrypoint + action-id
-  "${cmd[@]}" &
+  cmd+=(disinto/agents:latest /home/agent/disinto/docker/runner/entrypoint-runner.sh "$action_id")
  echo $!
 }
-# _dispatch_sidecar_nomad CONTAINER_NAME ISSUE_NUM PROJECT_TOML IMAGE [FORMULA]
+  log "Running: docker run --rm vault-runner-${action_id} (secrets: ${secrets_array:-none}, mounts: ${mounts_array:-none})"
-#
+
-# Nomad sidecar backend stub — will be implemented in migration Step 5.
+  # Create temp file for logs
-_dispatch_sidecar_nomad() {
+  local log_file
-  echo "nomad backend not yet implemented" >&2
+  log_file=$(mktemp /tmp/dispatcher-logs-XXXXXX)
-  return 1
+  trap 'rm -f "$log_file"' RETURN
  # Execute with array expansion (safe from shell injection)
  # Capture stdout and stderr to log file
  "${cmd[@]}" > "$log_file" 2>&1
  local exit_code=$?
  # Read logs summary
  local logs
  logs=$(cat "$log_file")
  # Write result file
  write_result "$action_id" "$exit_code" "$logs"
  if [ $exit_code -eq 0 ]; then
    log "Runner completed successfully for action: ${action_id}"
  else
    log "Runner failed for action: ${action_id} (exit code: ${exit_code})"
  fi
  return $exit_code
 }
 # -----------------------------------------------------------------------------
@ -767,13 +606,51 @@ dispatch_reproduce() {
  log "Dispatching reproduce-agent for issue #${issue_number} (project: ${project_toml})"
-  local bg_pid
+  # Build docker run command using array (safe from injection)
-  bg_pid=$("_dispatch_sidecar_${DISPATCHER_BACKEND}" \
+  local -a cmd=(docker run --rm
-    "disinto-reproduce-${issue_number}" \
+    --name "disinto-reproduce-${issue_number}"
-    "$issue_number" \
+    --network host
-    "$project_toml" \
+    --security-opt apparmor=unconfined
-    "disinto-reproduce:latest")
+    -v /var/run/docker.sock:/var/run/docker.sock
    -v agent-data:/home/agent/data
    -v project-repos:/home/agent/repos
    -e "FORGE_URL=${FORGE_URL}"
    -e "FORGE_TOKEN=${FORGE_TOKEN}"
    -e "FORGE_REPO=${FORGE_REPO}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
    -e DISINTO_CONTAINER=1
  )
  # Pass through ANTHROPIC_API_KEY if set
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
  fi
  # Mount ~/.claude and ~/.ssh from the runtime user's home if available
  local runtime_home="${HOME:-/home/debian}"
  if [ -d "${runtime_home}/.claude" ]; then
    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
  fi
  if [ -f "${runtime_home}/.claude.json" ]; then
    cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
  fi
  if [ -d "${runtime_home}/.ssh" ]; then
    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
  fi
  # Mount claude CLI binary if present on host
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi
  # Mount the project TOML into the container at a stable path
  local container_toml="/home/agent/project.toml"
  cmd+=(-v "${project_toml}:${container_toml}:ro")
  cmd+=(disinto-reproduce:latest "$container_toml" "$issue_number")
  # Launch in background; write pid-file so we don't double-launch
  "${cmd[@]}" &
  local bg_pid=$!
  echo "$bg_pid" > "$(_reproduce_lockfile "$issue_number")"
  log "Reproduce container launched (pid ${bg_pid}) for issue #${issue_number}"
 }
@ -853,14 +730,52 @@ dispatch_triage() {
  log "Dispatching triage-agent for issue #${issue_number} (project: ${project_toml})"
-  local bg_pid
+  # Build docker run command using array (safe from injection)
-  bg_pid=$("_dispatch_sidecar_${DISPATCHER_BACKEND}" \
+  local -a cmd=(docker run --rm
-    "disinto-triage-${issue_number}" \
+    --name "disinto-triage-${issue_number}"
-    "$issue_number" \
+    --network host
-    "$project_toml" \
+    --security-opt apparmor=unconfined
-    "disinto-reproduce:latest" \
+    -v /var/run/docker.sock:/var/run/docker.sock
-    "triage")
+    -v agent-data:/home/agent/data
    -v project-repos:/home/agent/repos
    -e "FORGE_URL=${FORGE_URL}"
    -e "FORGE_TOKEN=${FORGE_TOKEN}"
    -e "FORGE_REPO=${FORGE_REPO}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
    -e DISINTO_CONTAINER=1
    -e DISINTO_FORMULA=triage
  )
  # Pass through ANTHROPIC_API_KEY if set
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
  fi
  # Mount ~/.claude and ~/.ssh from the runtime user's home if available
  local runtime_home="${HOME:-/home/debian}"
  if [ -d "${runtime_home}/.claude" ]; then
    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
  fi
  if [ -f "${runtime_home}/.claude.json" ]; then
    cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
  fi
  if [ -d "${runtime_home}/.ssh" ]; then
    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
  fi
  # Mount claude CLI binary if present on host
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi
  # Mount the project TOML into the container at a stable path
  local container_toml="/home/agent/project.toml"
  cmd+=(-v "${project_toml}:${container_toml}:ro")
  cmd+=(disinto-reproduce:latest "$container_toml" "$issue_number")
  # Launch in background; write pid-file so we don't double-launch
  "${cmd[@]}" &
  local bg_pid=$!
  echo "$bg_pid" > "$(_triage_lockfile "$issue_number")"
  log "Triage container launched (pid ${bg_pid}) for issue #${issue_number}"
 }
@ -1016,14 +931,52 @@ dispatch_verify() {
  log "Dispatching verification-agent for issue #${issue_number} (project: ${project_toml})"
-  local bg_pid
+  # Build docker run command using array (safe from injection)
-  bg_pid=$("_dispatch_sidecar_${DISPATCHER_BACKEND}" \
+  local -a cmd=(docker run --rm
-    "disinto-verify-${issue_number}" \
+    --name "disinto-verify-${issue_number}"
-    "$issue_number" \
+    --network host
-    "$project_toml" \
+    --security-opt apparmor=unconfined
-    "disinto-reproduce:latest" \
+    -v /var/run/docker.sock:/var/run/docker.sock
-    "verify")
+    -v agent-data:/home/agent/data
    -v project-repos:/home/agent/repos
    -e "FORGE_URL=${FORGE_URL}"
    -e "FORGE_TOKEN=${FORGE_TOKEN}"
    -e "FORGE_REPO=${FORGE_REPO}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
    -e DISINTO_CONTAINER=1
    -e DISINTO_FORMULA=verify
  )
  # Pass through ANTHROPIC_API_KEY if set
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
  fi
  # Mount ~/.claude and ~/.ssh from the runtime user's home if available
  local runtime_home="${HOME:-/home/debian}"
  if [ -d "${runtime_home}/.claude" ]; then
    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
  fi
  if [ -f "${runtime_home}/.claude.json" ]; then
    cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
  fi
  if [ -d "${runtime_home}/.ssh" ]; then
    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
  fi
  # Mount claude CLI binary if present on host
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi
  # Mount the project TOML into the container at a stable path
  local container_toml="/home/agent/project.toml"
  cmd+=(-v "${project_toml}:${container_toml}:ro")
  cmd+=(disinto-reproduce:latest "$container_toml" "$issue_number")
  # Launch in background; write pid-file so we don't double-launch
  "${cmd[@]}" &
  local bg_pid=$!
  echo "$bg_pid" > "$(_verify_lockfile "$issue_number")"
  log "Verification container launched (pid ${bg_pid}) for issue #${issue_number}"
 }
@ -1045,25 +998,10 @@ ensure_ops_repo() {
 # Main dispatcher loop
 main() {
-  log "Starting dispatcher (backend=${DISPATCHER_BACKEND})..."
+  log "Starting dispatcher..."
  log "Polling ops repo: ${VAULT_ACTIONS_DIR}"
  log "Admin users: ${ADMIN_USERS}"
  # Validate backend selection at startup
  case "$DISPATCHER_BACKEND" in
    docker) ;;
    nomad)
      log "ERROR: nomad backend not yet implemented"
      echo "nomad backend not yet implemented" >&2
      exit 1
      ;;
    *)
      log "ERROR: unknown DISPATCHER_BACKEND=${DISPATCHER_BACKEND}"
      echo "unknown DISPATCHER_BACKEND=${DISPATCHER_BACKEND} (expected: docker, nomad)" >&2
      exit 1
      ;;
  esac
  while true; do
    # Refresh ops repo at the start of each poll cycle
    ensure_ops_repo
--- a/docker/edge/entrypoint-edge.sh
+++ b/docker/edge/entrypoint-edge.sh
@ -1,9 +1,8 @@
 #!/usr/bin/env bash
 set -euo pipefail
-# Set USER and HOME before sourcing env.sh — preconditions for lib/env.sh (#674).
+# Set USER before sourcing env.sh (Alpine doesn't set USER)
-export USER="${USER:-agent}"
+export USER="${USER:-root}"
 export HOME="${HOME:-/home/agent}"
 FORGE_URL="${FORGE_URL:-http://forgejo:3000}"
@ -37,132 +36,21 @@ if [ -z "${FORGE_REPO:-}" ]; then
  fi
 fi
-# Detect bind-mount of a non-git directory before attempting clone
+# Shallow clone at the pinned version (inject token to support auth-required Forgejo)
-if [ -d /opt/disinto ] && [ ! -d /opt/disinto/.git ] && [ -n "$(ls -A /opt/disinto 2>/dev/null)" ]; then
+if [ ! -d /opt/disinto/.git ]; then
-  echo "FATAL: /opt/disinto contains files but no .git directory." >&2
+  _auth_url=$(printf '%s' "$FORGE_URL" | sed "s|://|://token:${FORGE_TOKEN}@|")
-  echo "If you bind-mounted a directory at /opt/disinto, ensure it is a git working tree." >&2
+  git clone --depth 1 --branch "${DISINTO_VERSION:-main}" "${_auth_url}/${FORGE_REPO}.git" /opt/disinto
  echo "Sleeping 60s before exit to throttle the restart loop..." >&2
  sleep 60
  exit 1
 fi
-# Set HOME early so credential helper and git config land in the right place.
+# Set HOME so that claude OAuth credentials and session.lock are found at the
 # same in-container path as in disinto-agents (/home/agent/.claude), which makes
 # flock cross-serialize across containers on the same host inode.
 export HOME=/home/agent
 mkdir -p "$HOME"
 # Configure git credential helper before cloning (#604).
 # /opt/disinto does not exist yet so we cannot source lib/git-creds.sh;
 # inline a minimal credential-helper setup here.
 if [ -n "${FORGE_PASS:-}" ] && [ -n "${FORGE_URL:-}" ]; then
  _forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||')
  _forge_proto=$(printf '%s' "$FORGE_URL" | sed 's|://.*||')
  _bot_user=""
  if [ -n "${FORGE_TOKEN:-}" ]; then
    _bot_user=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
      "${FORGE_URL}/api/v1/user" 2>/dev/null | jq -r '.login // empty') || _bot_user=""
  fi
  _bot_user="${_bot_user:-dev-bot}"
  cat > "${HOME}/.git-credentials-helper" <<CREDEOF
 #!/bin/sh
 # Reads \$FORGE_PASS from env at runtime — file is safe to read on disk.
 [ "\$1" = "get" ] || exit 0
 cat >/dev/null
 echo "protocol=${_forge_proto}"
 echo "host=${_forge_host}"
 echo "username=${_bot_user}"
 echo "password=\$FORGE_PASS"
 CREDEOF
  chmod 755 "${HOME}/.git-credentials-helper"
  git config --global credential.helper "${HOME}/.git-credentials-helper"
  git config --global --add safe.directory '*'
 fi
 # Shallow clone at the pinned version — use clean URL, credential helper
 # supplies auth (#604).
 # Retry with exponential backoff — forgejo may still be starting (#665).
 if [ ! -d /opt/disinto/.git ]; then
  echo "edge: cloning ${FORGE_URL}/${FORGE_REPO} (branch ${DISINTO_VERSION:-main})..." >&2
  _clone_ok=false
  _backoff=2
  _max_backoff=30
  _max_attempts=10
  for _attempt in $(seq 1 "$_max_attempts"); do
    if git clone --depth 1 --branch "${DISINTO_VERSION:-main}" "${FORGE_URL}/${FORGE_REPO}.git" /opt/disinto 2>&1; then
      _clone_ok=true
      break
    fi
    rm -rf /opt/disinto  # clean up partial clone before retry
    if [ "$_attempt" -lt "$_max_attempts" ]; then
      echo "edge: clone attempt ${_attempt}/${_max_attempts} failed, retrying in ${_backoff}s..." >&2
      sleep "$_backoff"
      _backoff=$(( _backoff * 2 ))
      if [ "$_backoff" -gt "$_max_backoff" ]; then _backoff=$_max_backoff; fi
    fi
  done
  if [ "$_clone_ok" != "true" ]; then
    echo >&2
    echo "FATAL: failed to clone ${FORGE_URL}/${FORGE_REPO}.git (branch ${DISINTO_VERSION:-main}) after ${_max_attempts} attempts" >&2
    echo "Likely causes:" >&2
    echo "  - Forgejo at ${FORGE_URL} is unreachable from the edge container" >&2
    echo "  - Repository '${FORGE_REPO}' does not exist on this forge" >&2
    echo "  - FORGE_TOKEN/FORGE_PASS is invalid or has no read access to '${FORGE_REPO}'" >&2
    echo "  - Branch '${DISINTO_VERSION:-main}' does not exist in '${FORGE_REPO}'" >&2
    echo "Workaround: bind-mount a local git checkout into /opt/disinto." >&2
    echo "Sleeping 60s before exit to throttle the restart loop..." >&2
    sleep 60
    exit 1
  fi
 fi
 # Repair any legacy baked-credential URLs in /opt/disinto (#604).
 # Now that /opt/disinto exists, source the shared lib.
 if [ -f /opt/disinto/lib/git-creds.sh ]; then
  # shellcheck source=/opt/disinto/lib/git-creds.sh
  source /opt/disinto/lib/git-creds.sh
  _GIT_CREDS_LOG_FN="echo" repair_baked_cred_urls /opt/disinto
 fi
 # Ensure log directory exists
 mkdir -p /opt/disinto-logs
 # ── Reverse tunnel (optional) ──────────────────────────────────────────
 # When EDGE_TUNNEL_HOST is set, open a single reverse-SSH forward so the
 # DO edge box can reach this container's Caddy on the project's assigned port.
 # Guarded: if EDGE_TUNNEL_HOST is empty/unset the block is skipped entirely,
 # keeping local-only dev working without errors.
 if [ -n "${EDGE_TUNNEL_HOST:-}" ]; then
  _tunnel_key="/run/secrets/tunnel_key"
  if [ ! -f "$_tunnel_key" ]; then
    echo "WARN: EDGE_TUNNEL_HOST is set but ${_tunnel_key} is missing — skipping tunnel" >&2
  else
    # Ensure correct permissions (bind-mount may arrive as 644)
    chmod 0400 "$_tunnel_key" 2>/dev/null || true
    : "${EDGE_TUNNEL_USER:=tunnel}"
    : "${EDGE_TUNNEL_PORT:?EDGE_TUNNEL_PORT must be set when EDGE_TUNNEL_HOST is set}"
    export AUTOSSH_GATETIME=0   # don't exit if the first attempt fails quickly
    autossh -M 0 -N -f \
      -o StrictHostKeyChecking=accept-new \
      -o ServerAliveInterval=30 \
      -o ServerAliveCountMax=3 \
      -o ExitOnForwardFailure=yes \
      -i "$_tunnel_key" \
      -R "127.0.0.1:${EDGE_TUNNEL_PORT}:localhost:80" \
      "${EDGE_TUNNEL_USER}@${EDGE_TUNNEL_HOST}"
    echo "edge: reverse tunnel → ${EDGE_TUNNEL_HOST}:${EDGE_TUNNEL_PORT}" >&2
  fi
 fi
 # Set project context vars for scripts that source lib/env.sh (#674).
 # These satisfy env.sh's preconditions for edge-container scripts.
 export PROJECT_REPO_ROOT="${PROJECT_REPO_ROOT:-/opt/disinto}"
 export PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}"
 export OPS_REPO_ROOT="${OPS_REPO_ROOT:-/home/agent/repos/${PROJECT_NAME:-disinto}-ops}"
 # Start dispatcher in background
 bash /opt/disinto/docker/edge/dispatcher.sh &
@ -173,67 +61,6 @@ PROJECT_TOML="${PROJECT_TOML:-projects/disinto.toml}"
  sleep 1200  # 20 minutes
 done) &
 # ── Load required secrets from secrets/*.enc (#777) ────────────────────
 # Edge container declares its required secrets; missing ones cause a hard fail.
 _AGE_KEY_FILE="${HOME}/.config/sops/age/keys.txt"
 _SECRETS_DIR="/opt/disinto/secrets"
 EDGE_REQUIRED_SECRETS="CADDY_SSH_KEY CADDY_SSH_HOST CADDY_SSH_USER CADDY_ACCESS_LOG"
 # Print the plaintext of one age-encrypted secret on stdout.
 #   $1 — secret name; the ciphertext is read from ${_SECRETS_DIR}/<name>.enc
 # Returns 1 when that file does not exist, otherwise the exit status of `age`.
 # age's stderr is discarded, so a failed decrypt is only visible via the status.
 _edge_decrypt_secret() {
  local secret_file
  secret_file="${_SECRETS_DIR}/${1}.enc"
  if [ ! -f "$secret_file" ]; then
    return 1
  fi
  age -d -i "$_AGE_KEY_FILE" "$secret_file" 2>/dev/null
 }
 if [ -f "$_AGE_KEY_FILE" ] && [ -d "$_SECRETS_DIR" ]; then
  _missing=""
  for _secret_name in $EDGE_REQUIRED_SECRETS; do
    _val=$(_edge_decrypt_secret "$_secret_name") || { _missing="${_missing} ${_secret_name}"; continue; }
    export "$_secret_name=$_val"
  done
  if [ -n "$_missing" ]; then
    echo "FATAL: required secrets missing from secrets/*.enc:${_missing}" >&2
    echo "  Run 'disinto secrets add <NAME>' for each missing secret." >&2
    echo "  If migrating from .env.vault.enc, run 'disinto secrets migrate-from-vault' first." >&2
    exit 1
  fi
  echo "edge: loaded required secrets: ${EDGE_REQUIRED_SECRETS}" >&2
 else
  echo "FATAL: age key (${_AGE_KEY_FILE}) or secrets dir (${_SECRETS_DIR}) not found — cannot load required secrets" >&2
  echo "  Ensure age is installed and secrets/*.enc files are present." >&2
  exit 1
 fi
 # Start daily engagement collection cron loop in background (#745)
 # Runs collect-engagement.sh daily at ~23:50 UTC via a sleep loop that
 # calculates seconds until the next 23:50 window. SSH key from secrets/*.enc (#777).
 (while true; do
  # Calculate seconds until next 23:50 UTC.
  # Derived arithmetically from the epoch (UTC midnight is always a multiple of
  # 86400 in Unix time) instead of `date -d`, whose accepted formats differ
  # between GNU and BusyBox. The previous `|| echo 0` fallback produced a
  # negative sleep interval, which made `sleep` fail and killed this loop.
  _now=$(date -u +%s)
  _target=$(( _now - _now % 86400 + 23 * 3600 + 50 * 60 ))
  if [ "$_target" -le "$_now" ]; then
    # Today's 23:50 window has already passed — aim for tomorrow's.
    _target=$(( _target + 86400 ))
  fi
  _sleep_secs=$(( _target - _now ))
  echo "edge: collect-engagement scheduled in ${_sleep_secs}s (next 23:50 UTC)" >&2
  sleep "$_sleep_secs"
  _fetch_log="/tmp/caddy-access-log-fetch.log"
  # Materialise the SSH key from the environment into a private temp file for scp.
  _ssh_key_file=$(mktemp)
  printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file"
  chmod 0600 "$_ssh_key_file"
  # Best-effort fetch: a failed scp must not terminate the loop.
  scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes \
    "${CADDY_SSH_USER}@${CADDY_SSH_HOST}:${CADDY_ACCESS_LOG}" \
    "$_fetch_log" 2>&1 | tee -a /opt/disinto-logs/collect-engagement.log || true
  rm -f "$_ssh_key_file"
  if [ -s "$_fetch_log" ]; then
    CADDY_ACCESS_LOG="$_fetch_log" bash /opt/disinto/site/collect-engagement.sh 2>&1 \
      | tee -a /opt/disinto-logs/collect-engagement.log || true
  else
    echo "edge: collect-engagement: fetched log is empty, skipping parse" >&2
  fi
  rm -f "$_fetch_log"
 done) &
 # Caddy as main process — run in foreground via wait so background jobs survive
 # (exec replaces the shell, which can orphan backgrounded subshells)
 caddy run --config /etc/caddy/Caddyfile --adapter caddyfile &
--- a/docker/reproduce/Dockerfile
+++ b/docker/reproduce/Dockerfile
@ -7,8 +7,5 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 RUN useradd -m -u 1000 -s /bin/bash agent
 COPY docker/reproduce/entrypoint-reproduce.sh /entrypoint-reproduce.sh
 RUN chmod +x /entrypoint-reproduce.sh
 VOLUME /home/agent/data
 VOLUME /home/agent/repos
 WORKDIR /home/agent
 ENTRYPOINT ["/entrypoint-reproduce.sh"]
--- a/docker/reproduce/entrypoint-reproduce.sh
+++ b/docker/reproduce/entrypoint-reproduce.sh
@ -15,7 +15,7 @@
 # Volumes expected:
 #   /home/agent/data          — agent-data volume (stack-lock files go here)
 #   /home/agent/repos         — project-repos volume
-#   $CLAUDE_CONFIG_DIR        — shared Claude config dir (OAuth credentials)
+#   /home/agent/.claude       — host ~/.claude (OAuth credentials)
 #   /home/agent/.ssh          — host ~/.ssh (read-only)
 #   /usr/local/bin/claude     — host claude CLI binary (read-only)
 #   /var/run/docker.sock      — host docker socket
@ -84,19 +84,6 @@ export DISINTO_CONTAINER=1
 export HOME="${HOME:-/home/agent}"
 export USER="${USER:-agent}"
 # Set project context vars for lib/env.sh surface contract (#674).
 # PROJECT_NAME and PROJECT_REPO_ROOT are set below after TOML parsing.
 export PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}"
 # Configure git credential helper so reproduce/triage agents can clone/push
 # without needing tokens embedded in remote URLs (#604).
 if [ -f "${DISINTO_DIR}/lib/git-creds.sh" ]; then
  # shellcheck source=lib/git-creds.sh
  source "${DISINTO_DIR}/lib/git-creds.sh"
  # shellcheck disable=SC2119  # no args intended — uses defaults
  configure_git_creds
 fi
 FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
 # Load project name from TOML
@ -111,8 +98,6 @@ with open(sys.argv[1], 'rb') as f:
 export PROJECT_NAME
 PROJECT_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}"
 export PROJECT_REPO_ROOT
 export OPS_REPO_ROOT="${OPS_REPO_ROOT:-/home/agent/repos/${PROJECT_NAME}-ops}"
 if [ "$AGENT_TYPE" = "triage" ]; then
  log "Starting triage-agent for issue #${ISSUE_NUMBER} (project: ${PROJECT_NAME})"
--- a/docker/runner/entrypoint-runner.sh
+++ b/docker/runner/entrypoint-runner.sh
@ -23,15 +23,6 @@ log() {
  printf '[%s] runner: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$*"
 }
 # Configure git credential helper so formulas can clone/push without
 # needing tokens embedded in remote URLs (#604).
 if [ -f "${FACTORY_ROOT}/lib/git-creds.sh" ]; then
  # shellcheck source=lib/git-creds.sh
  source "${FACTORY_ROOT}/lib/git-creds.sh"
  # shellcheck disable=SC2119  # no args intended — uses defaults
  configure_git_creds
 fi
 # ── Argument parsing ─────────────────────────────────────────────────────
 action_id="${1:-}"
--- a/docs/CLAUDE-AUTH-CONCURRENCY.md
+++ b/docs/CLAUDE-AUTH-CONCURRENCY.md
@ -1,138 +0,0 @@
 # Claude Code OAuth Concurrency Model
 ## Problem statement
 The factory runs multiple concurrent Claude Code processes across
 containers. OAuth access tokens are short-lived; refresh tokens rotate
 on each use. If two processes POST the same refresh token to Anthropic's
 token endpoint simultaneously, only one wins — the other gets
 `invalid_grant` and the operator is forced to re-login.
 Claude Code already serializes OAuth refreshes internally using
 `proper-lockfile` (`src/utils/auth.ts:1485-1491`):
 ```typescript
 release = await lockfile.lock(claudeDir)
 ```
 `proper-lockfile` creates a lockfile via an atomic `mkdir(${path}.lock)`
 call — a cross-process primitive that works across any number of
 processes on the same filesystem. The problem was never the lock
 implementation; it was that our old per-container bind-mount layout
 (`~/.claude` mounted but `/home/agent/` container-local) caused each
 container to compute a different lockfile path, so the locks never
 coordinated.
 ## The fix: shared `CLAUDE_CONFIG_DIR`
 `CLAUDE_CONFIG_DIR` is an officially supported env var in Claude Code
 (`src/utils/envUtils.ts`). It controls where Claude resolves its config
 directory instead of the default `~/.claude`.
 By setting `CLAUDE_CONFIG_DIR` to a path on a shared bind mount, every
 container computes the **same** lockfile location. `proper-lockfile`'s
 atomic `mkdir(${CLAUDE_CONFIG_DIR}.lock)` then gives free cross-container
 serialization — no external wrapper needed.
 ## Current layout
 ```
 Host filesystem:
  /var/lib/disinto/claude-shared/          ← CLAUDE_SHARED_DIR
  └── config/                              ← CLAUDE_CONFIG_DIR
      ├── .credentials.json
      ├── settings.json
      └── ...
 Inside every container:
  Same absolute path: /var/lib/disinto/claude-shared/config
  Env: CLAUDE_CONFIG_DIR=/var/lib/disinto/claude-shared/config
 ```
 The shared directory is mounted at the **same absolute path** inside
 every container, so `proper-lockfile` resolves an identical lock path
 everywhere.
 ### Where these values are defined
 | What | Where |
 |------|-------|
 | Defaults for `CLAUDE_SHARED_DIR`, `CLAUDE_CONFIG_DIR` | `lib/env.sh:138-140` |
 | `.env` documentation | `.env.example:92-99` |
 | Container mounts + env passthrough (edge dispatcher) | `docker/edge/dispatcher.sh:446-448` (and analogous blocks for reproduce, triage, verify) |
 | Auth detection using `CLAUDE_CONFIG_DIR` | `docker/agents/entrypoint.sh:101-102` |
 | Bootstrap / migration during `disinto init` | `lib/claude-config.sh:setup_claude_config_dir()`, `bin/disinto:952-962` |
 ## Migration for existing dev boxes
 For operators upgrading from the old `~/.claude` bind-mount layout,
 `disinto init` handles the migration interactively (or with `--yes`).
 The manual equivalent is:
 ```bash
 # 1. Stop the factory
 disinto down
 # 2. Create the shared directory
 mkdir -p /var/lib/disinto/claude-shared
 # 3. Move existing config
 mv "$HOME/.claude" /var/lib/disinto/claude-shared/config
 # 4. Create a back-compat symlink so host-side claude still works
 ln -sfn /var/lib/disinto/claude-shared/config "$HOME/.claude"
 # 5. Export the env var (add to shell rc for persistence)
 export CLAUDE_CONFIG_DIR=/var/lib/disinto/claude-shared/config
 # 6. Start the factory
 disinto up
 ```
 ## Verification
 Watch for these analytics events during concurrent agent runs:
 | Event | Meaning |
 |-------|---------|
 | `tengu_oauth_token_refresh_lock_acquiring` | A process is attempting to acquire the refresh lock |
 | `tengu_oauth_token_refresh_lock_acquired` | Lock acquired; refresh proceeding |
 | `tengu_oauth_token_refresh_lock_retry` | Lock is held by another process; retrying |
 | `tengu_oauth_token_refresh_lock_race_resolved` | Contention detected and resolved normally |
 | `tengu_oauth_token_refresh_lock_retry_limit_reached` | Lock acquisition failed after all retries |
 **Healthy:** `_race_resolved` appearing during contention windows — this
 means multiple processes tried to refresh simultaneously and the lock
 correctly serialized them.
 **Bad:** `_lock_retry_limit_reached` — indicates the lock is stuck or
 the shared mount is not working. Verify that `CLAUDE_CONFIG_DIR` resolves
 to the same path in all containers and that the filesystem supports
 `mkdir` atomicity (any POSIX filesystem does).
 ## The deferred external `flock` wrapper
 `lib/agent-sdk.sh:139,144` still wraps every `claude` invocation in an
 external `flock` on `${HOME}/.claude/session.lock`:
 ```bash
 local lock_file="${HOME}/.claude/session.lock"
 ...
 output=$(cd "$run_dir" && ( flock -w 600 9 || exit 1;
  claude_run_with_watchdog claude "${args[@]}" ) 9>"$lock_file" ...)
 ```
 With the `CLAUDE_CONFIG_DIR` fix in place, this external lock is
 **redundant but harmless** — `proper-lockfile` serializes the refresh
 internally, and `flock` serializes the entire invocation externally.
 The external flock remains as a defense-in-depth measure; removal is
 tracked as a separate vision-tier issue.
 ## See also
 - `lib/env.sh:138-140` — `CLAUDE_SHARED_DIR` / `CLAUDE_CONFIG_DIR` defaults
 - `lib/claude-config.sh` — migration helper used by `disinto init`
 - `lib/agent-sdk.sh:139,144` — the external `flock` wrapper (deferred removal)
 - `docker/agents/entrypoint.sh:101-102` — `CLAUDE_CONFIG_DIR` auth detection
 - `.env.example:92-99` — operator-facing documentation of the env vars
 - Issue #623 — chat container auth strategy
--- a/docs/VAULT.md
+++ b/docs/VAULT.md
@ -26,8 +26,8 @@ The `main` branch on the ops repo (`johba/disinto-ops`) is protected via Forgejo
 ## Vault PR Lifecycle
-1. **Request** — Agent calls `lib/action-vault.sh:vault_request()` with action TOML content
+1. **Request** — Agent calls `lib/vault.sh:vault_request()` with action TOML content
-2. **Validation** — TOML is validated against the schema in `action-vault/vault-env.sh`
+2. **Validation** — TOML is validated against the schema in `vault/vault-env.sh`
 3. **PR Creation** — A PR is created on `disinto-ops` with:
   - Branch: `vault/<action-id>`
   - Title: `vault: <action-id>`
@ -90,12 +90,12 @@ To verify the protection is working:
 - #73 — Vault redesign proposal
 - #74 — Vault action TOML schema
- #75 — Vault PR creation helper (`lib/action-vault.sh`)
+- #75 — Vault PR creation helper (`lib/vault.sh`)
 - #76 — Dispatcher rewrite (poll for merged vault PRs)
 - #77 — Branch protection on ops repo (this issue)
 ## See Also
- [`lib/action-vault.sh`](../lib/action-vault.sh) — Vault PR creation helper
+- [`lib/vault.sh`](../lib/vault.sh) — Vault PR creation helper
- [`action-vault/vault-env.sh`](../action-vault/vault-env.sh) — TOML validation
+- [`vault/vault-env.sh`](../vault/vault-env.sh) — TOML validation
 - [`lib/branch-protection.sh`](../lib/branch-protection.sh) — Branch protection helper
--- a/docs/agents-llama.md
+++ b/docs/agents-llama.md
@ -1,59 +0,0 @@
 # agents-llama — Local-Qwen Agents
 The `agents-llama` service is an optional compose service that runs agents
 backed by a local llama-server instance (e.g. Qwen) instead of the Anthropic
 API. It uses the same Docker image as the main `agents` service but connects to
 a local inference endpoint via `ANTHROPIC_BASE_URL`.
 Two profiles are available:
 | Profile | Service | Roles | Use case |
 |---------|---------|-------|----------|
 | _(default)_ | `agents-llama` | `dev` only | Conservative: single-role soak test |
 | `agents-llama-all` | `agents-llama-all` | all 7 (review, dev, gardener, architect, planner, predictor, supervisor) | Pre-migration: validate every role on llama before Nomad cutover |
 ## Enabling
 Set `ENABLE_LLAMA_AGENT=1` in `.env` (or `.env.enc`) and provide the required
 credentials:
 ```env
 ENABLE_LLAMA_AGENT=1
 FORGE_TOKEN_LLAMA=<dev-qwen API token>
 FORGE_PASS_LLAMA=<dev-qwen password>
 ANTHROPIC_BASE_URL=http://host.docker.internal:8081   # llama-server endpoint
 ```
 Then regenerate the compose file (`disinto init ...`) and bring the stack up.
 ### Running all 7 roles (agents-llama-all)
 ```bash
 docker compose --profile agents-llama-all up -d
 ```
 This starts the `agents-llama-all` container with all 7 bot roles against the
 local llama endpoint. The per-role forge tokens (`FORGE_REVIEW_TOKEN`,
 `FORGE_GARDENER_TOKEN`, etc.) must be set in `.env` — they are the same tokens
 used by the Claude-backed `agents` container.
 ## Prerequisites
 - **llama-server** (or compatible OpenAI-API endpoint) running on the host,
  reachable from inside Docker at the URL set in `ANTHROPIC_BASE_URL`.
 - A Forgejo bot user (e.g. `dev-qwen`) with its own API token and password,
  stored as `FORGE_TOKEN_LLAMA` / `FORGE_PASS_LLAMA`.
 ## Behaviour
 - `agents-llama`: `AGENT_ROLES=dev` — only picks up dev work.
 - `agents-llama-all`: `AGENT_ROLES=review,dev,gardener,architect,planner,predictor,supervisor` — runs all 7 roles.
 - `CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=60` — more aggressive compaction for smaller
  context windows.
 - Serialises on the llama-server's single KV cache (AD-002).
 ## Disabling
 Set `ENABLE_LLAMA_AGENT=0` (or leave it unset) and regenerate. The service
 block is omitted entirely from `docker-compose.yml`; the stack starts cleanly
 without it.
--- a/docs/edge-routing-fallback.md
+++ b/docs/edge-routing-fallback.md
@ -1,149 +0,0 @@
 # Edge Routing Fallback: Per-Project Subdomains
 > **Status:** Contingency plan. Only implement if subpath routing (#704 / #708)
 > proves unworkable.
 ## Context
 The primary approach routes services under subpaths of `<project>.disinto.ai`:
 | Service    | Primary (subpath)                          |
 |------------|--------------------------------------------|
 | Forgejo    | `<project>.disinto.ai/forge/`              |
 | Woodpecker | `<project>.disinto.ai/ci/`                 |
 | Chat       | `<project>.disinto.ai/chat/`               |
 | Staging    | `<project>.disinto.ai/staging/`            |
 The fallback uses per-service subdomains instead:
 | Service    | Fallback (subdomain)                       |
 |------------|--------------------------------------------|
 | Forgejo    | `forge.<project>.disinto.ai/`              |
 | Woodpecker | `ci.<project>.disinto.ai/`                 |
 | Chat       | `chat.<project>.disinto.ai/`               |
 | Staging    | `<project>.disinto.ai/`  (root)            |
 The wildcard cert from #621 covers `*.disinto.ai`, which matches only one
 level deep — so `<project>.disinto.ai` itself needs no new DNS records or
 certs. Sub-subdomains like `forge.<project>.disinto.ai` are not matched by
 it: for those we would need to add a second wildcard (`*.*.disinto.ai`) or
 explicit DNS records per project. Both are straightforward with the existing
 Gandi DNS-01 setup.
 ## Pivot Decision Criteria
 **Pivot if:**
 - Forgejo `ROOT_URL` under a subpath (`/forge/`) causes redirect loops that
  cannot be fixed with `X-Forwarded-Prefix` or Caddy `uri strip_prefix`.
 - Woodpecker's `WOODPECKER_HOST` does not honour subpath prefixes, causing
  OAuth callback mismatches that persist after adjusting redirect URIs.
 - Forward-auth on `/chat/*` conflicts with Forgejo's own OAuth flow when both
  share the same origin (cookie collision, CSRF token mismatch).
 **Do NOT pivot if:**
 - Forgejo login redirects to `/` instead of `/forge/` — fixable with Caddy
  `handle_path` + `uri prefix` rewrite.
 - Woodpecker UI assets 404 under `/ci/` — fixable with asset prefix config
  (`WOODPECKER_ROOT_PATH`).
 - A single OAuth app needs a second redirect URI — Forgejo supports multiple
  `redirect_uris` in the same app.
 ## Fallback Topology
 ### Caddyfile
 Replace the single `:80` block with four host blocks:
 ```caddy
 # Main project domain — staging / landing
 <project>.disinto.ai {
    reverse_proxy staging:80
 }
 # Forgejo — root path, no subpath rewrite needed
 forge.<project>.disinto.ai {
    reverse_proxy forgejo:3000
 }
 # Woodpecker CI — root path
 ci.<project>.disinto.ai {
    reverse_proxy woodpecker:8000
 }
 # Chat — with forward_auth (same as #709, but on its own host)
 chat.<project>.disinto.ai {
    handle /login {
        reverse_proxy chat:8080
    }
    handle /oauth/callback {
        reverse_proxy chat:8080
    }
    handle /* {
        forward_auth chat:8080 {
            uri /auth/verify
            copy_headers X-Forwarded-User
            header_up X-Forward-Auth-Secret {$FORWARD_AUTH_SECRET}
        }
        reverse_proxy chat:8080
    }
 }
 ```
 **Current file:** `docker/Caddyfile` (generated by `lib/generators.sh:_generate_caddyfile_impl`, line ~596).
 ### Service Configuration Changes
 | Variable / Setting         | Current (subpath)                              | Fallback (subdomain)                            | File                        |
 |----------------------------|------------------------------------------------|-------------------------------------------------|-----------------------------|
 | Forgejo `ROOT_URL`         | `https://<project>.disinto.ai/forge/`          | `https://forge.<project>.disinto.ai/`           | forgejo `app.ini`           |
 | `WOODPECKER_HOST`          | `http://localhost:8000` (subpath via proxy)     | `https://ci.<project>.disinto.ai`               | `lib/ci-setup.sh` line ~164 |
 | Woodpecker OAuth redirect  | `https://<project>.disinto.ai/ci/authorize`    | `https://ci.<project>.disinto.ai/authorize`     | `lib/ci-setup.sh` line ~153 |
 | Chat OAuth redirect        | `https://<project>.disinto.ai/chat/oauth/callback` | `https://chat.<project>.disinto.ai/oauth/callback` | `lib/ci-setup.sh` line ~188 |
 | `EDGE_TUNNEL_FQDN`         | `<project>.disinto.ai`                         | unchanged (main domain)                         | `lib/generators.sh` line ~432 |
 ### New Environment Variables (pivot only)
 These would be added to `lib/generators.sh` `_generate_compose_impl()` in the
 edge service environment block (currently line ~415):
 | Variable                     | Value                                  |
 |------------------------------|----------------------------------------|
 | `EDGE_TUNNEL_FQDN_FORGE`    | `forge.<project>.disinto.ai`           |
 | `EDGE_TUNNEL_FQDN_CI`       | `ci.<project>.disinto.ai`              |
 | `EDGE_TUNNEL_FQDN_CHAT`     | `chat.<project>.disinto.ai`            |
 ### DNS
 No new records needed if the registrar supports `*.*.disinto.ai` wildcards.
 Otherwise, add explicit A/CNAME records per project:
 ```
 forge.<project>.disinto.ai  → edge server IP
 ci.<project>.disinto.ai     → edge server IP
 chat.<project>.disinto.ai   → edge server IP
 ```
 The edge server already handles TLS via Caddy's automatic HTTPS with the
 existing ACME / DNS-01 challenge.
 ### Edge Control (`tools/edge-control/register.sh`)
 Currently `do_register()` creates a single route for `<project>.disinto.ai`.
 The fallback would need to register four routes (or accept a `--subdomain`
 parameter). See the TODO in `register.sh`.
 ## Files to Change on Pivot
 | File                              | What changes                                                    |
 |-----------------------------------|-----------------------------------------------------------------|
 | `docker/Caddyfile`               | Replace single host block → four host blocks (see above)        |
 | `lib/generators.sh`              | Add `EDGE_TUNNEL_FQDN_{FORGE,CI,CHAT}` env vars to compose     |
 | `lib/ci-setup.sh` ~line 153      | Woodpecker OAuth redirect URI → `ci.<project>` subdomain        |
 | `lib/ci-setup.sh` ~line 188      | Chat OAuth redirect URI → `chat.<project>` subdomain            |
 | `tools/edge-control/register.sh` | Register four routes per project instead of one                 |
 | `tools/edge-control/lib/caddy.sh`| `add_route()` gains subdomain support                           |
 | forgejo `app.ini`                 | `ROOT_URL` → `https://forge.<project>.disinto.ai/`             |
 Estimated effort for a full pivot: **under one day** given this plan.
--- a/docs/investigation-685-reviewer-approved-destructive-compose.md
+++ b/docs/investigation-685-reviewer-approved-destructive-compose.md
@ -1,123 +0,0 @@
 # Investigation: Reviewer approved destructive compose rewrite in PR #683
 **Issue**: #685
 **Date**: 2026-04-11
 **PR under investigation**: #683 (fix: config: gardener=1h, architect=9m, planner=11m)
 ## Summary
 The reviewer agent approved PR #683 in ~1 minute without flagging that it
 contained a destructive rewrite of `docker-compose.yml` — dropping named
 volumes, bind mounts, env vars, restart policy, and security options. Six
 structural gaps in the review pipeline allowed this to pass.
 ## Root causes
 ### 1. No infrastructure-file-specific review checklist
 The review formula (`formulas/review-pr.toml`) has a generic review checklist
 (bugs, security, imports, architecture, bash specifics, dead code). It has
 **no special handling for infrastructure files** — `docker-compose.yml`,
 `Dockerfile`, CI configs, or `entrypoint.sh` are reviewed with the same
 checklist as application code.
 Infrastructure files have a different failure mode: a single dropped line
 (a volume mount, an env var, a restart policy) can break a running deployment
 without any syntax error or linting failure. The generic checklist doesn't
 prompt the reviewer to check for these regressions.
 **Fix applied**: Added step 3c "Infrastructure file review" to
 `formulas/review-pr.toml` with a compose-specific checklist covering named
 volumes, bind mounts, env vars, restart policy, and security options.
 ### 2. No scope discipline
 Issue #682 asked for ~3 env var changes + `PLANNER_INTERVAL` plumbing — roughly
 10-15 lines across 3-4 files. PR #683's diff rewrote the entire compose service
 block (~50+ lines changed in `docker-compose.yml` alone).
 The review formula **does not instruct the reviewer to compare diff size against
 issue scope**. A scope-aware reviewer would flag: "this PR changes more lines
 than the issue scope warrants — request justification for out-of-scope changes."
 **Fix applied**: Added step 3d "Scope discipline" to `formulas/review-pr.toml`
 requiring the reviewer to compare actual changes against stated issue scope and
 flag out-of-scope modifications to infrastructure files.
 ### 3. Lessons-learned bias toward approval
 The reviewer's `.profile/knowledge/lessons-learned.md` contains multiple entries
 that systematically bias toward approval:
 - "Approval means 'ready to ship,' not 'perfect.'"
 - "'Different from how I'd write it' is not a blocker."
 - "Reserve request_changes for genuinely blocking concerns."
 These lessons are well-intentioned (they prevent nit-picking and false blocks)
 but they create a blind spot: the reviewer suppresses its instinct to flag
 suspicious-looking changes because the lessons tell it not to block on
 "taste-based" concerns. A compose service block rewrite *looks* like a style
 preference ("the dev reorganized the file") but is actually a correctness
 regression.
 **Recommendation**: The lessons-learned are not wrong — they should stay. But
 the review formula now explicitly carves out infrastructure files from the
 "bias toward APPROVE" guidance, making it clear that dropped infra
 configuration is a blocking concern, not a style preference.
 ### 4. No ground-truth for infrastructure files
 The reviewer only sees the diff. It has no way to compare against the running
 container's actual volume/env config. When dev-qwen rewrote a 30-line service
 block from scratch, the reviewer saw a 30-line addition and a 30-line deletion
 with no reference point.
 **Recommendation (future work)**: Maintain a `docker/expected-compose-config.yml`
 or have the reviewer fetch `docker compose config` output as ground truth when
 reviewing compose changes. This would let the reviewer diff the proposed config
 against the known-good config.
 ### 5. Structural analysis blind spot
 `lib/build-graph.py` tracks changes to files in `formulas/`, agent directories
 (`dev/`, `review/`, etc.), and `evidence/`. It does **not track infrastructure
 files** (`docker-compose.yml`, `docker/`, `.woodpecker/`). Changes to these
 files produce no alerts in the graph report — the reviewer gets no
 "affected objectives" signal for infrastructure changes.
 **Recommendation (future work)**: Add infrastructure file tracking to
 `build-graph.py` so that compose/Dockerfile/CI changes surface in the
 structural analysis.
 ### 6. Model and time budget
 Reviews use Sonnet (`CLAUDE_MODEL="sonnet"` at `review-pr.sh:229`) with a
 15-minute timeout. The PR #683 review completed in ~1 minute. Sonnet is
 optimized for speed, which is appropriate for most code reviews, but
 infrastructure changes benefit from the deeper reasoning of a more capable
 model.
 **Recommendation (future work)**: Consider escalating to a more capable model
 when the diff includes infrastructure files (compose, Dockerfiles, CI configs).
 ## Changes made
 1. **`formulas/review-pr.toml`** — Added two new review steps:
   - **Step 3c: Infrastructure file review** — When the diff touches
     `docker-compose.yml`, `Dockerfile*`, `.woodpecker/`, or `docker/`,
     requires checking for dropped volumes, bind mounts, env vars, restart
     policy, security options, and network config. Instructs the reviewer to
     read the full file (not just the diff) and compare against the base branch.
   - **Step 3d: Scope discipline** — Requires comparing the actual diff
     footprint against the stated issue scope. Flags out-of-scope rewrites of
     infrastructure files as blocking concerns.
 ## What would have caught this
 With the changes above, the reviewer would have:
 1. Seen step 3c trigger for `docker-compose.yml` changes
 2. Read the full compose file and compared against the base branch
 3. Noticed the dropped named volumes, bind mounts, env vars, restart policy
 4. Seen step 3d flag that a 3-env-var issue produced a 50+ line compose rewrite
 5. Issued REQUEST_CHANGES citing specific dropped configuration
--- a/docs/mirror-bootstrap.md
+++ b/docs/mirror-bootstrap.md
@ -1,59 +0,0 @@
 # Mirror Bootstrap — Pull-Mirror Cutover Path
 How to populate an empty Forgejo repo from an external source using
 `lib/mirrors.sh`'s `mirror_pull_register()`.
 ## Prerequisites
 | Variable | Example | Purpose |
 |---|---|---|
 | `FORGE_URL` | `http://forgejo:3000` | Forgejo instance base URL |
 | `FORGE_API_BASE` | `${FORGE_URL}/api/v1` | Global API base (set by `lib/env.sh`) |
 | `FORGE_TOKEN` | (admin or org-owner token) | Must have `repo:create` scope |
 The target org/user must already exist on the Forgejo instance.
 ## Command
 ```bash
 source lib/env.sh
 source lib/mirrors.sh
 # Register a pull mirror — creates the repo and starts the first sync.
 # Arguments, in order: source URL, target owner, target repo name,
 # sync interval (optional, default 8h).
 mirror_pull_register \
  "https://codeberg.org/johba/disinto.git" \
  "disinto-admin" \
  "disinto" \
  "8h0m0s"
 ```
 The function calls `POST /api/v1/repos/migrate` with `mirror: true`.
 Forgejo creates the repo and immediately queues the first sync.
 ## Verifying the sync
 ```bash
 # Check mirror status via API
 forge_api GET "/repos/disinto-admin/disinto" | jq '.mirror, .mirror_interval'
 # Confirm content arrived — should list branches
 forge_api GET "/repos/disinto-admin/disinto/branches" | jq '.[].name'
 ```
 The first sync typically completes within a few seconds for small-to-medium
 repos.  For large repos, poll the branches endpoint until content appears.
 ## Cutover scenario (Nomad migration)
 At cutover to the Nomad box:
 1. Stand up fresh Forgejo on the Nomad cluster (empty instance).
 2. Create the `disinto-admin` org via `disinto init` or API.
 3. Run `mirror_pull_register` pointing at the Codeberg source.
 4. Wait for sync to complete (check branches endpoint).
 5. Once content is confirmed, proceed with `disinto init` against the
   now-populated repo — all subsequent `mirror_push` calls will push
   to any additional mirrors configured in `projects/*.toml`.
 No manual `git clone` + `git push` step is needed.  The Forgejo pull-mirror
 handles the entire transfer.
--- a/docs/updating-factory.md
+++ b/docs/updating-factory.md
@ -18,12 +18,7 @@ git stash                           # save any local fixes
 git merge devbox/main
 ```
-## Note: docker-compose.yml is generator-only
+If the merge produces conflicts in `docker-compose.yml`, delete the file and regenerate it in step 3.
 The `docker-compose.yml` file is now generated exclusively by `bin/disinto init`.
 The tracked file has been removed. If you have a local `docker-compose.yml` from
 before this change, it is now "yours" and won't be touched by future updates.
 To pick up generator improvements, delete the existing file and run `bin/disinto init`.
 ## Step 2: Preserve local config
@ -36,9 +31,9 @@ cp projects/harb.toml projects/harb.toml.backup
 cp docker-compose.override.yml docker-compose.override.yml.backup 2>/dev/null
 ```
-## Step 3: Regenerate docker-compose.yml
+## Step 3: Regenerate docker-compose.yml (if needed)
-If `generate_compose()` changed or you need a fresh compose file:
+Only needed if `generate_compose()` changed or the compose file was deleted.
 ```bash
 rm docker-compose.yml
@ -52,15 +47,41 @@ init errors out.
 ### Known post-regeneration fixes (until #429 lands)
-Most generator issues have been fixed. The following items no longer apply:
+The generated compose has several issues on LXD deployments:
- **AppArmor (#492)** — Fixed: all services now have `apparmor=unconfined`
+**1. AppArmor (#492)** — Add to ALL services:
- **Forgejo image tag (#493)** — Fixed: generator uses `forgejo:11.0`
+```bash
- **Agent credential mounts (#495)** — Fixed: `.claude`, `.claude.json`, `.ssh`, and `project-repos` volumes are auto-generated
+sed -i '/^  forgejo:/a\    security_opt:\n      - apparmor=unconfined' docker-compose.yml
- **Repo path (#494)** — Not applicable: `projects/*.toml` files are gitignored and preserved
+sed -i '/^  agents:/a\    security_opt:\n      - apparmor=unconfined' docker-compose.yml
 # repeat for: agents-llama, edge, woodpecker, woodpecker-agent, staging, reproduce
 ```
-If you need to add custom volumes, edit the generated `docker-compose.yml` directly.
+**2. Forgejo image tag (#493)**:
-It will not be overwritten by future `init` runs (the generator skips existing files).
+```bash
 sed -i 's|forgejo/forgejo:.*|forgejo/forgejo:11.0|' docker-compose.yml
 ```
 **3. Agent credential mounts (#495)** — Add to agents volumes:
 ```yaml
 - ${HOME}/.claude:/home/agent/.claude
 - ${HOME}/.claude.json:/home/agent/.claude.json:ro
 - ${HOME}/.ssh:/home/agent/.ssh:ro
 - project-repos:/home/agent/repos
 ```
 **4. Repo path (#494)** — Fix `projects/harb.toml` if init overwrote it:
 ```bash
 sed -i 's|repo_root.*=.*"/home/johba/harb"|repo_root       = "/home/agent/repos/harb"|' projects/harb.toml
 sed -i 's|ops_repo_root.*=.*"/home/johba/harb-ops"|ops_repo_root   = "/home/agent/repos/harb-ops"|' projects/harb.toml
 ```
 **5. Add missing volumes** to the `volumes:` section at the bottom:
 ```yaml
 volumes:
  project-repos:
  project-repos-llama:
  disinto-logs:
 ```
 ## Step 4: Rebuild and restart
--- a/formulas/collect-engagement.toml
+++ b/formulas/collect-engagement.toml
@ -1,172 +0,0 @@
 # formulas/collect-engagement.toml — Collect website engagement data
 #
 # Daily formula: SSH into Caddy host, fetch access log, parse locally,
 # commit evidence JSON to ops repo via Forgejo API.
 #
 # Triggered by cron in the edge container entrypoint (daily at 23:50 UTC).
 # Design choices from #426: Q1=A (fetch raw log, process locally),
 # Q2=A (direct cron in edge container), Q3=B (dedicated purpose-limited SSH key).
 #
 # Steps: fetch-log → parse-engagement → commit-evidence
 name        = "collect-engagement"
 description = "SSH-fetch Caddy access log, parse engagement metrics, commit evidence"
 version     = 1
 [context]
 files = ["AGENTS.md"]
 [vars.caddy_host]
 description = "SSH host for the Caddy server"
 required    = false
 default     = "${CADDY_SSH_HOST:-disinto.ai}"
 [vars.caddy_user]
 description = "SSH user on the Caddy host"
 required    = false
 default     = "${CADDY_SSH_USER:-debian}"
 [vars.caddy_log_path]
 description = "Path to Caddy access log on the remote host"
 required    = false
 default     = "${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}"
 [vars.local_log_path]
 description = "Local path to store fetched access log"
 required    = false
 default     = "/tmp/caddy-access-log-fetch.log"
 [vars.evidence_dir]
 description = "Evidence output directory in the ops repo"
 required    = false
 default     = "evidence/engagement"
 # ── Step 1: SSH fetch ────────────────────────────────────────────────
 [[steps]]
 id          = "fetch-log"
 title       = "Fetch Caddy access log from remote host via SSH"
 description = """
 Fetch today's Caddy access log segment from the remote host using SCP.
 The SSH key is read from the environment (CADDY_SSH_KEY), which is
 decrypted from secrets/CADDY_SSH_KEY.enc by the edge entrypoint. It is NEVER hardcoded.
 1. Write the SSH key to a temporary file with restricted permissions:
     _ssh_key_file=$(mktemp)
     trap 'rm -f "$_ssh_key_file"' EXIT
     printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file"
     chmod 0600 "$_ssh_key_file"
 2. Verify connectivity:
     ssh -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
       -o ConnectTimeout=10 -o BatchMode=yes \
       {{caddy_user}}@{{caddy_host}} 'echo ok'
 3. Fetch the access log via scp:
     scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
       -o ConnectTimeout=10 -o BatchMode=yes \
       "{{caddy_user}}@{{caddy_host}}:{{caddy_log_path}}" \
       "{{local_log_path}}"
 4. Verify the fetched file is non-empty:
     if [ ! -s "{{local_log_path}}" ]; then
       echo "WARNING: fetched access log is empty — site may have no traffic"
     else
       echo "Fetched $(wc -l < "{{local_log_path}}") lines from {{caddy_host}}"
     fi
 5. Clean up the temporary key file:
     rm -f "$_ssh_key_file"
 """
 # ── Step 2: Parse engagement ─────────────────────────────────────────
 [[steps]]
 id          = "parse-engagement"
 title       = "Run collect-engagement.sh against the local log copy"
 description = """
 Run the engagement parser against the locally fetched access log.
 1. Set CADDY_ACCESS_LOG to point at the local copy so collect-engagement.sh
   reads from it instead of the default path:
     export CADDY_ACCESS_LOG="{{local_log_path}}"
 2. Run the parser:
     bash "$FACTORY_ROOT/site/collect-engagement.sh"
 3. Verify the evidence JSON was written:
     REPORT_DATE=$(date -u +%Y-%m-%d)
     EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
     if [ -f "$EVIDENCE_FILE" ]; then
       echo "Evidence written: $EVIDENCE_FILE"
       jq . "$EVIDENCE_FILE"
     else
       echo "ERROR: evidence file not found at $EVIDENCE_FILE"
       exit 1
     fi
 4. Clean up the fetched log:
     rm -f "{{local_log_path}}"
 """
 needs       = ["fetch-log"]
 # ── Step 3: Commit evidence ──────────────────────────────────────────
 [[steps]]
 id          = "commit-evidence"
 title       = "Commit evidence JSON to ops repo via Forgejo API"
 description = """
 Commit the dated evidence JSON to the ops repo so the planner can
 consume it during gap analysis.
 1. Read the evidence file:
     REPORT_DATE=$(date -u +%Y-%m-%d)
     EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
     CONTENT=$(base64 < "$EVIDENCE_FILE")
 2. Check if the file already exists in the ops repo (update vs create):
     OPS_OWNER="${OPS_FORGE_OWNER:-${FORGE_REPO%%/*}}"
     OPS_REPO="${OPS_FORGE_REPO:-${PROJECT_NAME:-disinto}-ops}"
     FILE_PATH="{{evidence_dir}}/${REPORT_DATE}.json"
     EXISTING=$(curl -sf \
       -H "Authorization: token ${FORGE_TOKEN}" \
       "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
       2>/dev/null || echo "")
 3. Create or update the file via Forgejo API:
     if [ -n "$EXISTING" ] && printf '%s' "$EXISTING" | jq -e '.sha' >/dev/null 2>&1; then
       # Update existing file
       SHA=$(printf '%s' "$EXISTING" | jq -r '.sha')
       curl -sf -X PUT \
         -H "Authorization: token ${FORGE_TOKEN}" \
         -H "Content-Type: application/json" \
         "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
         -d "$(jq -nc --arg content "$CONTENT" --arg sha "$SHA" --arg msg "evidence: engagement ${REPORT_DATE}" \
           '{message: $msg, content: $content, sha: $sha}')"
       echo "Updated existing evidence file in ops repo"
     else
       # Create new file
       curl -sf -X POST \
         -H "Authorization: token ${FORGE_TOKEN}" \
         -H "Content-Type: application/json" \
         "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
         -d "$(jq -nc --arg content "$CONTENT" --arg msg "evidence: engagement ${REPORT_DATE}" \
           '{message: $msg, content: $content}')"
       echo "Created evidence file in ops repo"
     fi
 4. Verify the commit landed:
     VERIFY=$(curl -sf \
       -H "Authorization: token ${FORGE_TOKEN}" \
       "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
       | jq -r '.name // empty')
     if [ "$VERIFY" = "${REPORT_DATE}.json" ]; then
       echo "Evidence committed: ${FILE_PATH}"
     else
       echo "ERROR: could not verify evidence commit"
       exit 1
     fi
 """
 needs       = ["parse-engagement"]
--- a/formulas/rent-a-human-caddy-ssh.toml
+++ b/formulas/rent-a-human-caddy-ssh.toml
@ -1,161 +0,0 @@
 # formulas/rent-a-human-caddy-ssh.toml — Provision SSH key for Caddy log collection
 #
 # "Rent a Human" — walk the operator through provisioning a purpose-limited
 # SSH keypair so collect-engagement.sh can fetch Caddy access logs remotely.
 #
 # The key uses a `command=` restriction so it can ONLY cat the access log.
 # No interactive shell, no port forwarding, no agent forwarding.
 #
 # Parent vision issue: #426
 # Sprint: website-observability-wire-up (ops PR #10)
 # Consumed by: site/collect-engagement.sh (issue #745)
 name        = "rent-a-human-caddy-ssh"
 description = "Provision a purpose-limited SSH keypair for remote Caddy log collection"
 version     = 1
 # ── Step 1: Generate keypair ─────────────────────────────────────────────────
 [[steps]]
 id    = "generate-keypair"
 title = "Generate a dedicated ed25519 keypair"
 description = """
 Generate a purpose-limited SSH keypair for Caddy log collection.
 Run on your local machine (NOT the Caddy host):
 ```
 ssh-keygen -t ed25519 -f caddy-collect -N '' -C 'disinto-collect-engagement'
 ```
 This produces two files:
  - caddy-collect      (private key — goes into the vault)
  - caddy-collect.pub  (public key — goes onto the Caddy host)
 Do NOT set a passphrase (-N '') — the factory runs unattended.
 """
 # ── Step 2: Install public key on Caddy host ─────────────────────────────────
 [[steps]]
 id    = "install-public-key"
 title = "Install the public key on the Caddy host with command= restriction"
 needs = ["generate-keypair"]
 description = """
 Install the public key on the Caddy host with a strict command= restriction
 so this key can ONLY read the access log.
 1. SSH into the Caddy host as the user who owns /var/log/caddy/access.log.
 2. Open (or create) ~/.ssh/authorized_keys:
     mkdir -p ~/.ssh && chmod 700 ~/.ssh
     nano ~/.ssh/authorized_keys
 3. Add this line (all on ONE line — do not wrap):
     command="cat /var/log/caddy/access.log",no-port-forwarding,no-X11-forwarding,no-agent-forwarding ssh-ed25519 AAAA... disinto-collect-engagement
   Replace "AAAA..." with the contents of caddy-collect.pub.
   To build the line automatically:
     echo "command=\"cat /var/log/caddy/access.log\",no-port-forwarding,no-X11-forwarding,no-agent-forwarding $(cat caddy-collect.pub)"
 4. Set permissions:
     chmod 600 ~/.ssh/authorized_keys
 What the restrictions do:
  - command="cat /var/log/caddy/access.log"
      Forces this key to only execute `cat /var/log/caddy/access.log`,
      regardless of what the client requests.
  - no-port-forwarding    — blocks SSH tunnels
  - no-X11-forwarding     — blocks X11
  - no-agent-forwarding   — blocks agent forwarding
 If the access log is at a different path, update the command= restriction
 AND set CADDY_ACCESS_LOG in the factory environment to match.
 """
 # ── Step 3: Add private key to vault secrets ─────────────────────────────────
 [[steps]]
 id    = "store-private-key"
 title = "Add the private key as CADDY_SSH_KEY secret"
 needs = ["generate-keypair"]
 description = """
 Store the private key in the factory's encrypted secrets store.
 1. Add the private key using `disinto secrets add`:
     cat caddy-collect | disinto secrets add CADDY_SSH_KEY
   This encrypts the key with age and stores it as secrets/CADDY_SSH_KEY.enc.
 2. IMPORTANT: After storing, securely delete the local private key file:
     shred -u caddy-collect 2>/dev/null || rm -f caddy-collect
     rm -f caddy-collect.pub
   The public key is already installed on the Caddy host; the private key
   now lives only in secrets/CADDY_SSH_KEY.enc.
 Never commit the private key to any git repository.
 """
 # ── Step 4: Configure Caddy host address ─────────────────────────────────────
 [[steps]]
 id    = "store-caddy-host"
 title = "Add the Caddy host details as secrets"
 needs = ["install-public-key"]
 description = """
 Store the Caddy connection details so collect-engagement.sh knows
 where to SSH.
 1. Add each value using `disinto secrets add`:
     echo 'disinto.ai' | disinto secrets add CADDY_SSH_HOST
     echo 'debian' | disinto secrets add CADDY_SSH_USER
     echo '/var/log/caddy/access.log' | disinto secrets add CADDY_ACCESS_LOG
   Replace values with the actual SSH host, user, and log path for your setup.
 """
 # ── Step 5: Test the connection ──────────────────────────────────────────────
 [[steps]]
 id    = "test-connection"
 title = "Verify the SSH key works and returns the access log"
 needs = ["install-public-key", "store-private-key", "store-caddy-host"]
 description = """
 Test the end-to-end connection before the factory tries to use it.
 1. From the factory host (or anywhere with the private key), run:
     ssh -i caddy-collect -o StrictHostKeyChecking=accept-new user@caddy-host
   Expected behavior:
     - Outputs the contents of /var/log/caddy/access.log
     - Disconnects immediately (command= restriction forces this)
   If you already shredded the local key, write it back out from the
   decrypted CADDY_SSH_KEY secret (it is stored as the raw private key,
   not base64-encoded):
     _test_key=$(mktemp)
     printf '%s\n' "$CADDY_SSH_KEY" > "$_test_key"
     chmod 600 "$_test_key"
     ssh -i "$_test_key" -o StrictHostKeyChecking=accept-new user@caddy-host
 2. Verify the output is Caddy structured JSON (one JSON object per line),
   then remove the temporary key (use -i caddy-collect instead if you
   still have the local key file):
     ssh -i "$_test_key" user@caddy-host | head -1 | jq .
     rm -f "$_test_key"
   You should see fields like: ts, request, status, duration.
 3. If the connection fails:
     - Permission denied → check authorized_keys format (must be one line)
     - Connection refused → check sshd is running on the Caddy host
     - Empty output → check /var/log/caddy/access.log exists and is readable
       by the SSH user
     - "jq: error" → Caddy may be using Combined Log Format instead of
       structured JSON; check Caddy's log configuration
 4. Once verified, the factory's collect-engagement.sh can use this key
   to fetch logs remotely via:
     ssh -i <key-path> "$CADDY_SSH_USER@$CADDY_SSH_HOST"
 """
--- a/formulas/review-pr.toml
+++ b/formulas/review-pr.toml
@ -80,64 +80,6 @@ For each BEHAVIORAL change in the diff (not pure bug fixes or formatting):
 This check is SKIPPED for pure bug fixes where the intended behavior is
 unchanged (the code was wrong, not the documentation).
 ## 3c. Infrastructure file review (conditional)
 If the diff touches ANY of these files, apply this additional checklist:
 - `docker-compose.yml` or `docker-compose.*.yml`
 - `Dockerfile` or `docker/*`
 - `.woodpecker/` CI configs
 - `docker/agents/entrypoint.sh`
 Infrastructure files have a different failure mode from application code:
 a single dropped line (a volume mount, an env var, a restart policy) can
 break a running deployment with no syntax error. Treat dropped
 infrastructure configuration as a **blocking defect**, not a style choice.
 ### For docker-compose.yml changes:
 1. **Read the full file** in the PR branch — do not rely only on the diff.
 2. Run `git diff <base>..HEAD -- docker-compose.yml` to see the complete
   change, not just the truncated diff.
 3. Check that NONE of the following were dropped without explicit
   justification in the PR description:
   - Named volumes (e.g. `agent-data`, `project-repos`)
   - Bind mounts (especially for config, secrets, SSH keys, shared dirs)
   - Environment variables (compare the full `environment:` block against
     the base branch)
   - `restart:` policy (should be `unless-stopped` for production services)
   - `security_opt:` settings
   - Network configuration
   - Resource limits / deploy constraints
 4. If ANY production configuration was dropped and the PR description does
   not explain why, **REQUEST_CHANGES**. List each dropped item explicitly.
 ### For Dockerfile / entrypoint changes:
 1. Check that base image, installed packages, and runtime deps are preserved.
 2. Verify that entrypoint/CMD changes don't break the container startup.
 ### For CI config changes:
 1. Check that pipeline steps aren't silently removed.
 2. Verify that secret references still match available secrets.
 ## 3d. Scope discipline
 Compare the actual diff footprint against the stated issue scope:
 1. Read the PR title and description to identify what the issue asked for.
 2. Estimate the expected diff size (e.g., "add 3 env vars" = ~5-10 lines
   in compose + ~5 lines in scripts).
 3. If the actual diff in ANY single file exceeds 3x the expected scope,
   flag it: "this file changed N lines but the issue scope suggests ~M."
 For infrastructure files (compose, Dockerfiles, CI), scope violations are
 **blocking**: REQUEST_CHANGES and ask the author to split out-of-scope
 changes into a separate PR or justify them in the description.
 For non-infrastructure files, scope violations are advisory: leave a
 non-blocking COMMENT noting the scope creep.
 ## 4. Vault item quality (conditional)
 If the PR adds or modifies vault item files (`vault/pending/*.md` in the ops repo), apply these
@ -213,7 +155,7 @@ should file a vault item instead of executing directly.
 **Exceptions** (do NOT flag these):
 - Code inside `vault/` — the vault system itself is allowed to handle secrets
 - References in comments or documentation explaining the architecture
- `bin/disinto` setup commands that manage `secrets/*.enc` and the `run` subcommand
+- `bin/disinto` setup commands that manage `.env.vault.enc` and the `run` subcommand
 - Local operations (git push to forge, forge API calls with `FORGE_TOKEN`)
 ## 6. Re-review (if previous review is provided)
@ -277,11 +219,9 @@ for actual problems (bugs, security issues, broken functionality, missing
 required behavior). Use DISCUSS sparingly.
 Note: The bias toward APPROVE applies to code correctness and style decisions.
-It does NOT apply to documentation consistency (step 3b), infrastructure file
+It does NOT apply to documentation consistency (step 3b) or tech-debt filing
-findings (step 3c), or tech-debt filing (step 7) — those are separate concerns
+(step 7) — those are separate concerns that should be handled regardless of
-that should be handled regardless of the change's correctness. In particular,
+the change's correctness.
 dropped production configuration (volumes, bind mounts, env vars, restart
 policy) is a blocking defect, not a style preference.
 ## 9. Output
--- a/formulas/run-architect.toml
+++ b/formulas/run-architect.toml
@ -16,14 +16,7 @@
 #            - Bash creates the ops PR with pitch content
 #            - Bash posts the ACCEPT/REJECT footer comment
 #   Step 3: Sprint PR creation with questions (issue #101) (one PR per pitch)
-#   Step 4: Post-merge sub-issue filing via filer-bot (#764)
+#   Step 4: Answer parsing + sub-issue filing (issue #102)
 #
 # Permission model (#764):
 #   architect-bot: READ-ONLY on project repo (GET issues/PRs/labels for context).
 #     Cannot POST/PUT/PATCH/DELETE any project-repo resource.
 #     Write access ONLY on ops repo (branches, PRs, comments).
 #   filer-bot: issues:write on project repo. Files sub-issues from merged sprint
 #     PRs via ops-filer pipeline. Adds in-progress label to vision issues.
 #
 # Architecture:
 # - Bash script (architect-run.sh) handles ALL state management
@ -153,32 +146,15 @@ For each issue in ARCHITECT_TARGET_ISSUES, bash performs:
 ## Recommendation
 <architect's assessment: worth it / defer / alternative approach>
 ## Sub-issues
 <!-- filer:begin -->
 - id: <kebab-case-id>
  title: "vision(#N): <concise sub-issue title>"
  labels: [backlog]
  depends_on: []
  body: |
    ## Goal
    <what this sub-issue accomplishes>
    ## Acceptance criteria
    - [ ] <criterion>
 <!-- filer:end -->
 IMPORTANT: Do NOT include design forks or questions yet. The pitch is a go/no-go
 decision for the human. Questions come only after acceptance.
 The ## Sub-issues block is parsed by the filer-bot pipeline after sprint PR merge.
 Each sub-issue between filer:begin/end markers becomes a Forgejo issue on the
 project repo. The filer appends a decomposed-from marker to each body automatically.
 4. Bash creates PR:
   - Create branch: architect/sprint-{pitch-number}
   - Write sprint spec to sprints/{sprint-slug}.md
   - Create PR with pitch content as body
   - Post footer comment: "Reply ACCEPT to proceed with design questions, or REJECT: <reason> to decline."
-   - NOTE: in-progress label is added by filer-bot after sprint PR merge (#764)
+   - Add in-progress label to vision issue
 Output:
 - One PR per vision issue (up to 3 per run)
@ -193,25 +169,9 @@ description = """
 IMPORTANT: PR creation is handled by bash (architect-run.sh) during the pitch step.
 This step is for documentation only — the actual PR creation happens in research_pitch.
 ## Approved PR → Initial design questions (issue #570)
 When a sprint pitch PR receives an APPROVED review but has no `## Design forks`
 section and no Q1:, Q2: comments yet, the architect enters a new state:
 1. detect_approved_pending_questions() identifies this state
 2. A fresh agent session starts with a special prompt
 3. The agent reads the approved pitch, posts initial design questions (Q1:, Q2:, etc.)
 4. The agent adds a `## Design forks` section to the PR body
 5. The PR transitions into the questions phase, where the existing Q&A loop takes over
 This ensures approved PRs don't sit indefinitely without design conversation.
 Architecture:
 - Bash creates PRs during stateless pitch generation (step 2)
 - Model has no role in PR creation — no Forgejo API access
 - architect-bot is READ-ONLY on the project repo (#764) — all project-repo
  writes (sub-issue filing, in-progress label) are handled by filer-bot
  via the ops-filer pipeline after sprint PR merge
 - This step describes the PR format for reference
 PR Format (created by bash):
@ -228,29 +188,64 @@ PR Format (created by bash):
   - Head: architect/sprint-{pitch-number}
   - Footer comment: "Reply ACCEPT to proceed with design questions, or REJECT: <reason> to decline."
 4. Add in-progress label to vision issue:
   - Look up label ID: GET /repos/{owner}/{repo}/labels
   - Add label: POST /repos/{owner}/{repo}/issues/{issue_number}/labels
 After creating all PRs, signal PHASE:done.
 NOTE: in-progress label on the vision issue is added by filer-bot after sprint PR merge (#764).
-## Forgejo API Reference (ops repo only)
+## Forgejo API Reference
-All operations use the ops repo Forgejo API with `Authorization: token ${FORGE_TOKEN}` header.
+All operations use the Forgejo API with Authorization: token ${FORGE_TOKEN} header.
 architect-bot is READ-ONLY on the project repo — cannot POST/PUT/PATCH/DELETE project-repo resources (#764).
-### Create branch (ops repo)
+### Create branch
 ```
-POST /repos/{owner}/{repo-ops}/branches
+POST /repos/{owner}/{repo}/branches
 Body: {"new_branch_name": "architect/<sprint-slug>", "old_branch_name": "main"}
 ```
-### Create/update file (ops repo)
+### Create/update file
 ```
-PUT /repos/{owner}/{repo-ops}/contents/<path>
+PUT /repos/{owner}/{repo}/contents/<path>
 Body: {"message": "sprint: add <sprint-slug>.md", "content": "<base64-encoded-content>", "branch": "architect/<sprint-slug>"}
 ```
-### Create PR (ops repo)
+### Create PR
 ```
-POST /repos/{owner}/{repo-ops}/pulls
+POST /repos/{owner}/{repo}/pulls
 Body: {"title": "architect: <sprint summary>", "body": "<markdown-text>", "head": "architect/<sprint-slug>", "base": "main"}
 ```
 **Important: PR body format**
 - The body field must contain plain markdown text (the raw content from the model)
 - Do NOT JSON-encode or escape the body — pass it as a JSON string value
 - Newlines and markdown formatting (headings, lists, etc.) must be preserved as-is
 ### Add label to issue
 ```
 POST /repos/{owner}/{repo}/issues/{index}/labels
 Body: {"labels": [<label-id>]}
 ```
 ## Forgejo API Reference
 All operations use the Forgejo API with `Authorization: token ${FORGE_TOKEN}` header.
 ### Create branch
 ```
 POST /repos/{owner}/{repo}/branches
 Body: {"new_branch_name": "architect/<sprint-slug>", "old_branch_name": "main"}
 ```
 ### Create/update file
 ```
 PUT /repos/{owner}/{repo}/contents/<path>
 Body: {"message": "sprint: add <sprint-slug>.md", "content": "<base64-encoded-content>", "branch": "architect/<sprint-slug>"}
 ```
 ### Create PR
 ```
 POST /repos/{owner}/{repo}/pulls
 Body: {"title": "architect: <sprint summary>", "body": "<markdown-text>", "head": "architect/<sprint-slug>", "base": "main"}
 ```
@ -259,22 +254,30 @@ Body: {"title": "architect: <sprint summary>", "body": "<markdown-text>", "head"
 - Do NOT JSON-encode or escape the body — pass it as a JSON string value
 - Newlines and markdown formatting (headings, lists, etc.) must be preserved as-is
-### Close PR (ops repo)
+### Close PR
 ```
-PATCH /repos/{owner}/{repo-ops}/pulls/{index}
+PATCH /repos/{owner}/{repo}/pulls/{index}
 Body: {"state": "closed"}
 ```
-### Delete branch (ops repo)
+### Delete branch
 ```
-DELETE /repos/{owner}/{repo-ops}/git/branches/<branch-name>
+DELETE /repos/{owner}/{repo}/git/branches/<branch-name>
 ```
-### Read-only on project repo (context gathering)
+### Get labels (look up label IDs by name)
 ```
-GET /repos/{owner}/{repo}/issues          — list issues
+GET /repos/{owner}/{repo}/labels
-GET /repos/{owner}/{repo}/issues/{number} — read issue details
+```
-GET /repos/{owner}/{repo}/labels          — list labels
+
-GET /repos/{owner}/{repo}/pulls           — list PRs
+### Add label to issue (for in-progress on vision issue)
 ```
 POST /repos/{owner}/{repo}/issues/{index}/labels
 Body: {"labels": [<label-id>]}
 ```
 ### Remove label from issue (for in-progress removal on REJECT)
 ```
 DELETE /repos/{owner}/{repo}/issues/{index}/labels/{label-id}
 ```
 """
--- a/formulas/run-gardener.toml
+++ b/formulas/run-gardener.toml
@ -177,7 +177,7 @@ DUST (trivial — single-line edit, rename, comment, style, whitespace):
 VAULT (needs human decision or external resource):
  File a vault procurement item using vault_request():
-    source "$(dirname "$0")/../lib/action-vault.sh"
+    source "$(dirname "$0")/../lib/vault.sh"
    TOML_CONTENT="# Vault action: <action_id>
 context = \"<description of what decision/resource is needed>\"
 unblocks = [\"#NNN\"]
--- a/formulas/run-planner.toml
+++ b/formulas/run-planner.toml
@ -243,7 +243,7 @@ needs = ["preflight"]
 [[steps]]
 id    = "commit-ops-changes"
-title = "Write tree, memory, and journal; commit and push branch"
+title = "Write tree, memory, and journal; commit and push"
 description = """
 ### 1. Write prerequisite tree
 Write to: $OPS_REPO_ROOT/prerequisites.md
@ -256,16 +256,14 @@ If (count - N) >= 5 or planner-memory.md missing, write to:
 Include: run counter marker, date, constraint focus, patterns, direction.
 Keep under 100 lines. Replace entire file.
-### 3. Commit ops repo changes to the planner branch
+### 3. Commit ops repo changes
-Commit the ops repo changes (prerequisites, memory, vault items) and push the
+Commit the ops repo changes (prerequisites, memory, vault items):
-branch. Do NOT push directly to $PRIMARY_BRANCH — planner-run.sh will create a
-PR and walk it to merge via review-bot.
  cd "$OPS_REPO_ROOT"
  git add prerequisites.md knowledge/planner-memory.md vault/pending/
  git add -u
  if ! git diff --cached --quiet; then
    git commit -m "chore: planner run $(date -u +%Y-%m-%d)"
-    git push origin HEAD
+    git push origin "$PRIMARY_BRANCH"
  fi
  cd "$PROJECT_REPO_ROOT"
--- a/formulas/run-predictor.toml
+++ b/formulas/run-predictor.toml
@ -125,8 +125,8 @@ For each weakness you identify, choose one:
  The prediction explains the theory. The vault PR triggers the proof
  after human approval. When the planner runs next, evidence is already there.
-  Vault dispatch (requires lib/action-vault.sh):
+  Vault dispatch (requires lib/vault.sh):
-    source "$PROJECT_REPO_ROOT/lib/action-vault.sh"
+    source "$PROJECT_REPO_ROOT/lib/vault.sh"
    TOML_CONTENT="id = \"predict-<prediction_number>-<formula>\"
 context = \"Test prediction #<prediction_number>: <theory summary> — focus: <specific test>\"
@ -154,7 +154,7 @@ tea is pre-configured with login "$TEA_LOGIN" and repo "$FORGE_REPO".
       --title "<title>" --body "<body>" --labels "prediction/unreviewed"
 2. Dispatch formula via vault (if exploiting):
-     source "$PROJECT_REPO_ROOT/lib/action-vault.sh"
+     source "$PROJECT_REPO_ROOT/lib/vault.sh"
     PR_NUM=$(vault_request "predict-NNN-<formula>" "$TOML_CONTENT")
     # See EXPLOIT section above for TOML_CONTENT format
--- a/gardener/AGENTS.md
+++ b/gardener/AGENTS.md
@ -1,4 +1,4 @@
-<!-- last-reviewed: c363ee0aea2ae447daab28c2c850d6abefc8c6b5 -->
+<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
 # Gardener Agent
 **Role**: Backlog grooming — detect duplicate issues, missing acceptance
@ -32,7 +32,7 @@ the gardener runs as part of the polling loop alongside the planner, predictor,
  PR, reviewed alongside AGENTS.md changes, executed by gardener-run.sh after merge.
 **Environment variables consumed**:
- `FORGE_TOKEN`, `FORGE_GARDENER_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`. `FORGE_TOKEN_OVERRIDE` is exported to `$FORGE_GARDENER_TOKEN` before sourcing env.sh so the gardener-bot identity survives re-sourcing (#762).
+- `FORGE_TOKEN`, `FORGE_GARDENER_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`
 - `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by gardener-run.sh)
 **Lifecycle**: gardener-run.sh (invoked by polling loop every 6h, `check_active gardener`) →
--- a/gardener/gardener-run.sh
+++ b/gardener/gardener-run.sh
@ -26,11 +26,10 @@ FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
 # Accept project config from argument; default to disinto
 export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}"
-# Set override BEFORE sourcing env.sh so it survives any later re-source of
-# env.sh from nested shells / claude -p tools (#762, #747)
-export FORGE_TOKEN_OVERRIDE="${FORGE_GARDENER_TOKEN:-}"
 # shellcheck source=../lib/env.sh
 source "$FACTORY_ROOT/lib/env.sh"
+# Use gardener-bot's own Forgejo identity (#747)
+FORGE_TOKEN="${FORGE_GARDENER_TOKEN:-${FORGE_TOKEN}}"
 # shellcheck source=../lib/formula-session.sh
 source "$FACTORY_ROOT/lib/formula-session.sh"
 # shellcheck source=../lib/worktree.sh
@ -68,12 +67,6 @@ memory_guard 2000
 log "--- Gardener run start ---"
-# ── Resolve forge remote for git operations ─────────────────────────────
-# Run git operations from the project checkout, not the baked code dir
-cd "$PROJECT_REPO_ROOT"
-resolve_forge_remote
 # ── Precondition checks: skip if nothing to do ────────────────────────────
 # Check for new commits since last run
 CURRENT_SHA=$(git -C "$FACTORY_ROOT" rev-parse HEAD 2>/dev/null || echo "")
@ -92,6 +85,9 @@ fi
 log "current sha: ${CURRENT_SHA:0:8}..., backlog issues: ${backlog_count}, tech-debt issues: ${tech_debt_count}"
+# ── Resolve forge remote for git operations ─────────────────────────────
+resolve_forge_remote
 # ── Resolve agent identity for .profile repo ────────────────────────────
 resolve_agent_identity || true
--- a/gardener/pending-actions.json
+++ b/gardener/pending-actions.json
@ -1,12 +1,47 @@
 [
  {
-    "action": "comment",
+    "action": "close",
-    "issue": 623,
+    "issue": 419,
-    "body": "**Dependency check:** All blocking dependencies are now closed:\n- #620 ✓ closed\n- #621 ✓ closed  \n- #622 ✓ closed\n\nPer the issue description: *\"Once #620/#621/#622 are green, this issue should fork into at least three backlog children: subpath routing + Forgejo ROOT_URL / Woodpecker HOST, disinto-chat container scaffold with OAuth gate, and Claude Code sandbox envelope + working-dir scoping.\"*\n\nThis vision issue is ready for the planner to decompose into backlog children."
+    "reason": "Vision goal complete — all sub-issues #437-#454 closed, vault blast-radius redesign delivered"
  },
  {
-    "action": "comment",
+    "action": "close",
-    "issue": 758,
+    "issue": 494,
-    "body": "**Gardener flag:** This issue requires human admin action on Forgejo to resolve — changing branch protection settings on the ops repo. No automated formula can fix Forgejo admin settings.\n\nProposed options (from issue body):\n1. Add `planner-bot` to the merge whitelist in ops repo branch protection\n2. Remove branch protection from the ops repo (agents are primary writers)\n3. Create an admin-level service token for agents\n\nThis is blocking all ops repo writes (planner knowledge, sprint artifacts, vault items)."
+    "reason": "Resolved by PRs #502 and #503 (both merged) — repo_root workaround removed, container paths derived at runtime"
  },
  {
    "action": "close",
    "issue": 477,
    "reason": "Obsolete — #379 (while-true loop) was deployed on 2026-04-08; env.sh container guard is now correct behavior, no revert needed"
  },
  {
    "action": "edit_body",
    "issue": 498,
    "body": "Flagged by AI reviewer in PR #496.\n\n## Problem\n\n`has_responses_to_process` is only set to `true` inside the `open_arch_prs >= 3` gate in `architect/architect-run.sh` (line 543). When fewer than 3 architect PRs are open, ACCEPT/REJECT responses on existing PRs are never processed — the response-processing block at line 687 defaults to `false` and is skipped entirely.\n\nThis means that if a user ACCEPTs or REJECTs a pitch while the open PR count is below 3, the architect agent will never handle the response.\n\n## Fix\n\nSet `has_responses_to_process` (or an equivalent guard) unconditionally by scanning open PRs for ACCEPT/REJECT responses, not only when the 3-PR cap is hit.\n\n---\n*Auto-created from AI review*\n\n## Acceptance criteria\n\n- [ ] `has_responses_to_process` is computed by scanning open architect PRs for ACCEPT/REJECT responses regardless of `open_arch_prs` count\n- [ ] When a user posts ACCEPT or REJECT on an architect PR and open PR count < 3, the response is processed in the same run\n- [ ] Existing behavior when `open_arch_prs >= 3` is unchanged\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `architect/architect-run.sh` (lines ~543 and ~687 — response-processing gate)"
  },
  {
    "action": "add_label",
    "issue": 498,
    "label": "backlog"
  },
  {
    "action": "edit_body",
    "issue": 499,
    "body": "Flagged by AI reviewer in PR #496.\n\n## Problem\n\nIn `architect/architect-run.sh` line 203, the `has_open_subissues` function compares `.number` (a JSON integer) against `$vid` (a bash string via `--arg`). In jq, `42 != \"42\"` evaluates to true (different types are never equal), so the self-exclusion filter never fires. In practice this is low-risk since vision issues don't contain 'Decomposed from #N' in their own bodies, but the self-exclusion logic is silently broken.\n\n## Fix\n\nCast the string to a number in jq: `select(.number != ($vid | tonumber))`\n\n---\n*Auto-created from AI review*\n\n## Acceptance criteria\n\n- [ ] `has_open_subissues` self-exclusion filter correctly excludes the vision issue itself using `($vid | tonumber)` cast\n- [ ] A vision issue does not appear in its own subissue list\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `architect/architect-run.sh` (line ~203 — `has_open_subissues` jq filter)"
  },
  {
    "action": "add_label",
    "issue": 499,
    "label": "backlog"
  },
  {
    "action": "edit_body",
    "issue": 471,
    "body": "## Bug description\n\nWhen dev-bot picks a backlog issue and launches dev-agent.sh, a second dev-poll instance (dev-qwen) can race ahead and mark the issue as stale/blocked before dev-agent.sh finishes claiming it.\n\n## Reproduction\n\nObserved on issues #443 and #445 (2026-04-08):\n\n**#443 timeline:**\n- `20:39:03` — dev-bot removes `backlog`, adds `in-progress` (via dev-poll backlog pickup)\n- `20:39:04` — dev-qwen removes `in-progress`, adds `blocked` with reason `no_assignee_no_open_pr_no_lock`\n- `20:40:11` — dev-bot pushes commit (dev-agent was actually working the whole time)\n- `20:44:02` — PR merged, issue closed\n\n**#445 timeline:**\n- `20:54:03` — dev-bot adds `in-progress`\n- `20:54:06` — dev-qwen marks `blocked` (3 seconds later)\n- `20:55:13` — dev-bot pushes commit\n- `21:09:03` — PR merged, issue closed\n\nIn both cases, the work completed successfully despite being labeled blocked.\n\n## Root cause\n\n`issue_claim()` in `lib/issue-lifecycle.sh` performs three sequential API calls:\n1. PATCH assignee\n2. POST in-progress label\n3. DELETE backlog label\n\nMeanwhile, dev-poll on another agent (dev-qwen) runs its orphan scan, sees the issue labeled `in-progress` but with no assignee set yet (assign PATCH hasn't landed or was read stale), no open PR, and no lock file. It concludes the issue is stale and relabels to `blocked`.\n\nThe race window is ~1-3 seconds between in-progress being set and the assignee being visible to other pollers.\n\n## Impact\n\n- Issues get spuriously labeled `blocked` with a misleading stale diagnostic comment\n- dev-agent continues working anyway (it already has the issue number), so the blocked label is just noise\n- But it could confuse the gardener or humans reading the issue timeline\n- If another dev-poll instance picks up the blocked issue for recovery before the original agent finishes, it could cause duplicate work\n\n## Possible fixes\n\n1. 
**Assign before labeling**: In `issue_claim()`, set the assignee first, then add in-progress. This way, by the time in-progress is visible, the assignee is already set.\n2. **Grace period in stale detection**: Skip issues whose in-progress label was added less than N seconds ago (check label event timestamp via timeline API).\n3. **Lock file before label**: Write the agent lock file (`/tmp/dev-impl-summary-...`) at the start of dev-agent.sh before calling `issue_claim()`, so the stale detector sees the lock.\n4. **Atomic claim check**: dev-poll should re-check assignee after a short delay before declaring stale, to allow for API propagation.\n\n## Acceptance criteria\n\n- [ ] Stale detection in dev-poll does not mark an issue as blocked within the first 60 seconds of the in-progress label being applied\n- [ ] `issue_claim()` assigns the issue before adding the in-progress label (or equivalent fix is implemented)\n- [ ] No spurious `blocked` labels appear on issues that are actively being worked (verified by log inspection or integration test)\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `lib/issue-lifecycle.sh` — `issue_claim()` function (assignee + label ordering)\n- `dev/dev-poll.sh` — orphan/stale detection logic"
  },
  {
    "action": "add_label",
    "issue": 471,
    "label": "backlog"
  }
 ]
--- a/lib/AGENTS.md
+++ b/lib/AGENTS.md
@ -1,4 +1,4 @@
-<!-- last-reviewed: c363ee0aea2ae447daab28c2c850d6abefc8c6b5 -->
+<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
 # Shared Helpers (`lib/`)
 All agents source `lib/env.sh` as their first action. Additional helpers are
@ -6,15 +6,15 @@ sourced as needed.
 | File | What it provides | Sourced by |
 |---|---|---|
-| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold), `load_secret()` (secret-source abstraction — see below). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Per-agent token override (#762)**: agent run scripts export `FORGE_TOKEN_OVERRIDE=<agent-specific-token>` BEFORE sourcing `env.sh`; `env.sh` applies this override at lines 98-100, ensuring the correct identity survives any re-sourcing of `env.sh` by nested shells or `claude -p` invocations. **Required env var**: `FORGE_PASS` — bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). **Hard preconditions (#674)**: `USER` and `HOME` must be exported by the entrypoint before sourcing. When `PROJECT_TOML` is set, `PROJECT_REPO_ROOT`, `PRIMARY_BRANCH`, and `OPS_REPO_ROOT` must also be set (by entrypoint or TOML). 
**`load_secret NAME [DEFAULT]` (#793)**: backend-agnostic secret resolution. Precedence: (1) `/secrets/<NAME>.env` — Nomad-rendered template, (2) current environment — already set by `.env.enc` / compose, (3) `secrets/<NAME>.enc` — age-encrypted per-key file (decrypted on demand, cached in process env), (4) DEFAULT or empty. Consumers call `$(load_secret GITHUB_TOKEN)` instead of `${GITHUB_TOKEN}` — identical behavior whether secrets come from Docker compose injection or Nomad Vault templates. | Every agent |
+| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Required env var**: `FORGE_PASS` — bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). | Every agent |
 | `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr |
 | `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) |
 | `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. `<pipeline_number> [--step <name>]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). | ci-helpers.sh |
-| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). Reads `repo_root` and `ops_repo_root` from the TOML for host-CLI callers. **Container path handling (#674)**: no longer derives `PROJECT_REPO_ROOT` or `OPS_REPO_ROOT` inside the script — container entrypoints export the correct paths before agent scripts source `env.sh`, and the `DISINTO_CONTAINER` guard (line 90) skips TOML overrides when those vars are already set. | env.sh (when `PROJECT_TOML` is set) |
+| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). **Container path derivation**: `PROJECT_REPO_ROOT` and `OPS_REPO_ROOT` are derived at runtime when `DISINTO_CONTAINER=1` — hardcoded to `/home/agent/repos/$PROJECT_NAME` and `/home/agent/repos/$PROJECT_NAME-ops` respectively — not read from the TOML. This ensures correct paths inside containers where host paths in the TOML would be wrong. | env.sh (when `PROJECT_TOML` is set) |
 | `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll |
-| `lib/formula-session.sh` | `acquire_run_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_restore_lessons()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven polling-loop agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). `resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). **Journal digestion guards (#702)**: `_profile_digest_journals()` respects `PROFILE_DIGEST_TIMEOUT` (default 300s) and `PROFILE_DIGEST_MAX_BATCH` (default 5 journals per run); `_profile_restore_lessons()` restores the previous lessons-learned.md on digest failure. | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh |
+| `lib/formula-session.sh` | `acquire_run_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven polling-loop agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). `resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh |
 | `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in loop logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | polling-loop entry points |
-| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. `mirror_pull_register(clone_url, owner, repo_name, [interval])` — registers a Forgejo pull mirror via `POST /repos/migrate` with `mirror: true`. Creates the target repo and queues the first sync automatically. Works against empty Forgejo instances — no pre-existing content required. Used for Nomad migration cutover: point at Codeberg source, wait for sync, then proceed with `disinto init`. See [docs/mirror-bootstrap.md](../docs/mirror-bootstrap.md) for the full cutover path. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh |
+| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh |
 | `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh |
 | `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh |
 | `lib/stack-lock.sh` | File-based lock protocol for singleton project stack access. `stack_lock_acquire(holder, project)` — polls until free, breaks stale heartbeats (>10 min old), claims lock. `stack_lock_release(project)` — deletes lock file. `stack_lock_check(project)` — inspect current lock state. `stack_lock_heartbeat(project)` — update heartbeat timestamp (callers must call every 2 min while holding). Lock files at `~/data/locks/<project>-stack.lock`. | docker/edge/dispatcher.sh, reproduce formula |
@ -22,16 +22,13 @@ sourced as needed.
 | `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh |
 | `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future) |
 | `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future) |
-| `lib/action-vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request <action_id> <toml_content>` validates TOML (using `validate_vault_action` from `action-vault/vault-env.sh`), creates branch `vault/<action-id>`, writes `vault/actions/<action-id>.toml`, creates PR targeting `main` with title `vault: <action-id>` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. **Low-tier bypass**: if the action's `blast_radius` classifies as `low` (via `action-vault/classify.sh`), `vault_request` calls `_vault_commit_direct()` which commits directly to ops `main` using `FORGE_ADMIN_TOKEN` — no PR, no approval wait. Returns `0` (not a PR number) for direct commits. Requires `FORGE_TOKEN`, `FORGE_ADMIN_TOKEN` (low-tier only), `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher |
+| `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request <action_id> <toml_content>` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/<action-id>`, writes `vault/actions/<action-id>.toml`, creates PR targeting `main` with title `vault: <action-id>` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. **Low-tier bypass**: if the action's `blast_radius` classifies as `low` (via `vault/classify.sh`), `vault_request` calls `_vault_commit_direct()` which commits directly to ops `main` using `FORGE_ADMIN_TOKEN` — no PR, no approval wait. Returns `0` (not a PR number) for direct commits. Requires `FORGE_TOKEN`, `FORGE_ADMIN_TOKEN` (low-tier only), `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher |
 | `lib/branch-protection.sh` | Branch protection helpers for Forgejo repos. `setup_vault_branch_protection()` — configures admin-only merge protection on main (require 1 approval, restrict merge to admin role, block direct pushes). `setup_profile_branch_protection()` — same protection for `.profile` repos. `verify_branch_protection()` — checks protection is correctly configured. `remove_branch_protection()` — removes protection (cleanup/testing). Handles race condition after initial push: retries with backoff if Forgejo hasn't processed the branch yet. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_OPS_REPO`. | bin/disinto (hire-an-agent) |
-| `lib/agent-sdk.sh` | `agent_run([--resume SESSION_ID] [--worktree DIR] PROMPT)` — one-shot `claude -p` invocation with session persistence. Saves session ID to `SID_FILE`, reads it back on resume. `agent_recover_session()` — restore previous session ID from `SID_FILE` on startup. **Nudge guard**: skips nudge injection if the worktree is clean and no push is expected, preventing spurious re-invocations. Callers must define `SID_FILE`, `LOGFILE`, and `log()` before sourcing. **Concurrency**: external `flock` on `session.lock` is gated behind `CLAUDE_EXTERNAL_LOCK=1` (default off). When unset, each container's per-session `CLAUDE_CONFIG_DIR` isolation lets Claude Code's native lockfile handle OAuth refresh — no external serialization needed. Set `CLAUDE_EXTERNAL_LOCK=1` to re-enable the old flock wrapper as a rollback mechanism. See [`docs/CLAUDE-AUTH-CONCURRENCY.md`](../docs/CLAUDE-AUTH-CONCURRENCY.md) and AD-002 (#647). | formula-driven agents (dev-agent, planner-run, predictor-run, gardener-run) |
+| `lib/agent-sdk.sh` | `agent_run([--resume SESSION_ID] [--worktree DIR] PROMPT)` — one-shot `claude -p` invocation with session persistence. Saves session ID to `SID_FILE`, reads it back on resume. `agent_recover_session()` — restore previous session ID from `SID_FILE` on startup. **Nudge guard**: skips nudge injection if the worktree is clean and no push is expected, preventing spurious re-invocations. Callers must define `SID_FILE`, `LOGFILE`, and `log()` before sourcing. | formula-driven agents (dev-agent, planner-run, predictor-run, gardener-run) |
 | `lib/forge-setup.sh` | `setup_forge()` — Forgejo instance provisioning: creates admin user, bot accounts, org, repos (code + ops), configures webhooks, sets repo topics. Extracted from `bin/disinto`. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`. **Password storage (#361)**: after creating each bot account, stores its password in `.env` as `FORGE_<BOT>_PASS` (e.g. `FORGE_PASS`, `FORGE_REVIEW_PASS`, etc.) for use by `forge-push.sh`. | bin/disinto (init) |
 | `lib/forge-push.sh` | `push_to_forge()` — pushes a local clone to the Forgejo remote and verifies the push. `_assert_forge_push_globals()` validates required env vars before use. Requires `FORGE_URL`, `FORGE_PASS`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. **Auth**: uses `FORGE_PASS` (bot password) for git HTTP push — Forgejo 11.x rejects API tokens for `git push` (#361). | bin/disinto (init) |
 | `lib/git-creds.sh` | Shared git credential helper configuration. `configure_git_creds([HOME_DIR] [RUN_AS_CMD])` — writes a static credential helper script and configures git globally to use password-based HTTP auth (Forgejo 11.x rejects API tokens for `git push`, #361). **Retry on cold boot (#741)**: resolves bot username from `FORGE_TOKEN` with 5 retries (exponential backoff 1-5s); fails loudly and returns 1 if Forgejo is unreachable — never falls back to a wrong hardcoded default (exports `BOT_USER` on success). `repair_baked_cred_urls([--as RUN_AS_CMD] DIR ...)` — rewrites any git remote URLs that have credentials baked in to use clean URLs instead; uses `safe.directory` bypass for root-owned repos (#671). Requires `FORGE_PASS`, `FORGE_URL`, `FORGE_TOKEN`. | entrypoints (agents, edge) |
 | `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence, sprints). Evidence subdirectories seeded: engagement/, red-team/, holdout/, evolution/, user-test/. Also seeds sprints/ for architect output. Exports `_ACTUAL_OPS_SLUG`. `migrate_ops_repo(ops_root, [primary_branch])` — idempotent migration helper that seeds missing directories and .gitkeep files on existing ops repos (pre-#407 deployments). | bin/disinto (init) |
-| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for bare-metal deployments (compose mode uses polling loop instead). `_create_forgejo_oauth_app()` — generic helper to create an OAuth2 app on Forgejo (shared by Woodpecker and chat). `_create_woodpecker_oauth_impl()` — creates Woodpecker OAuth2 app (thin wrapper). `_create_chat_oauth_impl()` — creates disinto-chat OAuth2 app, writes `CHAT_OAUTH_CLIENT_ID`/`CHAT_OAUTH_CLIENT_SECRET` to `.env` (#708). `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) |
+| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for bare-metal deployments (compose mode uses polling loop instead). `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) |
-| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (uses `codeberg.org/forgejo/forgejo:11.0` tag; adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility; Forgejo includes a healthcheck so dependent services use `condition: service_healthy` — fixes cold-start races, #665; adds `chat` service block with isolated `chat-config` named volume and `CHAT_HISTORY_DIR` bind-mount for per-user NDJSON history persistence (#710); injects `FORWARD_AUTH_SECRET` for Caddy↔chat defense-in-depth auth (#709); cost-cap env vars `CHAT_MAX_REQUESTS_PER_HOUR`, `CHAT_MAX_REQUESTS_PER_DAY`, `CHAT_MAX_TOKENS_PER_DAY` (#711); subdomain fallback comment for `EDGE_TUNNEL_FQDN_*` vars (#713); all `depends_on` now use `condition: service_healthy/started` instead of bare service names; all services now include `restart: unless-stopped` including the edge service — #768; agents service now uses `image: ghcr.io/disinto/agents:${DISINTO_IMAGE_TAG:-latest}` instead of `build:` (#429); `WOODPECKER_PLUGINS_PRIVILEGED` env var added to woodpecker service (#779); agents-llama conditional block gated on `ENABLE_LLAMA_AGENT=1` (#769); `agents-llama-all` compose service (profile `agents-llama-all`, all 7 roles: review,dev,gardener,architect,planner,predictor,supervisor) added by #801; agents service gains volume mounts for `./projects`, `./.env`, `./state`), `generate_caddyfile()` — Caddyfile (routes: `/forge/*` → forgejo:3000, `/woodpecker/*` → woodpecker:8000, `/staging/*` → staging:80; `/chat/login` and `/chat/oauth/callback` bypass `forward_auth` so unauthenticated users can reach the OAuth flow; `/chat/*` gated by `forward_auth` on `chat:8080/chat/auth/verify` which stamps `X-Forwarded-User` (#709); root `/` redirects to `/forge/`), `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. 
| bin/disinto (init) |
+| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (uses `codeberg.org/forgejo/forgejo:11.0` tag; adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility), `generate_caddyfile()` — Caddyfile, `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) |
 | `lib/sprint-filer.sh` | Post-merge sub-issue filer for sprint PRs. Invoked by the `.woodpecker/ops-filer.yml` pipeline after a sprint PR merges to ops repo `main`. Parses `<!-- filer:begin --> ... <!-- filer:end -->` blocks from sprint PR bodies to extract sub-issue definitions, creates them on the project repo using `FORGE_FILER_TOKEN` (narrow-scope `filer-bot` identity with `issues:write` only), adds `in-progress` label to the parent vision issue, and handles vision lifecycle closure when all sub-issues are closed. Uses `filer_api_all()` for paginated fetches. Idempotent: uses `<!-- decomposed-from: #<vision>, sprint: <slug>, id: <id> -->` markers to skip already-filed issues. Requires `FORGE_FILER_TOKEN`, `FORGE_API`, `FORGE_API_BASE`, `FORGE_OPS_REPO`. | `.woodpecker/ops-filer.yml` (CI pipeline on ops repo) |
 | `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) |
 | `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) |
 | `lib/hvault.sh` | HashiCorp Vault helper module. `hvault_kv_get(PATH, [KEY])` — read KV v2 secret, optionally extract one key. `hvault_kv_put(PATH, KEY=VAL ...)` — write KV v2 secret. `hvault_kv_list(PATH)` — list keys at a KV path. `hvault_policy_apply(NAME, FILE)` — idempotent policy upsert. `hvault_jwt_login(ROLE, JWT)` — exchange JWT for short-lived token. `hvault_token_lookup()` — returns TTL/policies/accessor for current token. All functions use `VAULT_ADDR` + `VAULT_TOKEN` from env (fallback: `/etc/vault.d/root.token`), emit structured JSON errors to stderr on failure. Tests: `tests/lib-hvault.bats` (requires `vault server -dev`). | Not sourced at runtime yet — pure scaffolding for Nomad+Vault migration (#799) |
--- a/lib/agent-sdk.sh
+++ b/lib/agent-sdk.sh
@ -27,96 +27,6 @@ agent_recover_session() {
  fi
 }
 # claude_run_with_watchdog — run claude with idle-after-final-message watchdog
 #
 # Mitigates upstream Claude Code hang (#591) by detecting when the final
 # assistant message has been written and terminating the process after a
 # short grace period instead of waiting for CLAUDE_TIMEOUT.
 #
 # The watchdog:
 #   1. Streams claude stdout to a temp file
 #   2. Polls every 2s for the final result marker ("type":"result" for
 #      stream-json or closing } for regular json output)
 #   3. After detecting the final marker, starts a CLAUDE_IDLE_GRACE countdown
 #   4. SIGTERM claude if it hasn't exited cleanly within the grace period,
 #      followed by SIGKILL 5s later if it is still alive
 #   5. Falls back to CLAUDE_TIMEOUT as the absolute hard ceiling
 #
 # Usage: claude_run_with_watchdog claude [args...]
 # Expects: LOGFILE, log(), CLAUDE_TIMEOUT (default 7200), CLAUDE_IDLE_GRACE (default 30)
 # Outputs: the command's captured stdout, replayed once the run is over
 # Returns: the command's own exit status;
 #          124 when the CLAUDE_TIMEOUT ceiling fired;
 #          0 when the watchdog had to terminate an idle process after its
 #          final result was already written;
 #          1 if a temp file could not be created
 claude_run_with_watchdog() {
  local -a cmd=("$@")
  local out_file flag_file pid grace_pid rc
  # Temp files: captured stdout, plus a flag the watchdog fills when it is the
  # one that terminates the process. Cleaned up explicitly at the end; a
  # RETURN trap would stay installed in the caller's scope after we return.
  out_file=$(mktemp) || return 1
  flag_file=$(mktemp) || { rm -f -- "$out_file"; return 1; }
  # Start claude in background, capturing stdout to temp file
  "${cmd[@]}" > "$out_file" 2>>"$LOGFILE" &
  pid=$!
  # Background watchdog: poll for final result marker
  (
    local grace="${CLAUDE_IDLE_GRACE:-30}"
    local detected=0
    while kill -0 "$pid" 2>/dev/null; do
      # Check for stream-json result marker first (more reliable)
      if grep -q '"type":"result"' "$out_file" 2>/dev/null; then
        detected=1
        break
      fi
      # Fallback: check for closing brace of top-level result object
      if tail -c 100 "$out_file" 2>/dev/null | grep -q '}[[:space:]]*$'; then
        # Verify it looks like a JSON result (has session_id or result key)
        if grep -qE '"(session_id|result)":' "$out_file" 2>/dev/null; then
          detected=1
          break
        fi
      fi
      sleep 2
    done
    # If we detected a final message, wait grace period then kill if still running
    if [ "$detected" -eq 1 ] && kill -0 "$pid" 2>/dev/null; then
      log "watchdog: final result detected, ${grace}s grace period before SIGTERM"
      sleep "$grace"
      if kill -0 "$pid" 2>/dev/null; then
        log "watchdog: claude -p idle for ${grace}s after final result; SIGTERM"
        # Record the intervention before signalling, so the signal-induced
        # exit status is not reported to the caller as a failure
        printf '1' > "$flag_file"
        kill -TERM "$pid" 2>/dev/null || true
        # Give it a moment to clean up
        sleep 5
        if kill -0 "$pid" 2>/dev/null; then
          log "watchdog: force kill after SIGTERM timeout"
          kill -KILL "$pid" 2>/dev/null || true
        fi
      fi
    fi
  ) &
  grace_pid=$!
  # Hard ceiling timeout — tail --pid only waits for the process to disappear;
  # its status (0 or 124) says nothing about how claude itself exited
  rc=0
  timeout --foreground "${CLAUDE_TIMEOUT:-7200}" tail --pid="$pid" -f /dev/null 2>/dev/null || rc=$?
  # Clean up the watchdog
  kill "$grace_pid" 2>/dev/null || true
  wait "$grace_pid" 2>/dev/null || true
  if [ "$rc" -eq 124 ]; then
    # When timeout fires, explicitly kill the orphaned claude process:
    # tail --pid is a passive waiter, not a supervisor
    kill "$pid" 2>/dev/null || true
    sleep 1
    kill -KILL "$pid" 2>/dev/null || true
    wait "$pid" 2>/dev/null || true
  elif [ -s "$flag_file" ]; then
    # The watchdog ended an idle process whose final result was already
    # written: reap it and report success
    wait "$pid" 2>/dev/null || true
    rc=0
  else
    # Normal exit: collect the command's real exit status
    rc=0
    wait "$pid" 2>/dev/null || rc=$?
  fi
  # Output the captured stdout
  cat "$out_file"
  rm -f -- "$out_file" "$flag_file"
  return "$rc"
 }
 # agent_run — synchronous Claude invocation (one-shot claude -p)
 # Usage: agent_run [--resume SESSION_ID] [--worktree DIR] PROMPT
 # Sets: _AGENT_SESSION_ID (updated each call, persisted to SID_FILE)
@ -131,24 +41,16 @@ agent_run() {
  done
  local prompt="${1:-}"
  _AGENT_LAST_OUTPUT=""
  local -a args=(-p "$prompt" --output-format json --dangerously-skip-permissions --max-turns 200)
  [ -n "$resume_id" ] && args+=(--resume "$resume_id")
  [ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL")
  local run_dir="${worktree_dir:-$(pwd)}"
  local lock_file="${HOME}/.claude/session.lock"
  mkdir -p "$(dirname "$lock_file")"
  local output rc
  log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})"
-  # External flock is redundant once CLAUDE_CONFIG_DIR rollout is verified (#647).
+  output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$?
  # Gate behind CLAUDE_EXTERNAL_LOCK for rollback safety; default off.
  if [ -n "${CLAUDE_EXTERNAL_LOCK:-}" ]; then
    mkdir -p "$(dirname "$lock_file")"
    output=$(cd "$run_dir" && ( flock -w 600 9 || exit 1; claude_run_with_watchdog claude "${args[@]}" ) 9>"$lock_file" 2>>"$LOGFILE") && rc=0 || rc=$?
  else
    output=$(cd "$run_dir" && claude_run_with_watchdog claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$?
  fi
  if [ "$rc" -eq 124 ]; then
    log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)"
  elif [ "$rc" -ne 0 ]; then
@ -173,9 +75,7 @@ agent_run() {
  # Save output for diagnostics (no_push, crashes)
  _AGENT_LAST_OUTPUT="$output"
-  local diag_dir="${DISINTO_LOG_DIR:-/tmp}/${LOG_AGENT:-dev}"
+  local diag_file="${DISINTO_LOG_DIR:-/tmp}/dev/agent-run-last.json"
  mkdir -p "$diag_dir" 2>/dev/null || true
  local diag_file="${diag_dir}/agent-run-last.json"
  printf '%s' "$output" > "$diag_file" 2>/dev/null || true
  # Nudge: if the model stopped without pushing, resume with encouragement.
@ -191,11 +91,7 @@ agent_run() {
        local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push."
        log "agent_run: nudging (uncommitted changes)"
        local nudge_rc
-        if [ -n "${CLAUDE_EXTERNAL_LOCK:-}" ]; then
+        output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$?
          output=$(cd "$run_dir" && ( flock -w 600 9 || exit 1; claude_run_with_watchdog claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} ) 9>"$lock_file" 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$?
        else
          output=$(cd "$run_dir" && claude_run_with_watchdog claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$?
        fi
        if [ "$nudge_rc" -eq 124 ]; then
          log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)"
        elif [ "$nudge_rc" -ne 0 ]; then
--- a/lib/branch-protection.sh
+++ b/lib/branch-protection.sh
@ -34,55 +34,6 @@ _ops_api() {
  printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}"
 }
 # -----------------------------------------------------------------------------
 # _bp_wait_for_branch — Poll Forgejo until a branch is visible through the API
 #
 # A freshly pushed branch may not be queryable right away (Forgejo's branch
 # indexer can take 5–15s). The helper probes the branch endpoint up to 10
 # times. After failed probe N (N < 10) it sleeps 2*N seconds, capped at 10s —
 # i.e. 2, 4, 6, 8, 10, 10, … — a linear backoff totalling ~70s in the worst
 # case. No sleep follows the final probe.
 #
 # Args:
 #   $1 - Full API URL for the repo (e.g. https://forge.example/api/v1/repos/owner/repo)
 #   $2 - Branch name
 #   $3 - Human-readable repo identifier for log messages
 #
 # Reads: FORGE_TOKEN (sent as the Authorization token)
 # Returns: 0 as soon as a probe answers HTTP 200, 1 once all probes are used up
 # -----------------------------------------------------------------------------
 _bp_wait_for_branch() {
  local repo_api="$1" branch_name="$2" label="$3"
  local probes=10 step=2 cap=10
  local n code pause
  for (( n = 1; n <= probes; n++ )); do
    # Only the HTTP status matters; a curl failure is folded into a non-200 value
    code=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${repo_api}/git/branches/${branch_name}" 2>/dev/null || echo "0")
    if [ "$code" = "200" ]; then
      _bp_log "Branch ${branch_name} exists on ${label}"
      return 0
    fi
    # Last probe failed: skip the pointless sleep and report the error
    [ "$n" -lt "$probes" ] || break
    pause=$(( step * n < cap ? step * n : cap ))
    _bp_log "Branch ${branch_name} not indexed yet (attempt ${n}/${probes}), waiting ${pause}s..."
    sleep "$pause"
  done
  _bp_log "ERROR: Branch ${branch_name} does not exist on ${label} after ${probes} attempts"
  return 1
 }
 # -----------------------------------------------------------------------------
 # setup_vault_branch_protection — Set up admin-only branch protection for main
 #
@ -100,8 +51,30 @@ setup_vault_branch_protection() {
  _bp_log "Setting up branch protection for ${branch} on ${FORGE_OPS_REPO}"
-  # Wait for Forgejo to index the branch (may take 5–15s after push)
+  # Check if branch exists with retry loop (handles race condition after initial push)
-  if ! _bp_wait_for_branch "$api_url" "$branch" "$FORGE_OPS_REPO"; then
+  local branch_exists="0"
  local max_attempts=3
  local attempt=1
  while [ "$attempt" -le "$max_attempts" ]; do
    branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
    if [ "$branch_exists" = "200" ]; then
      _bp_log "Branch ${branch} exists on ${FORGE_OPS_REPO}"
      break
    fi
    if [ "$attempt" -lt "$max_attempts" ]; then
      _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..."
      sleep 2
    fi
    attempt=$((attempt + 1))
  done
  if [ "$branch_exists" != "200" ]; then
    _bp_log "ERROR: Branch ${branch} does not exist on ${FORGE_OPS_REPO} after ${max_attempts} attempts"
    return 1
  fi
@ -271,8 +244,30 @@ setup_profile_branch_protection() {
  local api_url
  api_url="${FORGE_URL}/api/v1/repos/${repo}"
-  # Wait for Forgejo to index the branch (may take 5–15s after push)
+  # Check if branch exists with retry loop (handles race condition after initial push)
-  if ! _bp_wait_for_branch "$api_url" "$branch" "$repo"; then
+  local branch_exists="0"
  local max_attempts=3
  local attempt=1
  while [ "$attempt" -le "$max_attempts" ]; do
    branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
    if [ "$branch_exists" = "200" ]; then
      _bp_log "Branch ${branch} exists on ${repo}"
      break
    fi
    if [ "$attempt" -lt "$max_attempts" ]; then
      _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..."
      sleep 2
    fi
    attempt=$((attempt + 1))
  done
  if [ "$branch_exists" != "200" ]; then
    _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts"
    return 1
  fi
@ -435,8 +430,30 @@ setup_project_branch_protection() {
  local api_url
  api_url="${FORGE_URL}/api/v1/repos/${repo}"
-  # Wait for Forgejo to index the branch (may take 5–15s after push)
+  # Check if branch exists with retry loop (handles race condition after initial push)
-  if ! _bp_wait_for_branch "$api_url" "$branch" "$repo"; then
+  local branch_exists="0"
  local max_attempts=3
  local attempt=1
  while [ "$attempt" -le "$max_attempts" ]; do
    branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
    if [ "$branch_exists" = "200" ]; then
      _bp_log "Branch ${branch} exists on ${repo}"
      break
    fi
    if [ "$attempt" -lt "$max_attempts" ]; then
      _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..."
      sleep 2
    fi
    attempt=$((attempt + 1))
  done
  if [ "$branch_exists" != "200" ]; then
    _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts"
    return 1
  fi
--- a/lib/ci-setup.sh
+++ b/lib/ci-setup.sh
@ -4,9 +4,7 @@
 #
 # Internal functions (called via _load_ci_context + _*_impl):
 #   _install_cron_impl()              - Install crontab entries (bare-metal only; compose uses polling loop)
 #   _create_forgejo_oauth_app()       - Generic: create an OAuth2 app on Forgejo (shared helper)
 #   _create_woodpecker_oauth_impl()   - Create OAuth2 app on Forgejo for Woodpecker
 #   _create_chat_oauth_impl()         - Create OAuth2 app on Forgejo for disinto-chat
 #   _generate_woodpecker_token_impl() - Auto-generate WOODPECKER_TOKEN via OAuth2 flow
 #   _activate_woodpecker_repo_impl()  - Activate repo in Woodpecker
 #
@ -47,9 +45,9 @@ _install_cron_impl() {
  # Bare mode: crontab is required on the host
  if ! command -v crontab &>/dev/null; then
-    echo "Warning: crontab not found (required for bare-metal scheduling)" >&2
+    echo "Error: crontab not found (required for bare-metal mode)" >&2
    echo "  Install: apt install cron  /  brew install cron" >&2
-    return 1
+    exit 1
  fi
  # Use absolute path for the TOML in cron entries
@ -92,54 +90,6 @@ _install_cron_impl() {
  fi
 }
 # Create an OAuth2 application on Forgejo.
 # Generic helper used by both Woodpecker and chat OAuth setup.
 # Sets _OAUTH_CLIENT_ID and _OAUTH_CLIENT_SECRET on success.
 # If an app with the same name already exists, only _OAUTH_CLIENT_ID is set
 # (from the existing app) and _OAUTH_CLIENT_SECRET is left empty.
 # Usage: _create_forgejo_oauth_app <app_name> <redirect_uri>
 # Reads: FORGE_URL, FORGE_TOKEN
 # Returns: 0 when the app exists or was created, 1 when creation failed
 _create_forgejo_oauth_app() {
  local oauth2_name="$1"
  local redirect_uri="$2"
  local forge_url="${FORGE_URL}"
  _OAUTH_CLIENT_ID=""
  _OAUTH_CLIENT_SECRET=""
  # Reuse an existing app of the same name; lookup errors are treated as
  # "not found" and fall through to creation
  local existing_app
  existing_app=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \
    | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true
  if [ -n "$existing_app" ]; then
    echo "OAuth2:  ${oauth2_name} (already exists, client_id=${existing_app})"
    _OAUTH_CLIENT_ID="$existing_app"
    return 0
  fi
  # Let jq build the request body so quotes or backslashes in the name or
  # redirect URI are JSON-escaped instead of breaking the payload
  local payload
  payload=$(jq -cn --arg name "$oauth2_name" --arg uri "$redirect_uri" \
    '{name: $name, redirect_uris: [$uri], confidential_client: true}' 2>/dev/null) || payload=""
  local oauth2_resp=""
  if [ -n "$payload" ]; then
    oauth2_resp=$(curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/user/applications/oauth2" \
      -d "$payload" \
      2>/dev/null) || oauth2_resp=""
  fi
  if [ -z "$oauth2_resp" ]; then
    echo "Warning: failed to create OAuth2 app '${oauth2_name}' on Forgejo" >&2
    return 1
  fi
  _OAUTH_CLIENT_ID=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty')
  _OAUTH_CLIENT_SECRET=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty')
  if [ -z "$_OAUTH_CLIENT_ID" ]; then
    echo "Warning: OAuth2 app creation returned no client_id" >&2
    return 1
  fi
  echo "OAuth2:  ${oauth2_name} created (client_id=${_OAUTH_CLIENT_ID})"
 }
 # Set up Woodpecker CI to use Forgejo as its forge backend.
 # Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo.
 # Usage: create_woodpecker_oauth <forge_url> <repo_slug>
@ -150,9 +100,44 @@ _create_woodpecker_oauth_impl() {
  echo ""
  echo "── Woodpecker OAuth2 setup ────────────────────────────"
-  _create_forgejo_oauth_app "woodpecker-ci" "http://localhost:8000/authorize" || return 0
+  # Create OAuth2 application on Forgejo for Woodpecker
-  local client_id="${_OAUTH_CLIENT_ID}"
+  local oauth2_name="woodpecker-ci"
-  local client_secret="${_OAUTH_CLIENT_SECRET}"
+  local redirect_uri="http://localhost:8000/authorize"
  local existing_app client_id client_secret
  # Check if OAuth2 app already exists
  existing_app=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \
    | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true
  if [ -n "$existing_app" ]; then
    echo "OAuth2:  ${oauth2_name} (already exists, client_id=${existing_app})"
    client_id="$existing_app"
  else
    local oauth2_resp
    oauth2_resp=$(curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/user/applications/oauth2" \
      -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \
      2>/dev/null) || oauth2_resp=""
    if [ -z "$oauth2_resp" ]; then
      echo "Warning: failed to create OAuth2 app on Forgejo" >&2
      return
    fi
    client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty')
    client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty')
    if [ -z "$client_id" ]; then
      echo "Warning: OAuth2 app creation returned no client_id" >&2
      return
    fi
    echo "OAuth2:  ${oauth2_name} created (client_id=${client_id})"
  fi
  # Store Woodpecker forge config in .env
  # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references
@ -181,39 +166,6 @@ _create_woodpecker_oauth_impl() {
  echo "Config:  Woodpecker forge vars written to .env"
 }
 # Create OAuth2 app on Forgejo for disinto-chat.
 # Writes CHAT_OAUTH_CLIENT_ID / CHAT_OAUTH_CLIENT_SECRET to .env.
 # Existing entries are replaced in place, missing ones are appended; a value
 # that came back empty (e.g. the secret of an already-existing app) is skipped.
 # Best effort: if the OAuth2 app cannot be created the function returns 0
 # without touching .env.
 # Usage: _create_chat_oauth_impl <redirect_uri>
 # Reads: FACTORY_ROOT (location of .env)
 _create_chat_oauth_impl() {
  local redirect_uri="$1"
  echo ""
  echo "── Chat OAuth2 setup ──────────────────────────────────"
  _create_forgejo_oauth_app "disinto-chat" "$redirect_uri" || return 0
  local client_id="${_OAUTH_CLIENT_ID}"
  local client_secret="${_OAUTH_CLIENT_SECRET}"
  local env_file="${FACTORY_ROOT}/.env"
  local chat_vars=()
  if [ -n "${client_id:-}" ]; then
    chat_vars+=("CHAT_OAUTH_CLIENT_ID=${client_id}")
  fi
  if [ -n "${client_secret:-}" ]; then
    chat_vars+=("CHAT_OAUTH_CLIENT_SECRET=${client_secret}")
  fi
  # Declared here so the loop variables do not leak into the sourcing shell
  local var_line var_name
  for var_line in "${chat_vars[@]}"; do
    var_name="${var_line%%=*}"
    if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then
      # NOTE(review): the value is spliced into the sed replacement as-is, so
      # '|', '&' or '\' in a client id/secret would not be written literally
      sed -i "s|^${var_name}=.*|${var_line}|" "$env_file"
    else
      printf '%s\n' "$var_line" >> "$env_file"
    fi
  done
  echo "Config:  Chat OAuth vars written to .env"
 }
 # Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow.
 # Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created).
 # Called after compose stack is up, before activate_woodpecker_repo.
--- a/lib/claude-config.sh
+++ b/lib/claude-config.sh
@ -1,103 +0,0 @@
 #!/usr/bin/env bash
 # lib/claude-config.sh — Shared Claude config directory helpers (#641)
 #
 # Provides setup_claude_config_dir() for creating/migrating CLAUDE_CONFIG_DIR
 # and _env_set_idempotent() for writing env vars to .env files.
 #
 # Requires: CLAUDE_CONFIG_DIR, CLAUDE_SHARED_DIR (set by lib/env.sh)
 # Idempotent .env writer.
 # Appends KEY=VALUE to FILE when no KEY= line exists; otherwise rewrites the
 # existing KEY= line(s) only if the first one holds a different value.
 #
 # Usage: _env_set_idempotent KEY VALUE FILE
 #   KEY    variable name (used as a line-anchored grep/sed pattern)
 #   VALUE  value to store; written verbatim
 #   FILE   path of the env file (created by the append if it does not exist)
 _env_set_idempotent() {
  local key="$1" value="$2" file="$3"
  if ! grep -q "^${key}=" "$file" 2>/dev/null; then
    printf '%s=%s\n' "$key" "$value" >> "$file"
    return
  fi
  local existing
  existing=$(grep -m1 "^${key}=" "$file")
  existing="${existing#*=}"
  if [ "$existing" != "$value" ]; then
    # Escape backslash, '&' and the '|' delimiter: all three are special in a
    # sed replacement and would otherwise corrupt the stored value.
    local escaped
    escaped=$(printf '%s' "$value" | sed -e 's/[\\&|]/\\&/g')
    sed -i "s|^${key}=.*|${key}=${escaped}|" "$file"
  fi
 }
 # Create the shared CLAUDE_CONFIG_DIR and make ~/.claude a symlink to it,
 # optionally migrating the contents of an existing ~/.claude directory.
 #
 # Usage: setup_claude_config_dir [auto_yes]
 #   auto_yes  "true" migrates a non-empty ~/.claude without prompting
 #             (default "false": prompt when stdin is a TTY, otherwise warn
 #             and leave everything as is)
 # Globals: HOME, USER, CLAUDE_CONFIG_DIR (read)
 # Returns: 0 on success and when migration is skipped or declined;
 #          1 when both directories are non-empty, or when copying fails
 setup_claude_config_dir() {
  local auto_yes="${1:-false}"
  local home_claude="${HOME}/.claude"
  # Create the shared config directory (idempotent)
  install -d -m 0700 -o "$USER" "$CLAUDE_CONFIG_DIR"
  echo "Claude:  ${CLAUDE_CONFIG_DIR} (ready)"
  # If ~/.claude is already a symlink to CLAUDE_CONFIG_DIR, nothing to do
  if [ -L "$home_claude" ]; then
    local link_target
    link_target=$(readlink -f "$home_claude")
    local config_real
    config_real=$(readlink -f "$CLAUDE_CONFIG_DIR")
    if [ "$link_target" = "$config_real" ]; then
      echo "Claude:  ${home_claude} -> ${CLAUDE_CONFIG_DIR} (symlink OK)"
      return 0
    fi
  fi
  local home_exists=false home_nonempty=false
  local config_nonempty=false
  # Check ~/.claude (a symlink is deliberately not counted as a directory here)
  if [ -d "$home_claude" ] && [ ! -L "$home_claude" ]; then
    home_exists=true
    if [ -n "$(ls -A "$home_claude" 2>/dev/null)" ]; then
      home_nonempty=true
    fi
  fi
  # Check CLAUDE_CONFIG_DIR contents
  if [ -n "$(ls -A "$CLAUDE_CONFIG_DIR" 2>/dev/null)" ]; then
    config_nonempty=true
  fi
  # Case: both non-empty — abort, operator must reconcile
  if [ "$home_nonempty" = true ] && [ "$config_nonempty" = true ]; then
    echo "ERROR: both ${home_claude} and ${CLAUDE_CONFIG_DIR} exist and are non-empty" >&2
    echo "  Reconcile manually: merge or remove one, then re-run disinto init" >&2
    return 1
  fi
  # Case: ~/.claude exists and CLAUDE_CONFIG_DIR is empty — offer migration
  if [ "$home_nonempty" = true ] && [ "$config_nonempty" = false ]; then
    local do_migrate=false
    # Keep the prompt answer local so it does not leak into the caller's scope.
    local confirm=""
    if [ "$auto_yes" = true ]; then
      do_migrate=true
    elif [ -t 0 ]; then
      read -rp "Migrate ${home_claude} to ${CLAUDE_CONFIG_DIR}? [Y/n] " confirm
      # Anything not starting with n/N (including plain Enter) means yes.
      if [[ ! "$confirm" =~ ^[Nn] ]]; then
        do_migrate=true
      fi
    else
      echo "Warning: ${home_claude} exists but cannot prompt for migration (no TTY)" >&2
      echo "  Re-run with --yes to auto-migrate, or move files manually" >&2
      return 0
    fi
    if [ "$do_migrate" = true ]; then
      # Copy contents (not the dir itself) to preserve CLAUDE_CONFIG_DIR ownership.
      # The source is removed only after the copy succeeded: `set -e` is not
      # in effect when this function is called from a conditional, and
      # deleting after a failed copy would destroy the only copy of the data.
      if ! cp -a "$home_claude/." "$CLAUDE_CONFIG_DIR/"; then
        echo "ERROR: failed to copy ${home_claude} to ${CLAUDE_CONFIG_DIR}; ${home_claude} left untouched" >&2
        return 1
      fi
      rm -rf -- "${home_claude:?}"
      ln -sfn "$CLAUDE_CONFIG_DIR" "$home_claude"
      echo "Claude:  migrated ${home_claude} -> ${CLAUDE_CONFIG_DIR}"
      return 0
    fi
  fi
  # Case: ~/.claude exists but is empty, or doesn't exist — create symlink
  if [ "$home_exists" = true ] && [ "$home_nonempty" = false ]; then
    rmdir "$home_claude" 2>/dev/null || true
  fi
  if [ ! -e "$home_claude" ]; then
    ln -sfn "$CLAUDE_CONFIG_DIR" "$home_claude"
    echo "Claude:  ${home_claude} -> ${CLAUDE_CONFIG_DIR} (symlink created)"
  fi
 }
--- a/lib/env.sh
+++ b/lib/env.sh
@ -1,41 +1,12 @@
 #!/usr/bin/env bash
 # =============================================================================
 # env.sh — Load environment and shared utilities
 # Source this at the top of every script: source "$(dirname "$0")/lib/env.sh"
 #
 # SURFACE CONTRACT
 #
 # Required preconditions — the entrypoint (or caller) MUST set these before
 # sourcing this file:
 #   USER              — OS user name (e.g. "agent", "johba")
 #   HOME              — home directory (e.g. "/home/agent")
 #
 # Required when PROJECT_TOML is set (i.e. agent scripts loading a project):
 #   PROJECT_REPO_ROOT — absolute path to the project git clone
 #   PRIMARY_BRANCH    — default branch name (e.g. "main")
 #   OPS_REPO_ROOT     — absolute path to the ops repo clone
 #   (these are normally populated by load-project.sh from the TOML)
 #
 # What this file sets / exports:
 #   FACTORY_ROOT, DISINTO_LOG_DIR
 #   .env / .env.enc secrets (FORGE_TOKEN, etc.)
 #   FORGE_API, FORGE_WEB, TEA_LOGIN, FORGE_OPS_REPO (derived from FORGE_URL/FORGE_REPO)
 #   Per-agent tokens (FORGE_REVIEW_TOKEN, FORGE_GARDENER_TOKEN, …)
 #   CLAUDE_SHARED_DIR, CLAUDE_CONFIG_DIR
 #   Helper functions: log(), validate_url(), forge_api(), forge_api_all(),
 #     woodpecker_api(), wpdb(), memory_guard()
 # =============================================================================
 set -euo pipefail
 # Resolve script root (parent of lib/)
 FACTORY_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
 # ── Precondition assertions ──────────────────────────────────────────────────
 # These must be set by the entrypoint before sourcing this file.
 : "${USER:?must be set by entrypoint before sourcing lib/env.sh}"
 : "${HOME:?must be set by entrypoint before sourcing lib/env.sh}"
 # Container detection: when running inside the agent container, DISINTO_CONTAINER
 # is set by docker-compose.yml.  Adjust paths so phase files, logs, and thread
 # maps land on the persistent volume instead of /tmp (which is ephemeral).
@ -101,6 +72,7 @@ fi
 # PATH: foundry, node, system
 export PATH="${HOME}/.local/bin:${HOME}/.foundry/bin:${HOME}/.nvm/versions/node/v22.20.0/bin:/usr/local/bin:/usr/bin:/bin:${PATH}"
 export HOME="${HOME:-/home/debian}"
 # Load project TOML if PROJECT_TOML is set (by poll scripts that accept project arg)
 if [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then
@ -121,16 +93,14 @@ export FORGE_VAULT_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}"
 export FORGE_SUPERVISOR_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}"
 export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}"
 export FORGE_ARCHITECT_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}"
 export FORGE_FILER_TOKEN="${FORGE_FILER_TOKEN:-${FORGE_TOKEN}}"
 # Bot usernames filter
-export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot,filer-bot}"
+export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}"
 # Project config
 export FORGE_REPO="${FORGE_REPO:-}"
 export FORGE_URL="${FORGE_URL:-http://localhost:3000}"
-export FORGE_API_BASE="${FORGE_API_BASE:-${FORGE_URL}/api/v1}"
+export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}"
 export FORGE_API="${FORGE_API:-${FORGE_API_BASE}/repos/${FORGE_REPO}}"
 export FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}"
 # tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo)
 if [ -z "${TEA_LOGIN:-}" ]; then
@ -142,14 +112,12 @@ fi
 export TEA_LOGIN
 export PROJECT_NAME="${PROJECT_NAME:-${FORGE_REPO##*/}}"
 export PROJECT_REPO_ROOT="${PROJECT_REPO_ROOT:-/home/${USER}/${PROJECT_NAME}}"
 export PRIMARY_BRANCH="${PRIMARY_BRANCH:-master}"
-# Project-specific paths: no guessing from USER/HOME — must be set by
+# Ops repo: operational data (vault items, journals, evidence, prerequisites).
-# the entrypoint or loaded from PROJECT_TOML (via load-project.sh above).
+# Default convention: sibling directory named {project}-ops.
-if [ -n "${PROJECT_TOML:-}" ]; then
+export OPS_REPO_ROOT="${OPS_REPO_ROOT:-/home/${USER}/${PROJECT_NAME}-ops}"
  : "${PROJECT_REPO_ROOT:?must be set by entrypoint or PROJECT_TOML before sourcing lib/env.sh}"
  : "${PRIMARY_BRANCH:?must be set by entrypoint or PROJECT_TOML before sourcing lib/env.sh}"
  : "${OPS_REPO_ROOT:?must be set by entrypoint or PROJECT_TOML before sourcing lib/env.sh}"
 fi
 # Forge repo slug for the ops repo (used by agents that commit to ops).
 export FORGE_OPS_REPO="${FORGE_OPS_REPO:-${FORGE_REPO:+${FORGE_REPO}-ops}}"
@ -158,19 +126,12 @@ export WOODPECKER_SERVER="${WOODPECKER_SERVER:-http://localhost:8000}"
 export CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"
 # Vault-only token guard (#745): external-action tokens (GITHUB_TOKEN, CLAWHUB_TOKEN)
-# must NEVER be available to agents. They live in secrets/*.enc and are decrypted
+# must NEVER be available to agents. They live in .env.vault.enc and are injected
-# only into the ephemeral runner container at fire time (#777). Unset them here so
+# only into the ephemeral runner container at fire time. Unset them here so
 # even an accidental .env inclusion cannot leak them into agent sessions.
 unset GITHUB_TOKEN 2>/dev/null || true
 unset CLAWHUB_TOKEN 2>/dev/null || true
 # Shared Claude config directory for cross-container OAuth lock coherence (#641).
 # All containers and the host resolve to the same CLAUDE_CONFIG_DIR on a shared
 # bind-mounted filesystem, so proper-lockfile's atomic mkdir works across them.
 : "${CLAUDE_SHARED_DIR:=/var/lib/disinto/claude-shared}"
 : "${CLAUDE_CONFIG_DIR:=${CLAUDE_SHARED_DIR}/config}"
 export CLAUDE_SHARED_DIR CLAUDE_CONFIG_DIR
 # Disable Claude Code auto-updater, telemetry, error reporting in factory sessions.
 # Factory processes must never phone home or auto-update mid-session (#725).
 export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1
@ -313,68 +274,6 @@ memory_guard() {
  fi
 }
 # =============================================================================
 # SECRET LOADING ABSTRACTION
 # =============================================================================
 # load_secret NAME [DEFAULT]
 #
 # Writes the value of secret NAME to stdout, using the first non-empty source:
 #   1. /secrets/<NAME>.env                 — env-style file, sourced in a subshell
 #   2. variable NAME in the current environment
 #   3. ${FACTORY_ROOT}/secrets/<NAME>.enc  — decrypted with `age` and the key in
 #      ${HOME}/.config/sops/age/keys.txt (skipped when file, key or binary is missing)
 #   4. DEFAULT (nothing is printed when it is empty or omitted)
 #
 # A value obtained from source 1 or 3 is additionally exported as NAME.
 # Returns 0 whether or not a value was found.
 # =============================================================================
 load_secret() {
  local name="$1"
  local default="${2:-}"
  local found

  # 1. Rendered template file
  local template="/secrets/${name}.env"
  if [ -f "$template" ]; then
    # The file is evaluated in a subshell so that only NAME's value escapes.
    found=$(
      set -a
      # shellcheck source=/dev/null
      source "$template"
      set +a
      printf '%s' "${!name:-}"
    )
    if [ -n "$found" ]; then
      export "$name=$found"
      printf '%s' "$found"
      return 0
    fi
  fi

  # 2. Value already present in the environment
  found="${!name:-}"
  if [ -n "$found" ]; then
    printf '%s' "$found"
    return 0
  fi

  # 3. Per-key age-encrypted file (#777)
  local key_file="${HOME}/.config/sops/age/keys.txt"
  local cipher="${FACTORY_ROOT}/secrets/${name}.enc"
  if [ -f "$cipher" ] && [ -f "$key_file" ] && command -v age &>/dev/null; then
    if found=$(age -d -i "$key_file" "$cipher" 2>/dev/null) && [ -n "$found" ]; then
      export "$name=$found"
      printf '%s' "$found"
      return 0
    fi
  fi

  # 4. Fall back to the default; printing an empty default emits nothing
  printf '%s' "$default"
  return 0
 }
 # Source tea helpers (available when tea binary is installed)
 if command -v tea &>/dev/null; then
  # shellcheck source=tea-helpers.sh
--- a/lib/forge-push.sh
+++ b/lib/forge-push.sh
@ -7,6 +7,7 @@
 # Globals expected:
 #   FORGE_URL    - Forge instance URL (e.g. http://localhost:3000)
 #   FORGE_TOKEN  - API token for Forge operations (used for API verification)
 #   FORGE_PASS   - Bot password for git HTTP push (#361: tokens rejected by Forgejo 11.x)
 #   FACTORY_ROOT - Root of the disinto factory
 #   PRIMARY_BRANCH - Primary branch name (e.g. main)
 #
@ -20,6 +21,7 @@ set -euo pipefail
 _assert_forge_push_globals() {
  local missing=()
  [ -z "${FORGE_URL:-}" ]      && missing+=("FORGE_URL")
  [ -z "${FORGE_PASS:-}" ]     && missing+=("FORGE_PASS")
  [ -z "${FORGE_TOKEN:-}" ]    && missing+=("FORGE_TOKEN")
  [ -z "${FACTORY_ROOT:-}" ]   && missing+=("FACTORY_ROOT")
  [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH")
@ -33,11 +35,17 @@ _assert_forge_push_globals() {
 push_to_forge() {
  local repo_root="$1" forge_url="$2" repo_slug="$3"
-  # Use clean URL — credential helper supplies auth (#604).
+  # Build authenticated remote URL: http://dev-bot:<password>@host:port/org/repo.git
-  # Forgejo 11.x rejects API tokens for git HTTP push (#361); password auth works
+  # Forgejo 11.x rejects API tokens for git HTTP push (#361); password auth works.
-  # via the credential helper configured in configure_git_creds().
+  if [ -z "${FORGE_PASS:-}" ]; then
-  local remote_url="${forge_url}/${repo_slug}.git"
+    echo "Error: FORGE_PASS not set — cannot push to Forgejo (see #361)" >&2
-  local display_url="$remote_url"
+    return 1
  fi
  local auth_url
  auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_PASS}@|")
  local remote_url="${auth_url}/${repo_slug}.git"
  # Display URL without the embedded password (safe to print)
  local display_url="${forge_url}/${repo_slug}.git"
  # Always set the remote URL to ensure credentials are current
  if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then
--- a/lib/forge-setup.sh
+++ b/lib/forge-setup.sh
@ -31,41 +31,17 @@ _load_init_context() {
 # Execute a command in the Forgejo container (for admin operations)
 _forgejo_exec() {
  local use_bare="${DISINTO_BARE:-false}"
  local cname="${FORGEJO_CONTAINER_NAME:-disinto-forgejo}"
  if [ "$use_bare" = true ]; then
-    docker exec -u git "$cname" "$@"
+    docker exec -u git disinto-forgejo "$@"
  else
    docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@"
  fi
 }
 # Idempotency probe: is there a line starting with "<token_var>=" in the env file?
 # Usage:   _token_exists_in_env <token_var> <env_file>
 # Returns: grep's exit status — 0 when such a line exists, non-zero otherwise
 #          (including an unreadable or missing file; grep's stderr is discarded)
 _token_exists_in_env() {
  local token_var="$1" env_file="$2"
  grep -q -e "^${token_var}=" "$env_file" 2>/dev/null
 }
 # Idempotency probe: is there a line starting with "<pass_var>=" in the env file?
 # Usage:   _pass_exists_in_env <pass_var> <env_file>
 # Returns: grep's exit status — 0 when such a line exists, non-zero otherwise
 #          (including an unreadable or missing file; grep's stderr is discarded)
 _pass_exists_in_env() {
  local pass_var="$1" env_file="$2"
  grep -q -e "^${pass_var}=" "$env_file" 2>/dev/null
 }
 # Provision or connect to a local Forgejo instance.
 # Creates admin + bot users, generates API tokens, stores in .env.
 # When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose.
 # Usage: setup_forge [--rotate-tokens] <forge_url> <repo_slug>
 setup_forge() {
  local rotate_tokens=false
  # Parse optional --rotate-tokens flag
  if [ "$1" = "--rotate-tokens" ]; then
    rotate_tokens=true
    shift
  fi
  local forge_url="$1"
  local repo_slug="$2"
  local use_bare="${DISINTO_BARE:-false}"
@ -74,7 +50,7 @@ setup_forge() {
  echo "── Forge setup ────────────────────────────────────────"
  # Check if Forgejo is already running
-  if curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/version" >/dev/null 2>&1; then
+  if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then
    echo "Forgejo:  ${forge_url} (already running)"
  else
    echo "Forgejo not reachable at ${forge_url}"
@ -95,12 +71,11 @@ setup_forge() {
      # Bare-metal mode: standalone docker run
      mkdir -p "${FORGEJO_DATA_DIR}"
-      local cname="${FORGEJO_CONTAINER_NAME:-disinto-forgejo}"
+      if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then
-      if docker ps -a --format '{{.Names}}' | grep -q "^${cname}$"; then
+        docker start disinto-forgejo >/dev/null 2>&1 || true
        docker start "$cname" >/dev/null 2>&1 || true
      else
        docker run -d \
-          --name "$cname" \
+          --name disinto-forgejo \
          --restart unless-stopped \
          -p "${forge_port}:3000" \
          -p 2222:22 \
@ -119,7 +94,7 @@ setup_forge() {
    # Wait for Forgejo to become healthy
    echo -n "Waiting for Forgejo to start"
    local retries=0
-    while ! curl -sf --max-time 3 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/version" >/dev/null 2>&1; do
+    while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do
      retries=$((retries + 1))
      if [ "$retries" -gt 60 ]; then
        echo ""
@ -163,7 +138,7 @@ setup_forge() {
    admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
  fi
-  if ! curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
+  if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
    echo "Creating admin user: ${admin_user}"
    local create_output
    if ! create_output=$(_forgejo_exec forgejo admin user create \
@ -184,7 +159,7 @@ setup_forge() {
      --must-change-password=false
    # Verify admin user was actually created
-    if ! curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
+    if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
      echo "Error: admin user '${admin_user}' not found after creation" >&2
      exit 1
    fi
@ -212,10 +187,10 @@ setup_forge() {
  # Create human user (disinto-admin) as site admin if it doesn't exist
  local human_user="disinto-admin"
-  # human_user == admin_user; reuse admin_pass for basic-auth operations
+  local human_pass
-  local human_pass="$admin_pass"
+  human_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
-  if ! curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
+  if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
    echo "Creating human user: ${human_user}"
    local create_output
    if ! create_output=$(_forgejo_exec forgejo admin user create \
@ -236,7 +211,7 @@ setup_forge() {
      --must-change-password=false
    # Verify human user was actually created
-    if ! curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN:-}" "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
+    if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
      echo "Error: human user '${human_user}' not found after creation" >&2
      exit 1
    fi
@ -245,74 +220,50 @@ setup_forge() {
    echo "Human user: ${human_user} (already exists)"
  fi
-  # Preserve admin token if already stored in .env (idempotent re-run)
+  # Delete existing admin token if present (token sha1 is only returned at creation time)
-  local admin_token=""
+  local existing_token_id
-  if _token_exists_in_env "FORGE_ADMIN_TOKEN" "$env_file" && [ "$rotate_tokens" = false ]; then
+  existing_token_id=$(curl -sf \
-    admin_token=$(grep '^FORGE_ADMIN_TOKEN=' "$env_file" | head -1 | cut -d= -f2-)
+    -u "${admin_user}:${admin_pass}" \
-    [ -n "$admin_token" ] && echo "Admin token: preserved (use --rotate-tokens to force)"
+    "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \
    | jq -r '.[] | select(.name == "disinto-admin-token") | .id') || existing_token_id=""
  if [ -n "$existing_token_id" ]; then
    curl -sf -X DELETE \
      -u "${admin_user}:${admin_pass}" \
      "${forge_url}/api/v1/users/${admin_user}/tokens/${existing_token_id}" >/dev/null 2>&1 || true
  fi
  # Create admin token (fresh, so sha1 is returned)
  local admin_token
  admin_token=$(curl -sf -X POST \
    -u "${admin_user}:${admin_pass}" \
    -H "Content-Type: application/json" \
    "${forge_url}/api/v1/users/${admin_user}/tokens" \
    -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \
    | jq -r '.sha1 // empty') || admin_token=""
  if [ -z "$admin_token" ]; then
-    # Delete existing admin token if present (token sha1 is only returned at creation time)
+    echo "Error: failed to obtain admin API token" >&2
-    local existing_token_id
+    exit 1
    existing_token_id=$(curl -sf \
      -u "${admin_user}:${admin_pass}" \
      "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \
      | jq -r '.[] | select(.name == "disinto-admin-token") | .id') || existing_token_id=""
    if [ -n "$existing_token_id" ]; then
      curl -sf -X DELETE \
        -u "${admin_user}:${admin_pass}" \
        "${forge_url}/api/v1/users/${admin_user}/tokens/${existing_token_id}" >/dev/null 2>&1 || true
    fi
    # Create admin token (fresh, so sha1 is returned)
    admin_token=$(curl -sf -X POST \
      -u "${admin_user}:${admin_pass}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/users/${admin_user}/tokens" \
      -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \
      | jq -r '.sha1 // empty') || admin_token=""
    if [ -z "$admin_token" ]; then
      echo "Error: failed to obtain admin API token" >&2
      exit 1
    fi
    # Store admin token for idempotent re-runs
    if grep -q '^FORGE_ADMIN_TOKEN=' "$env_file" 2>/dev/null; then
      sed -i "s|^FORGE_ADMIN_TOKEN=.*|FORGE_ADMIN_TOKEN=${admin_token}|" "$env_file"
    else
      printf 'FORGE_ADMIN_TOKEN=%s\n' "$admin_token" >> "$env_file"
    fi
    echo "Admin token: generated and saved (FORGE_ADMIN_TOKEN)"
  fi
-  # Get or create human user token (human_user == admin_user; use admin_pass)
+  # Get or create human user token
-  local human_token=""
+  local human_token
-  if _token_exists_in_env "HUMAN_TOKEN" "$env_file" && [ "$rotate_tokens" = false ]; then
+  if curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
    human_token=$(grep '^HUMAN_TOKEN=' "$env_file" | head -1 | cut -d= -f2-)
    if [ -n "$human_token" ]; then
      export HUMAN_TOKEN="$human_token"
      echo "  Human token preserved (use --rotate-tokens to force)"
    fi
  fi
  if [ -z "$human_token" ]; then
    # Delete existing human token if present (token sha1 is only returned at creation time)
    local existing_human_token_id
    existing_human_token_id=$(curl -sf \
-      -u "${admin_user}:${admin_pass}" \
+      -u "${human_user}:${human_pass}" \
      "${forge_url}/api/v1/users/${human_user}/tokens" 2>/dev/null \
      | jq -r '.[] | select(.name == "disinto-human-token") | .id') || existing_human_token_id=""
    if [ -n "$existing_human_token_id" ]; then
      curl -sf -X DELETE \
-        -u "${admin_user}:${admin_pass}" \
+        -u "${human_user}:${human_pass}" \
        "${forge_url}/api/v1/users/${human_user}/tokens/${existing_human_token_id}" >/dev/null 2>&1 || true
    fi
-    # Create human token (use admin_pass since human_user == admin_user)
+    # Create human token (fresh, so sha1 is returned)
    human_token=$(curl -sf -X POST \
-      -u "${admin_user}:${admin_pass}" \
+      -u "${human_user}:${human_pass}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/users/${human_user}/tokens" \
      -d '{"name":"disinto-human-token","scopes":["all"]}' 2>/dev/null \
@ -326,7 +277,7 @@ setup_forge() {
        printf 'HUMAN_TOKEN=%s\n' "$human_token" >> "$env_file"
      fi
      export HUMAN_TOKEN="$human_token"
-      echo "  Human token generated and saved (HUMAN_TOKEN)"
+      echo "  Human token saved (HUMAN_TOKEN)"
    fi
  fi
@ -356,36 +307,14 @@ setup_forge() {
    [predictor-bot]="FORGE_PREDICTOR_PASS"
    [architect-bot]="FORGE_ARCHITECT_PASS"
  )
  # Llama bot users (local-model agents) — separate from main agents
  # Each llama agent gets its own Forgejo user, token, and password
  local -A llama_token_vars=(
    [dev-qwen]="FORGE_TOKEN_LLAMA"
    [dev-qwen-nightly]="FORGE_TOKEN_LLAMA_NIGHTLY"
  )
  local -A llama_pass_vars=(
    [dev-qwen]="FORGE_PASS_LLAMA"
    [dev-qwen-nightly]="FORGE_PASS_LLAMA_NIGHTLY"
  )
  local bot_user bot_pass token token_var pass_var
  for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot; do
    bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
    token_var="${bot_token_vars[$bot_user]}"
    pass_var="${bot_pass_vars[$bot_user]}"
-    # Check if token already exists in .env
+    # Check if bot user exists
    local token_exists=false
    if _token_exists_in_env "$token_var" "$env_file"; then
      token_exists=true
    fi
    # Check if password already exists in .env
    local pass_exists=false
    if _pass_exists_in_env "$pass_var" "$env_file"; then
      pass_exists=true
    fi
    # Check if bot user exists on Forgejo
    local user_exists=false
    if curl -sf --max-time 5 \
      -H "Authorization: token ${admin_token}" \
@ -393,25 +322,7 @@ setup_forge() {
      user_exists=true
    fi
    # Skip token/password regeneration if both exist in .env and not forcing rotation
    if [ "$token_exists" = true ] && [ "$pass_exists" = true ] && [ "$rotate_tokens" = false ]; then
      echo "  ${bot_user} token and password preserved (use --rotate-tokens to force)"
      # Still export the existing token for use within this run
      local existing_token existing_pass
      existing_token=$(grep "^${token_var}=" "$env_file" | head -1 | cut -d= -f2-)
      existing_pass=$(grep "^${pass_var}=" "$env_file" | head -1 | cut -d= -f2-)
      export "${token_var}=${existing_token}"
      export "${pass_var}=${existing_pass}"
      continue
    fi
    # Generate new credentials if:
    # - Token doesn't exist (first run)
    # - Password doesn't exist (first run)
    # - --rotate-tokens flag is set (explicit rotation)
    if [ "$user_exists" = false ]; then
      # User doesn't exist - create it
      bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
      echo "Creating bot user: ${bot_user}"
      local create_output
      if ! create_output=$(_forgejo_exec forgejo admin user create \
@ -439,22 +350,16 @@ setup_forge() {
      fi
      echo "  ${bot_user} user created"
    else
-      # User exists - reset password if needed
+      echo "  ${bot_user} user exists (resetting password for token generation)"
-      echo "  ${bot_user} user exists"
+      # User exists but may not have a known password.
-      if [ "$rotate_tokens" = true ] || [ "$pass_exists" = false ]; then
+      # Use admin API to reset the password so we can generate a new token.
-        bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
+      _forgejo_exec forgejo admin user change-password \
-        _forgejo_exec forgejo admin user change-password \
+        --username "${bot_user}" \
-          --username "${bot_user}" \
+        --password "${bot_pass}" \
-          --password "${bot_pass}" \
+        --must-change-password=false || {
-          --must-change-password=false || {
+        echo "Error: failed to reset password for existing bot user '${bot_user}'" >&2
-          echo "Error: failed to reset password for existing bot user '${bot_user}'" >&2
+        exit 1
-          exit 1
+      }
        }
        echo "  ${bot_user} password reset for token generation"
      else
        # Password exists, get it from .env
        bot_pass=$(grep "^${pass_var}=" "$env_file" | head -1 | cut -d= -f2-)
      fi
    fi
    # Generate token via API (basic auth as the bot user — Forgejo requires
@ -499,6 +404,7 @@ setup_forge() {
    # Store password in .env for git HTTP push (#361)
    # Forgejo 11.x API tokens don't work for git push; password auth does.
    pass_var="${bot_pass_vars[$bot_user]}"
    if grep -q "^${pass_var}=" "$env_file" 2>/dev/null; then
      sed -i "s|^${pass_var}=.*|${pass_var}=${bot_pass}|" "$env_file"
    else
@ -515,164 +421,17 @@ setup_forge() {
    fi
  done
  # Create llama bot users and tokens (local-model agents)
  # These are separate from the main agents and get their own credentials
  echo ""
  echo "── Setting up llama bot users ────────────────────────────"
  local llama_user llama_pass llama_token llama_token_var llama_pass_var
  for llama_user in "${!llama_token_vars[@]}"; do
    llama_token_var="${llama_token_vars[$llama_user]}"
    llama_pass_var="${llama_pass_vars[$llama_user]}"
    # Check if token already exists in .env
    local token_exists=false
    if _token_exists_in_env "$llama_token_var" "$env_file"; then
      token_exists=true
    fi
    # Check if password already exists in .env
    local pass_exists=false
    if _pass_exists_in_env "$llama_pass_var" "$env_file"; then
      pass_exists=true
    fi
    # Check if llama bot user exists on Forgejo
    local llama_user_exists=false
    if curl -sf --max-time 5 \
      -H "Authorization: token ${admin_token}" \
      "${forge_url}/api/v1/users/${llama_user}" >/dev/null 2>&1; then
      llama_user_exists=true
    fi
    # Skip token/password regeneration if both exist in .env and not forcing rotation
    if [ "$token_exists" = true ] && [ "$pass_exists" = true ] && [ "$rotate_tokens" = false ]; then
      echo "  ${llama_user} token and password preserved (use --rotate-tokens to force)"
      # Still export the existing token for use within this run
      local existing_token existing_pass
      existing_token=$(grep "^${llama_token_var}=" "$env_file" | head -1 | cut -d= -f2-)
      existing_pass=$(grep "^${llama_pass_var}=" "$env_file" | head -1 | cut -d= -f2-)
      export "${llama_token_var}=${existing_token}"
      export "${llama_pass_var}=${existing_pass}"
      continue
    fi
    # Generate new credentials if:
    # - Token doesn't exist (first run)
    # - Password doesn't exist (first run)
    # - --rotate-tokens flag is set (explicit rotation)
    if [ "$llama_user_exists" = false ]; then
      # User doesn't exist - create it
      llama_pass="llama-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
      echo "Creating llama bot user: ${llama_user}"
      local create_output
      if ! create_output=$(_forgejo_exec forgejo admin user create \
        --username "${llama_user}" \
        --password "${llama_pass}" \
        --email "${llama_user}@disinto.local" \
        --must-change-password=false 2>&1); then
        echo "Error: failed to create llama bot user '${llama_user}':" >&2
        echo "  ${create_output}" >&2
        exit 1
      fi
      # Forgejo 11.x ignores --must-change-password=false on create;
      # explicitly clear the flag so basic-auth token creation works.
      _forgejo_exec forgejo admin user change-password \
        --username "${llama_user}" \
        --password "${llama_pass}" \
        --must-change-password=false
      # Verify llama bot user was actually created
      if ! curl -sf --max-time 5 \
        -H "Authorization: token ${admin_token}" \
        "${forge_url}/api/v1/users/${llama_user}" >/dev/null 2>&1; then
        echo "Error: llama bot user '${llama_user}' not found after creation" >&2
        exit 1
      fi
      echo "  ${llama_user} user created"
    else
      # User exists - reset password if needed
      echo "  ${llama_user} user exists"
      if [ "$rotate_tokens" = true ] || [ "$pass_exists" = false ]; then
        llama_pass="llama-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
        _forgejo_exec forgejo admin user change-password \
          --username "${llama_user}" \
          --password "${llama_pass}" \
          --must-change-password=false || {
          echo "Error: failed to reset password for existing llama bot user '${llama_user}'" >&2
          exit 1
        }
        echo "  ${llama_user} password reset for token generation"
      else
        # Password exists, get it from .env
        llama_pass=$(grep "^${llama_pass_var}=" "$env_file" | head -1 | cut -d= -f2-)
      fi
    fi
    # Generate token via API (basic auth as the llama user)
    # First, delete any existing tokens to avoid name collision
    local existing_llama_token_ids
    existing_llama_token_ids=$(curl -sf \
      -u "${llama_user}:${llama_pass}" \
      "${forge_url}/api/v1/users/${llama_user}/tokens" 2>/dev/null \
      | jq -r '.[].id // empty' 2>/dev/null) || existing_llama_token_ids=""
    # Delete any existing tokens for this user
    if [ -n "$existing_llama_token_ids" ]; then
      while IFS= read -r tid; do
        [ -n "$tid" ] && curl -sf -X DELETE \
          -u "${llama_user}:${llama_pass}" \
          "${forge_url}/api/v1/users/${llama_user}/tokens/${tid}" >/dev/null 2>&1 || true
      done <<< "$existing_llama_token_ids"
    fi
    llama_token=$(curl -sf -X POST \
      -u "${llama_user}:${llama_pass}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/users/${llama_user}/tokens" \
      -d "{\"name\":\"disinto-${llama_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \
      | jq -r '.sha1 // empty') || llama_token=""
    if [ -z "$llama_token" ]; then
      echo "Error: failed to create API token for '${llama_user}'" >&2
      exit 1
    fi
    # Store token in .env under the llama-specific variable name
    if grep -q "^${llama_token_var}=" "$env_file" 2>/dev/null; then
      sed -i "s|^${llama_token_var}=.*|${llama_token_var}=${llama_token}|" "$env_file"
    else
      printf '%s=%s\n' "$llama_token_var" "$llama_token" >> "$env_file"
    fi
    export "${llama_token_var}=${llama_token}"
    echo "  ${llama_user} token generated and saved (${llama_token_var})"
    # Store password in .env for git HTTP push (#361)
    # Forgejo 11.x API tokens don't work for git push; password auth does.
    if grep -q "^${llama_pass_var}=" "$env_file" 2>/dev/null; then
      sed -i "s|^${llama_pass_var}=.*|${llama_pass_var}=${llama_pass}|" "$env_file"
    else
      printf '%s=%s\n' "$llama_pass_var" "$llama_pass" >> "$env_file"
    fi
    export "${llama_pass_var}=${llama_pass}"
    echo "  ${llama_user} password saved (${llama_pass_var})"
  done
  # Create .profile repos for all bot users (if they don't already exist)
  # This runs the same logic as hire-an-agent Step 2-3 for idempotent setup
  echo ""
  echo "── Setting up .profile repos ────────────────────────────"
  local -a bot_users=(dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot)
  # Add llama bot users to .profile repo creation
  for llama_user in "${!llama_token_vars[@]}"; do
    bot_users+=("$llama_user")
  done
  local bot_user
  for bot_user in "${bot_users[@]}"; do
    # Check if .profile repo already exists
-    if curl -sf --max-time 5 -H "Authorization: token ${admin_token}" "${forge_url}/api/v1/repos/${bot_user}/.profile" >/dev/null 2>&1; then
+    if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${bot_user}/.profile" >/dev/null 2>&1; then
      echo "  ${bot_user}/.profile already exists"
      continue
    fi
@ -747,7 +506,7 @@ setup_forge() {
    fi
    # Add all bot users as collaborators with appropriate permissions
-    # dev-bot: write (PR creation via lib/action-vault.sh)
+    # dev-bot: write (PR creation via lib/vault.sh)
    # review-bot: read (PR review)
    # planner-bot: write (prerequisites.md, memory)
    # gardener-bot: write (backlog grooming)
@ -775,15 +534,6 @@ setup_forge() {
        -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true
    done
    # Add llama bot users as write collaborators for local-model agents
    for llama_user in "${!llama_token_vars[@]}"; do
      curl -sf -X PUT \
        -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${llama_user}" \
        -d '{"permission":"write"}' >/dev/null 2>&1 || true
    done
    # Add disinto-admin as admin collaborator
    curl -sf -X PUT \
      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
--- a/lib/formula-session.sh
+++ b/lib/formula-session.sh
@ -18,8 +18,7 @@
 #   ensure_profile_repo [AGENT_IDENTITY]   — clone/pull .profile repo
 #   _profile_has_repo                      — check if agent has .profile repo
 #   _count_undigested_journals             — count journal entries to digest
-#   _profile_digest_journals               — digest journals into lessons (timeout + batch cap)
+#   _profile_digest_journals               — digest journals into lessons
 #   _profile_restore_lessons FILE BACKUP   — restore lessons on digest failure
 #   _profile_commit_and_push MESSAGE [FILES] — commit/push to .profile repo
 #   resolve_agent_identity                 — resolve agent user login from FORGE_TOKEN
 #   build_graph_section                    — run build-graph.py and set GRAPH_SECTION
@ -29,13 +28,7 @@
 #   ops_commit_and_push MESSAGE [FILES]    — commit/push to ops repo
 #   cleanup_stale_crashed_worktrees [HOURS] — thin wrapper around worktree_cleanup_stale
 #
-# Requires: lib/env.sh, lib/worktree.sh, lib/agent-sdk.sh sourced first for shared helpers.
+# Requires: lib/env.sh, lib/worktree.sh sourced first for shared helpers.
 # Source agent-sdk for claude_run_with_watchdog watchdog helper
 source "$(dirname "${BASH_SOURCE[0]}")/agent-sdk.sh"
 # Source ops-setup for migrate_ops_repo (used by ensure_ops_repo)
 source "$(dirname "${BASH_SOURCE[0]}")/ops-setup.sh"
 # ── Run guards ───────────────────────────────────────────────────────────
@ -120,16 +113,15 @@ ensure_profile_repo() {
  # Define cache directory: /home/agent/data/.profile/{agent-name}
  PROFILE_REPO_PATH="${HOME:-/home/agent}/data/.profile/${agent_identity}"
-  # Build clone URL from FORGE_URL — credential helper supplies auth (#604)
+  # Build clone URL from FORGE_URL and agent identity
  local forge_url="${FORGE_URL:-http://localhost:3000}"
-  local clone_url="${forge_url}/${agent_identity}/.profile.git"
+  local auth_url
  auth_url=$(printf '%s' "$forge_url" | sed "s|://|://$(whoami):${FORGE_TOKEN}@|")
  local clone_url="${auth_url}/${agent_identity}/.profile.git"
  # Check if already cached and up-to-date
  if [ -d "${PROFILE_REPO_PATH}/.git" ]; then
    log "Pulling .profile repo: ${agent_identity}/.profile"
    # Always refresh the remote URL to ensure it's clean (no baked credentials)
    # This fixes auth issues when old URLs contained the wrong username (#652)
    git -C "$PROFILE_REPO_PATH" remote set-url origin "$clone_url" 2>/dev/null || true
    if git -C "$PROFILE_REPO_PATH" fetch origin --quiet 2>/dev/null; then
      git -C "$PROFILE_REPO_PATH" checkout main --quiet 2>/dev/null || \
      git -C "$PROFILE_REPO_PATH" checkout master --quiet 2>/dev/null || true
@ -192,14 +184,10 @@ _count_undigested_journals() {
 # _profile_digest_journals
 # Runs a claude -p one-shot to digest undigested journals into lessons-learned.md
 # Respects PROFILE_DIGEST_TIMEOUT (default 300s) and PROFILE_DIGEST_MAX_BATCH (default 5).
 # On failure/timeout, preserves the previous lessons-learned.md and does not archive journals.
 # Returns 0 on success, 1 on failure.
 _profile_digest_journals() {
  local agent_identity="${AGENT_IDENTITY:-}"
  local model="${CLAUDE_MODEL:-opus}"
  local digest_timeout="${PROFILE_DIGEST_TIMEOUT:-300}"
  local max_batch="${PROFILE_DIGEST_MAX_BATCH:-5}"
  if [ -z "$agent_identity" ]; then
    if ! resolve_agent_identity; then
@ -212,27 +200,19 @@ _profile_digest_journals() {
  local knowledge_dir="${PROFILE_REPO_PATH}/knowledge"
  local lessons_file="${knowledge_dir}/lessons-learned.md"
-  # Collect undigested journal entries (capped at max_batch)
+  # Collect undigested journal entries
  local journal_entries=""
  local batch_count=0
  local -a batchfiles=()
  if [ -d "$journal_dir" ]; then
    for jf in "$journal_dir"/*.md; do
      [ -f "$jf" ] || continue
      # Skip archived entries
      [[ "$jf" == */archive/* ]] && continue
      if [ "$batch_count" -ge "$max_batch" ]; then
        log "profile: capping digest batch at ${max_batch} journals (remaining will be digested in future runs)"
        break
      fi
      local basename
      basename=$(basename "$jf")
      journal_entries="${journal_entries}
 ### ${basename}
 $(cat "$jf")
 "
      batchfiles+=("$jf")
      batch_count=$((batch_count + 1))
    done
  fi
@ -241,104 +221,64 @@ $(cat "$jf")
    return 0
  fi
-  log "profile: digesting ${batch_count} journals (timeout ${digest_timeout}s)"
+  # Read existing lessons if available
-
+  local existing_lessons=""
  # Ensure knowledge directory exists
  mkdir -p "$knowledge_dir"
  # Back up existing lessons-learned.md so we can restore on failure
  local lessons_backup=""
  if [ -f "$lessons_file" ]; then
-    lessons_backup=$(mktemp)
+    existing_lessons=$(cat "$lessons_file")
    cp "$lessons_file" "$lessons_backup"
  fi
  # Capture mtime so we can detect a Write-tool write afterwards
  local mtime_before=0
  [ -f "$lessons_file" ] && mtime_before=$(stat -c %Y "$lessons_file")
  # Build prompt for digestion
  local digest_prompt="You are digesting journal entries from a developer agent's work sessions.
 ## Task
-Update the lessons-learned file at this exact absolute path:
+Condense these journal entries into abstract, transferable lessons. Rewrite lessons-learned.md entirely.
  ${lessons_file}
 1. Read ${lessons_file} (it may not exist yet — that's fine, treat as empty).
 2. Digest the journal entries below into abstract, transferable patterns and heuristics.
 3. Merge with the existing lessons: preserve anything still useful, refine, drop stale or redundant entries, add new ones.
 4. Write the merged result back to ${lessons_file} using the Write tool.
 ## Constraints
 - Hard cap: 2KB maximum
 - Abstract: patterns and heuristics, not specific issues or file paths
 - Transferable: must help with future unseen work, not just recall past work
- Drop the least transferable lessons if over the cap
+- Drop the least transferable lessons if over limit
 ## Existing lessons-learned.md (if any)
 ${existing_lessons:-<none>}
 ## Journal entries to digest
-${journal_entries}"
+${journal_entries}
-  # Run claude -p one-shot with digest-specific timeout
+## Output
-  local output digest_rc
+Write the complete, rewritten lessons-learned.md content below. No preamble, no explanation — just the file content."
-  local saved_timeout="${CLAUDE_TIMEOUT:-7200}"
+
-  CLAUDE_TIMEOUT="$digest_timeout"
+  # Run claude -p one-shot with same model as agent
-  output=$(claude_run_with_watchdog claude -p "$digest_prompt" \
+  local output
  output=$(claude -p "$digest_prompt" \
    --output-format json \
    --dangerously-skip-permissions \
    ${model:+--model "$model"} \
-    2>>"$LOGFILE") && digest_rc=0 || digest_rc=$?
+    2>>"$LOGFILE" || echo '{"result":"error"}')
  CLAUDE_TIMEOUT="$saved_timeout"
-  if [ "$digest_rc" -eq 124 ]; then
+  # Extract content from JSON response
-    log "profile: digest timed out after ${digest_timeout}s — preserving previous lessons, skipping archive"
+  local lessons_content
-    _profile_restore_lessons "$lessons_file" "$lessons_backup"
+  lessons_content=$(printf '%s' "$output" | jq -r '.result // empty' 2>/dev/null || echo "")
  if [ -z "$lessons_content" ]; then
    log "profile: failed to digest journals"
    return 1
  fi
-  if [ "$digest_rc" -ne 0 ]; then
+  # Ensure knowledge directory exists
-    log "profile: digest failed (exit code ${digest_rc}) — preserving previous lessons, skipping archive"
+  mkdir -p "$knowledge_dir"
    _profile_restore_lessons "$lessons_file" "$lessons_backup"
    return 1
  fi
-  local mtime_after=0
+  # Write the lessons file (full rewrite)
-  [ -f "$lessons_file" ] && mtime_after=$(stat -c %Y "$lessons_file")
+  printf '%s\n' "$lessons_content" > "$lessons_file"
  log "profile: wrote lessons-learned.md (${#lessons_content} bytes)"
-  if [ "$mtime_after" -gt "$mtime_before" ] && [ -s "$lessons_file" ]; then
+  # Move digested journals to archive (if any were processed)
-    local file_size
+  if [ -d "$journal_dir" ]; then
    file_size=$(wc -c < "$lessons_file")
    # Treat tiny files (<=16 bytes) as failed digestion (e.g. "null", "{}", empty)
    if [ "$file_size" -le 16 ]; then
      log "profile: digest produced suspiciously small file (${file_size} bytes) — preserving previous lessons, skipping archive"
      _profile_restore_lessons "$lessons_file" "$lessons_backup"
      return 1
    fi
    log "profile: lessons-learned.md written by model via Write tool (${file_size} bytes)"
  else
    # Fallback: model didn't use Write tool — capture .result and strip any markdown code fence
    local lessons_content
    lessons_content=$(printf '%s' "$output" | jq -r '.result // empty' 2>/dev/null || echo "")
    lessons_content=$(printf '%s' "$lessons_content" | sed -E '1{/^```(markdown|md)?[[:space:]]*$/d;};${/^```[[:space:]]*$/d;}')
    if [ -z "$lessons_content" ] || [ "${#lessons_content}" -le 16 ]; then
      log "profile: failed to digest journals (no Write tool call, empty or tiny .result) — preserving previous lessons, skipping archive"
      _profile_restore_lessons "$lessons_file" "$lessons_backup"
      return 1
    fi
    printf '%s\n' "$lessons_content" > "$lessons_file"
    log "profile: lessons-learned.md written from .result fallback (${#lessons_content} bytes)"
  fi
  # Clean up backup on success
  [ -n "$lessons_backup" ] && rm -f "$lessons_backup"
  # Move only the digested journals to archive (not all — only the batch we processed)
  if [ ${#batchfiles[@]} -gt 0 ]; then
    mkdir -p "${journal_dir}/archive"
    local archived=0
-    for jf in "${batchfiles[@]}"; do
+    for jf in "$journal_dir"/*.md; do
      [ -f "$jf" ] || continue
      [[ "$jf" == */archive/* ]] && continue
      local basename
      basename=$(basename "$jf")
      mv "$jf" "${journal_dir}/archive/${basename}" 2>/dev/null && archived=$((archived + 1))
@ -348,27 +288,9 @@ ${journal_entries}"
    fi
  fi
  # Commit and push the digest results
  _profile_commit_and_push \
    "profile: digest ${archived:-0} journals → knowledge/lessons-learned.md" \
    knowledge/lessons-learned.md \
    journal/
  return 0
 }
 # _profile_restore_lessons LESSONS_FILE BACKUP_FILE
 # Restores the previous lessons-learned.md from BACKUP_FILE after a failed digest.
 #
 # Arguments:
 #   LESSONS_FILE  path of the lessons file to overwrite
 #   BACKUP_FILE   path of the backup copy; may be empty or point to a missing
 #                 file, in which case nothing is done
 # Returns: 0 when the file was restored or there was nothing to restore,
 #          1 when the copy failed (the backup is kept so it can be recovered).
 _profile_restore_lessons() {
  local lessons_file="$1"
  local backup="$2"
  # No backup recorded, or it no longer exists: nothing to restore.
  if [ -z "$backup" ] || [ ! -f "$backup" ]; then
    return 0
  fi
  # Delete the backup and report success only after the copy succeeded;
  # otherwise the only good copy of the lessons would be thrown away.
  if ! cp -- "$backup" "$lessons_file"; then
    log "profile: failed to restore lessons-learned.md (backup kept at ${backup})"
    return 1
  fi
  rm -f -- "$backup"
  log "profile: restored previous lessons-learned.md"
  return 0
 }
 # _profile_commit_and_push MESSAGE [FILE ...]
 # Commits and pushes changes to .profile repo.
 _profile_commit_and_push() {
@ -383,15 +305,6 @@ _profile_commit_and_push() {
  (
    cd "$PROFILE_REPO_PATH" || return 1
    # Refresh the remote URL to ensure credentials are current (#652)
    # This ensures we use the correct bot identity and fresh credentials
    local forge_url="${FORGE_URL:-http://localhost:3000}"
    local agent_identity="${AGENT_IDENTITY:-}"
    if [ -n "$agent_identity" ]; then
      local remote_url="${forge_url}/${agent_identity}/.profile.git"
      git remote set-url origin "$remote_url" 2>/dev/null || true
    fi
    if [ ${#files[@]} -gt 0 ]; then
      git add "${files[@]}"
    else
@ -400,7 +313,7 @@ _profile_commit_and_push() {
    if ! git diff --cached --quiet 2>/dev/null; then
      git config user.name "${AGENT_IDENTITY}" || true
-      git config user.email "${AGENT_IDENTITY}@disinto.local" || true
+      git config user.email "${AGENT_IDENTITY}@users.noreply.codeberg.org" || true
      git commit -m "$msg" --no-verify 2>/dev/null || true
      git push origin main --quiet 2>/dev/null || git push origin master --quiet 2>/dev/null || true
    fi
@ -409,8 +322,7 @@ _profile_commit_and_push() {
 # profile_load_lessons
 # Pre-session: loads lessons-learned.md into LESSONS_CONTEXT for prompt injection.
-# Lazy digestion: if undigested journals exceed PROFILE_DIGEST_THRESHOLD (default 10),
+# Lazy digestion: if >10 undigested journals exist, runs claude -p to digest them.
 # runs claude -p to digest them (bounded by PROFILE_DIGEST_MAX_BATCH and PROFILE_DIGEST_TIMEOUT).
 # Returns 0 on success, 1 if agent has no .profile repo (silent no-op).
 # Requires: ensure_profile_repo() called, AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL, CLAUDE_MODEL.
 # Exports: LESSONS_CONTEXT (the lessons file content, hard-capped at 2KB).
@ -426,14 +338,13 @@ profile_load_lessons() {
  fi
  # Check journal count for lazy digestion trigger
-  local journal_count digest_threshold
+  local journal_count
  journal_count=$(_count_undigested_journals)
  digest_threshold="${PROFILE_DIGEST_THRESHOLD:-10}"
-  if [ "${journal_count:-0}" -gt "$digest_threshold" ]; then
+  if [ "${journal_count:-0}" -gt 10 ]; then
-    log "profile: ${journal_count} undigested journals (threshold ${digest_threshold})"
+    log "profile: digesting ${journal_count} undigested journals"
    if ! _profile_digest_journals; then
-      log "profile: warning — journal digestion failed, continuing with existing lessons"
+      log "profile: warning — journal digestion failed"
    fi
  fi
@ -533,7 +444,7 @@ Write the journal entry below. Use markdown format."
  # Run claude -p one-shot with same model as agent
  local output
-  output=$(claude_run_with_watchdog claude -p "$reflection_prompt" \
+  output=$(claude -p "$reflection_prompt" \
    --output-format json \
    --dangerously-skip-permissions \
    ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} \
@ -674,7 +585,6 @@ ensure_ops_repo() {
    git -C "$ops_root" fetch origin "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
    git -C "$ops_root" checkout "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
    git -C "$ops_root" pull --ff-only origin "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
    migrate_ops_repo "$ops_root" "${PRIMARY_BRANCH}"
    return 0
  fi
@ -682,8 +592,14 @@ ensure_ops_repo() {
  local ops_repo="${FORGE_OPS_REPO:-}"
  [ -n "$ops_repo" ] || return 0
  local forge_url="${FORGE_URL:-http://localhost:3000}"
-  # Use clean URL — credential helper supplies auth (#604)
+  local clone_url
-  local clone_url="${forge_url}/${ops_repo}.git"
+  if [ -n "${FORGE_TOKEN:-}" ]; then
    local auth_url
    auth_url=$(printf '%s' "$forge_url" | sed "s|://|://$(whoami):${FORGE_TOKEN}@|")
    clone_url="${auth_url}/${ops_repo}.git"
  else
    clone_url="${forge_url}/${ops_repo}.git"
  fi
  log "Cloning ops repo: ${ops_repo} -> ${ops_root}"
  if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then
@ -819,7 +735,8 @@ build_prompt_footer() {
 Base URL: ${FORGE_API}
 Auth header: -H \"Authorization: token \${FORGE_TOKEN}\"
  Read issue:  curl -sf -H \"Authorization: token \${FORGE_TOKEN}\" '${FORGE_API}/issues/{number}' | jq '.body'
-  List labels: curl -sf -H \"Authorization: token \${FORGE_TOKEN}\" '${FORGE_API}/labels'${extra_api}
+  Create issue: curl -sf -X POST -H \"Authorization: token \${FORGE_TOKEN}\" -H 'Content-Type: application/json' '${FORGE_API}/issues' -d '{\"title\":\"...\",\"body\":\"...\",\"labels\":[LABEL_ID]}'${extra_api}
  List labels: curl -sf -H \"Authorization: token \${FORGE_TOKEN}\" '${FORGE_API}/labels'
 NEVER echo or include the actual token value in output — always reference \${FORGE_TOKEN}.
 ## Environment
--- a/lib/generators.sh
+++ b/lib/generators.sh
@ -26,46 +26,6 @@ PROJECT_NAME="${PROJECT_NAME:-project}"
 # PRIMARY_BRANCH defaults to main (env.sh may have set it to 'master')
 PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}"
 # _get_woodpecker_repo_id TOML_FILE
 # Helper: print the woodpecker_repo_id value from the [ci] table of a project
 # TOML file.
 # Prints "0" (never an empty string) when the file does not exist, the key is
 # absent, the file cannot be parsed, or the python3 invocation itself fails.
 # Arguments: $1 - path to the project TOML file
 # Outputs:   the repo id (or "0") on stdout
 _get_woodpecker_repo_id() {
  local toml_file="$1"
  if [ -f "$toml_file" ]; then
    # Parsing is delegated to Python's tomllib (standard library since 3.11).
    # stderr is discarded; if python3 exits non-zero (missing interpreter,
    # import error, ...) the trailing '|| echo "0"' supplies the default.
    python3 -c "
 import sys, tomllib
 try:
    with open(sys.argv[1], 'rb') as f:
        cfg = tomllib.load(f)
    ci = cfg.get('ci', {})
    wp_id = ci.get('woodpecker_repo_id', '0')
    print(wp_id)
 except Exception:
    print('0')
 " "$toml_file" 2>/dev/null || echo "0"
  else
    # Missing file: same "not configured" value as every other fallback path.
    echo "0"
  fi
 }
 # _get_primary_woodpecker_repo_id
 # Prints the highest numeric woodpecker_repo_id found across all project TOML
 # files in ${FACTORY_ROOT}/projects, or "0" when none defines a usable id.
 # (Used for the main agents service, which doesn't have a per-project TOML.)
 # Globals:  FACTORY_ROOT (read)
 # Outputs:  the selected repo id on stdout
 _get_primary_woodpecker_repo_id() {
  local projects_dir="${FACTORY_ROOT}/projects"
  local max_id="0"
  # Keep the loop variables function-local so a direct (non-subshell) call
  # cannot clobber same-named variables in the caller.
  local toml repo_id
  for toml in "${projects_dir}"/*.toml; do
    # An unmatched glob stays literal; skip anything that is not a real file.
    [ -f "$toml" ] || continue
    repo_id=$(_get_woodpecker_repo_id "$toml")
    # Empty output or "0" means this project has no repo id configured.
    if [ -z "$repo_id" ] || [ "$repo_id" = "0" ]; then
      continue
    fi
    # Keep the largest id seen so far. A non-numeric value makes the test
    # error out (message suppressed), so such values are ignored.
    if [ "$repo_id" -gt "$max_id" ] 2>/dev/null; then
      max_id="$repo_id"
    fi
  done
  echo "$max_id"
 }
 # Parse project TOML for local-model agents and emit compose services.
 # Writes service definitions to stdout; caller handles insertion into compose file.
 _generate_local_model_services() {
@ -80,10 +40,6 @@ _generate_local_model_services() {
  for toml in "${projects_dir}"/*.toml; do
    [ -f "$toml" ] || continue
    # Get woodpecker_repo_id for this project
    local wp_repo_id
    wp_repo_id=$(_get_woodpecker_repo_id "$toml")
    # Parse [agents.*] sections using Python - output YAML-compatible format
    while IFS='=' read -r key value; do
      case "$key" in
@ -100,7 +56,9 @@ _generate_local_model_services() {
            cat >> "$temp_file" <<EOF
  agents-${service_name}:
-    image: ghcr.io/disinto/agents:\${DISINTO_IMAGE_TAG:-latest}
+    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    container_name: disinto-agents-${service_name}
    restart: unless-stopped
    security_opt:
@ -108,16 +66,13 @@ _generate_local_model_services() {
    volumes:
      - agents-${service_name}-data:/home/agent/data
      - project-repos:/home/agent/repos
-      - \${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:\${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
+      - \${HOME}/.claude:/home/agent/.claude
-      - \${CLAUDE_CONFIG_FILE:-\${HOME}/.claude.json}:/home/agent/.claude.json:ro
+      - \${HOME}/.claude.json:/home/agent/.claude.json:ro
-      - \${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
+      - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro
-      - \${AGENT_SSH_DIR:-\${HOME}/.ssh}:/home/agent/.ssh:ro
+      - \${HOME}/.ssh:/home/agent/.ssh:ro
    environment:
      FORGE_URL: http://forgejo:3000
-      FORGE_REPO: ${FORGE_REPO:-disinto-admin/disinto}
+      FORGE_TOKEN: \${FORGE_TOKEN:-}
      # Use llama-specific credentials if available, otherwise fall back to main FORGE_TOKEN
      FORGE_TOKEN: \${FORGE_TOKEN_LLAMA:-\${FORGE_TOKEN:-}}
      FORGE_PASS: \${FORGE_PASS_LLAMA:-\${FORGE_PASS:-}}
      FORGE_REVIEW_TOKEN: \${FORGE_REVIEW_TOKEN:-}
      FORGE_BOT_USERNAMES: \${FORGE_BOT_USERNAMES:-}
      AGENT_ROLES: "${roles}"
@ -125,27 +80,21 @@ _generate_local_model_services() {
      ANTHROPIC_BASE_URL: "${base_url}"
      ANTHROPIC_API_KEY: "${api_key}"
      CLAUDE_MODEL: "${model}"
-      CLAUDE_CONFIG_DIR: \${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
+      CLAUDE_CONFIG_DIR: /home/agent/.claude-${service_name}
-      CLAUDE_CREDENTIALS_DIR: \${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}/credentials
+      CLAUDE_CREDENTIALS_DIR: /home/agent/.claude-${service_name}/credentials
      CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "${compact_pct}"
      CLAUDE_CODE_ATTRIBUTION_HEADER: "0"
      CLAUDE_CODE_ENABLE_TELEMETRY: "0"
      DISINTO_CONTAINER: "1"
      PROJECT_NAME: ${PROJECT_NAME:-project}
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      WOODPECKER_DATA_DIR: /woodpecker-data
      WOODPECKER_REPO_ID: "${wp_repo_id}"
      FORGE_BOT_USER_${service_name^^}: "${forge_user}"
      POLL_INTERVAL: "${poll_interval_val}"
-      GARDENER_INTERVAL: "${GARDENER_INTERVAL:-21600}"
+      GARDENER_INTERVAL: \${GARDENER_INTERVAL:-21600}
-      ARCHITECT_INTERVAL: "${ARCHITECT_INTERVAL:-21600}"
+      ARCHITECT_INTERVAL: \${ARCHITECT_INTERVAL:-21600}
      PLANNER_INTERVAL: "${PLANNER_INTERVAL:-43200}"
      SUPERVISOR_INTERVAL: "${SUPERVISOR_INTERVAL:-1200}"
    depends_on:
-      forgejo:
+      - forgejo
-        condition: service_healthy
+      - woodpecker
      woodpecker:
        condition: service_started
    networks:
      - disinto-net
    profiles: ["agents-${service_name}"]
@ -227,12 +176,8 @@ for name, config in agents.items():
 }
 # Generate docker-compose.yml in the factory root.
 # **CANONICAL SOURCE**: This generator is the single source of truth for docker-compose.yml.
 # The tracked docker-compose.yml file has been removed. Operators must run 'bin/disinto init'
 # to materialize a working stack on a fresh checkout.
 _generate_compose_impl() {
  local forge_port="${1:-3000}"
  local use_build="${2:-false}"
  local compose_file="${FACTORY_ROOT}/docker-compose.yml"
  # Check if compose file already exists
@ -241,10 +186,6 @@ _generate_compose_impl() {
    return 0
  fi
  # Extract primary woodpecker_repo_id from project TOML files
  local wp_repo_id
  wp_repo_id=$(_get_primary_woodpecker_repo_id)
  cat > "$compose_file" <<'COMPOSEEOF'
 # docker-compose.yml — generated by disinto init
 # Brings up Forgejo, Woodpecker, and the agent runtime.
@ -260,17 +201,11 @@ services:
      - forgejo-data:/data
    environment:
      FORGEJO__database__DB_TYPE: sqlite3
-      FORGEJO__server__ROOT_URL: ${FORGEJO_ROOT_URL:-http://forgejo:3000/}
+      FORGEJO__server__ROOT_URL: http://forgejo:3000/
      FORGEJO__server__HTTP_PORT: "3000"
      FORGEJO__security__INSTALL_LOCK: "true"
      FORGEJO__service__DISABLE_REGISTRATION: "true"
      FORGEJO__webhook__ALLOWED_HOST_LIST: "private"
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/api/v1/version"]
      interval: 5s
      timeout: 3s
      retries: 30
      start_period: 30s
    networks:
      - disinto-net
@ -291,16 +226,13 @@ services:
      WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-}
      WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-}
      WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000}
      WOODPECKER_SERVER: http://woodpecker:9000
      WOODPECKER_OPEN: "true"
      WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
      WOODPECKER_DATABASE_DRIVER: sqlite3
      WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite
      WOODPECKER_PLUGINS_PRIVILEGED: ${WOODPECKER_PLUGINS_PRIVILEGED:-plugins/docker}
      WOODPECKER_ENVIRONMENT: "FORGE_TOKEN:${FORGE_TOKEN}"
    depends_on:
-      forgejo:
+      - forgejo
        condition: service_healthy
    networks:
      - disinto-net
@ -319,19 +251,15 @@ services:
      WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
      WOODPECKER_GRPC_SECURE: "false"
      WOODPECKER_HEALTHCHECK_ADDR: ":3333"
-      WOODPECKER_BACKEND_DOCKER_NETWORK: ${WOODPECKER_CI_NETWORK:-disinto_disinto-net}
+      WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net
      WOODPECKER_MAX_WORKFLOWS: 1
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3333/healthz"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 15s
    depends_on:
      - woodpecker
  agents:
-    image: ghcr.io/disinto/agents:${DISINTO_IMAGE_TAG:-latest}
+    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    container_name: disinto-agents
    restart: unless-stopped
    security_opt:
@ -339,18 +267,14 @@ services:
    volumes:
      - agent-data:/home/agent/data
      - project-repos:/home/agent/repos
-      - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
+      - ${HOME}/.claude:/home/agent/.claude
-      - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro
+      - ${HOME}/.claude.json:/home/agent/.claude.json:ro
-      - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
+      - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro
-      - ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro
+      - ${HOME}/.ssh:/home/agent/.ssh:ro
-      - ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro
+      - ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro
      - woodpecker-data:/woodpecker-data:ro
      - ./projects:/home/agent/disinto/projects:ro
      - ./.env:/home/agent/disinto/.env:ro
      - ./state:/home/agent/disinto/state
    environment:
      FORGE_URL: http://forgejo:3000
      FORGE_REPO: ${FORGE_REPO:-disinto-admin/disinto}
      FORGE_TOKEN: ${FORGE_TOKEN:-}
      FORGE_REVIEW_TOKEN: ${FORGE_REVIEW_TOKEN:-}
      FORGE_PLANNER_TOKEN: ${FORGE_PLANNER_TOKEN:-}
@ -364,180 +288,35 @@ services:
      CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200}
      CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
      FORGE_PASS: ${FORGE_PASS:-}
      FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-}
      FACTORY_REPO: ${FORGE_REPO:-disinto-admin/disinto}
      DISINTO_CONTAINER: "1"
      PROJECT_NAME: ${PROJECT_NAME:-project}
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      WOODPECKER_DATA_DIR: /woodpecker-data
      WOODPECKER_REPO_ID: "PLACEHOLDER_WP_REPO_ID"
      CLAUDE_CONFIG_DIR: ${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
      POLL_INTERVAL: ${POLL_INTERVAL:-300}
      GARDENER_INTERVAL: ${GARDENER_INTERVAL:-21600}
      ARCHITECT_INTERVAL: ${ARCHITECT_INTERVAL:-21600}
      PLANNER_INTERVAL: ${PLANNER_INTERVAL:-43200}
    # IMPORTANT: agents get explicit environment variables (forge tokens, CI tokens, config).
    # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in
-    # secrets/*.enc and are NEVER injected here — only the runner
+    # .env.vault.enc and are NEVER injected here — only the runner
-    # container receives them at fire time (AD-006, #745, #777).
+    # container receives them at fire time (AD-006, #745).
    healthcheck:
      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
      interval: 60s
      timeout: 5s
      retries: 3
      start_period: 30s
    depends_on:
-      forgejo:
+      - forgejo
-        condition: service_healthy
+      - woodpecker
      woodpecker:
        condition: service_started
    networks:
      - disinto-net
 COMPOSEEOF
  # ── Conditional agents-llama block (ENABLE_LLAMA_AGENT=1) ──────────────
  # Local-Qwen dev agent — gated on ENABLE_LLAMA_AGENT so factories without
  # a local llama endpoint don't try to start it.  See docs/agents-llama.md.
  if [ "${ENABLE_LLAMA_AGENT:-0}" = "1" ]; then
    cat >> "$compose_file" <<'LLAMAEOF'
  agents-llama:
    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    container_name: disinto-agents-llama
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
      - agent-data:/home/agent/data
      - project-repos:/home/agent/repos
      - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
      - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro
      - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
      - ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro
      - ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro
      - woodpecker-data:/woodpecker-data:ro
    environment:
      FORGE_URL: http://forgejo:3000
      FORGE_REPO: ${FORGE_REPO:-disinto-admin/disinto}
      FORGE_TOKEN: ${FORGE_TOKEN_LLAMA:-}
      FORGE_PASS: ${FORGE_PASS_LLAMA:-}
      FORGE_BOT_USERNAMES: ${FORGE_BOT_USERNAMES:-}
      WOODPECKER_TOKEN: ${WOODPECKER_TOKEN:-}
      CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200}
      CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
      CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "60"
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
      ANTHROPIC_BASE_URL: ${ANTHROPIC_BASE_URL:-}
      FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-}
      DISINTO_CONTAINER: "1"
      PROJECT_NAME: ${PROJECT_NAME:-project}
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      WOODPECKER_DATA_DIR: /woodpecker-data
      WOODPECKER_REPO_ID: "PLACEHOLDER_WP_REPO_ID"
      CLAUDE_CONFIG_DIR: ${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
      POLL_INTERVAL: ${POLL_INTERVAL:-300}
      AGENT_ROLES: dev
    healthcheck:
      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
      interval: 60s
      timeout: 5s
      retries: 3
      start_period: 30s
    depends_on:
      forgejo:
        condition: service_healthy
    networks:
      - disinto-net
  agents-llama-all:
    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    container_name: disinto-agents-llama-all
    restart: unless-stopped
    profiles: ["agents-llama-all"]
    security_opt:
      - apparmor=unconfined
    volumes:
      - agent-data:/home/agent/data
      - project-repos:/home/agent/repos
      - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
      - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro
      - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
      - ${AGENT_SSH_DIR:-${HOME}/.ssh}:/home/agent/.ssh:ro
      - ${SOPS_AGE_DIR:-${HOME}/.config/sops/age}:/home/agent/.config/sops/age:ro
      - woodpecker-data:/woodpecker-data:ro
    environment:
      FORGE_URL: http://forgejo:3000
      FORGE_REPO: ${FORGE_REPO:-disinto-admin/disinto}
      FORGE_TOKEN: ${FORGE_TOKEN_LLAMA:-}
      FORGE_PASS: ${FORGE_PASS_LLAMA:-}
      FORGE_REVIEW_TOKEN: ${FORGE_REVIEW_TOKEN:-}
      FORGE_PLANNER_TOKEN: ${FORGE_PLANNER_TOKEN:-}
      FORGE_GARDENER_TOKEN: ${FORGE_GARDENER_TOKEN:-}
      FORGE_VAULT_TOKEN: ${FORGE_VAULT_TOKEN:-}
      FORGE_SUPERVISOR_TOKEN: ${FORGE_SUPERVISOR_TOKEN:-}
      FORGE_PREDICTOR_TOKEN: ${FORGE_PREDICTOR_TOKEN:-}
      FORGE_ARCHITECT_TOKEN: ${FORGE_ARCHITECT_TOKEN:-}
      FORGE_FILER_TOKEN: ${FORGE_FILER_TOKEN:-}
      FORGE_BOT_USERNAMES: ${FORGE_BOT_USERNAMES:-}
      WOODPECKER_TOKEN: ${WOODPECKER_TOKEN:-}
      CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200}
      CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
      CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "60"
      CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1"
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
      ANTHROPIC_BASE_URL: ${ANTHROPIC_BASE_URL:-}
      FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-}
      DISINTO_CONTAINER: "1"
      PROJECT_NAME: ${PROJECT_NAME:-project}
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      WOODPECKER_DATA_DIR: /woodpecker-data
      WOODPECKER_REPO_ID: "PLACEHOLDER_WP_REPO_ID"
      CLAUDE_CONFIG_DIR: ${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
      POLL_INTERVAL: ${POLL_INTERVAL:-300}
      GARDENER_INTERVAL: ${GARDENER_INTERVAL:-21600}
      ARCHITECT_INTERVAL: ${ARCHITECT_INTERVAL:-21600}
      PLANNER_INTERVAL: ${PLANNER_INTERVAL:-43200}
      SUPERVISOR_INTERVAL: ${SUPERVISOR_INTERVAL:-1200}
      AGENT_ROLES: review,dev,gardener,architect,planner,predictor,supervisor
    healthcheck:
      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
      interval: 60s
      timeout: 5s
      retries: 3
      start_period: 30s
    depends_on:
      forgejo:
        condition: service_healthy
      woodpecker:
        condition: service_started
    networks:
      - disinto-net
 LLAMAEOF
  fi
  # Resume the rest of the compose file (runner onward)
  cat >> "$compose_file" <<'COMPOSEEOF'
  runner:
-    image: ghcr.io/disinto/agents:${DISINTO_IMAGE_TAG:-latest}
+    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    profiles: ["vault"]
    security_opt:
      - apparmor=unconfined
    volumes:
      - agent-data:/home/agent/data
      - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
      - ${HOME}/.claude.json:/home/agent/.claude.json:ro
    environment:
      FORGE_URL: http://forgejo:3000
      DISINTO_CONTAINER: "1"
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      CLAUDE_CONFIG_DIR: ${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
    # Vault redesign in progress (PR-based approval, see #73-#77)
    # This container is being replaced — entrypoint will be updated in follow-up
    networks:
@ -546,9 +325,8 @@ LLAMAEOF
  # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging
  # Serves on ports 80/443, routes based on path
  edge:
-    image: ghcr.io/disinto/edge:${DISINTO_IMAGE_TAG:-latest}
+    build: ./docker/edge
    container_name: disinto-edge
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    ports:
@ -560,43 +338,19 @@ LLAMAEOF
      - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
      - FORGE_OPS_REPO=${FORGE_OPS_REPO:-disinto-admin/disinto-ops}
      - FORGE_TOKEN=${FORGE_TOKEN:-}
      - FORGE_PASS=${FORGE_PASS:-}
      - FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin}
      - FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-}
      - OPS_REPO_ROOT=/opt/disinto-ops
      - PROJECT_REPO_ROOT=/opt/disinto
      - PRIMARY_BRANCH=main
      - CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
      # Reverse tunnel (optional — set by `disinto edge register`, see #622)
      - EDGE_TUNNEL_HOST=${EDGE_TUNNEL_HOST:-}
      - EDGE_TUNNEL_USER=${EDGE_TUNNEL_USER:-tunnel}
      - EDGE_TUNNEL_PORT=${EDGE_TUNNEL_PORT:-}
      - EDGE_TUNNEL_FQDN=${EDGE_TUNNEL_FQDN:-}
      # Subdomain fallback (#713): if subpath routing (#704/#708) fails, add:
      #   EDGE_TUNNEL_FQDN_FORGE, EDGE_TUNNEL_FQDN_CI, EDGE_TUNNEL_FQDN_CHAT
      # See docs/edge-routing-fallback.md for the full pivot plan.
      # Shared secret for Caddy ↔ chat forward_auth (#709)
      - FORWARD_AUTH_SECRET=${FORWARD_AUTH_SECRET:-}
    volumes:
      - ./docker/Caddyfile:/etc/caddy/Caddyfile
      - caddy_data:/data
      - /var/run/docker.sock:/var/run/docker.sock
      - ./secrets/tunnel_key:/run/secrets/tunnel_key:ro
      - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
      - ${CLAUDE_CONFIG_FILE:-${HOME}/.claude.json}:/home/agent/.claude.json:ro
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost:2019/config/"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 15s
    depends_on:
-      forgejo:
+      - forgejo
-        condition: service_healthy
+      - woodpecker
-      woodpecker:
+      - staging
        condition: service_started
      staging:
        condition: service_started
    networks:
      - disinto-net
@ -607,12 +361,6 @@ LLAMAEOF
    command: ["caddy", "file-server", "--root", "/srv/site"]
    security_opt:
      - apparmor=unconfined
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:2019/config/"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    volumes:
      - ./docker:/srv/site:ro
    networks:
@ -632,62 +380,12 @@ LLAMAEOF
      - disinto-net
    command: ["echo", "staging slot — replace with project image"]
  # Chat container — Claude chat UI backend (#705)
  # Internal service only; edge proxy routes to chat:8080
  # Sandbox hardened per #706 — no docker.sock, read-only rootfs, minimal caps
  chat:
    build:
      context: ./docker/chat
      dockerfile: Dockerfile
    container_name: disinto-chat
    restart: unless-stopped
    read_only: true
    tmpfs:
      - /tmp:size=64m
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL
    pids_limit: 128
    mem_limit: 512m
    memswap_limit: 512m
    volumes:
      # Mount claude binary from host (same as agents)
      - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
      # Throwaway named volume for chat config (isolated from host ~/.claude)
      - chat-config:/var/chat/config
      # Chat history persistence: per-user NDJSON files on bind-mounted host volume
      - ${CHAT_HISTORY_DIR:-./state/chat-history}:/var/lib/chat/history
    environment:
      CHAT_HOST: "0.0.0.0"
      CHAT_PORT: "8080"
      FORGE_URL: http://forgejo:3000
      CHAT_OAUTH_CLIENT_ID: ${CHAT_OAUTH_CLIENT_ID:-}
      CHAT_OAUTH_CLIENT_SECRET: ${CHAT_OAUTH_CLIENT_SECRET:-}
      EDGE_TUNNEL_FQDN: ${EDGE_TUNNEL_FQDN:-}
      DISINTO_CHAT_ALLOWED_USERS: ${DISINTO_CHAT_ALLOWED_USERS:-}
      # Shared secret for Caddy forward_auth verify endpoint (#709)
      FORWARD_AUTH_SECRET: ${FORWARD_AUTH_SECRET:-}
      # Cost caps / rate limiting (#711)
      CHAT_MAX_REQUESTS_PER_HOUR: ${CHAT_MAX_REQUESTS_PER_HOUR:-60}
      CHAT_MAX_REQUESTS_PER_DAY: ${CHAT_MAX_REQUESTS_PER_DAY:-500}
      CHAT_MAX_TOKENS_PER_DAY: ${CHAT_MAX_TOKENS_PER_DAY:-1000000}
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    networks:
      - disinto-net
 volumes:
  forgejo-data:
  woodpecker-data:
  agent-data:
  project-repos:
  caddy_data:
  chat-config:
 networks:
  disinto-net:
@ -698,15 +396,6 @@ COMPOSEEOF
  # (Docker Compose cannot resolve it; it's a shell variable, not a .env var)
  sed -i "s|\${PROJECT_NAME:-project}|${PROJECT_NAME}|g" "$compose_file"
  # Patch WOODPECKER_REPO_ID — interpolate at generation time
  # (Docker Compose cannot resolve it; it's a shell variable, not a .env var)
  if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then
    sed -i "s|PLACEHOLDER_WP_REPO_ID|${wp_repo_id}|g" "$compose_file"
  else
    # Default to empty if no repo_id found (agents will handle gracefully)
    sed -i "s|PLACEHOLDER_WP_REPO_ID||g" "$compose_file"
  fi
  # Patch the forgejo port mapping into the file if non-default
  if [ "$forge_port" != "3000" ]; then
    # Add port mapping to forgejo service so it's reachable from host during init
@ -716,35 +405,20 @@ COMPOSEEOF
  fi
  # Append local-model agent services if any are configured
  # (must run before CLAUDE_BIN_PLACEHOLDER substitution so the placeholder
  # in local-model services is also resolved)
  _generate_local_model_services "$compose_file"
-  # Resolve the Claude CLI binary path and persist as CLAUDE_BIN_DIR in .env.
+  # Patch the Claude CLI binary path — resolve from host PATH at init time.
  # docker-compose.yml references ${CLAUDE_BIN_DIR} so the value must be set.
  local claude_bin
  claude_bin="$(command -v claude 2>/dev/null || true)"
  if [ -n "$claude_bin" ]; then
    # Resolve symlinks to get the real binary path
    claude_bin="$(readlink -f "$claude_bin")"
    sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|g" "$compose_file"
  else
-    echo "Warning: claude CLI not found in PATH — set CLAUDE_BIN_DIR in .env manually" >&2
+    echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2
-    claude_bin="/usr/local/bin/claude"
+    sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|g" "$compose_file"
  fi
  # Persist CLAUDE_BIN_DIR into .env so docker-compose can resolve it.
  local env_file="${FACTORY_ROOT}/.env"
  if [ -f "$env_file" ]; then
    if grep -q "^CLAUDE_BIN_DIR=" "$env_file" 2>/dev/null; then
      sed -i "s|^CLAUDE_BIN_DIR=.*|CLAUDE_BIN_DIR=${claude_bin}|" "$env_file"
    else
      printf 'CLAUDE_BIN_DIR=%s\n' "$claude_bin" >> "$env_file"
    fi
  else
    printf 'CLAUDE_BIN_DIR=%s\n' "$claude_bin" > "$env_file"
  fi
  # In build mode, replace image: with build: for locally-built images
  if [ "$use_build" = true ]; then
    sed -i 's|^\(  agents:\)|\1|' "$compose_file"
    sed -i '/^    image: ghcr\.io\/disinto\/agents:/{s|image: ghcr\.io/disinto/agents:.*|build:\n      context: .\n      dockerfile: docker/agents/Dockerfile|}' "$compose_file"
    sed -i '/^    image: ghcr\.io\/disinto\/edge:/{s|image: ghcr\.io/disinto/edge:.*|build: ./docker/edge|}' "$compose_file"
  fi
  echo "Created: ${compose_file}"
@ -763,11 +437,7 @@ _generate_agent_docker_impl() {
  fi
 }
-# Generate docker/Caddyfile for the edge proxy.
+# Generate docker/Caddyfile template for edge proxy.
 # **CANONICAL SOURCE**: This generator is the single source of truth for the Caddyfile.
 # Output path: ${FACTORY_ROOT}/docker/Caddyfile (gitignored — generated artifact).
 # The edge compose service mounts this path as /etc/caddy/Caddyfile.
 # On a fresh clone, `disinto init` calls generate_caddyfile before first `disinto up`.
 _generate_caddyfile_impl() {
  local docker_dir="${FACTORY_ROOT}/docker"
  local caddyfile="${docker_dir}/Caddyfile"
@ -782,13 +452,8 @@ _generate_caddyfile_impl() {
 # IP-only binding at bootstrap; domain + TLS added later via vault resource request
 :80 {
    # Redirect root to Forgejo
    handle / {
        redir /forge/ 302
    }
    # Reverse proxy to Forgejo
-    handle /forge/* {
+    handle /forgejo/* {
        reverse_proxy forgejo:3000
    }
@ -797,28 +462,10 @@ _generate_caddyfile_impl() {
        reverse_proxy woodpecker:8000
    }
-    # Reverse proxy to staging
+    # Default: proxy to staging container
-    handle /staging/* {
+    handle {
        reverse_proxy staging:80
    }
    # Chat service — reverse proxy to disinto-chat backend (#705)
    # OAuth routes bypass forward_auth — unauthenticated users need these (#709)
    handle /chat/login {
        reverse_proxy chat:8080
    }
    handle /chat/oauth/callback {
        reverse_proxy chat:8080
    }
    # Defense-in-depth: forward_auth stamps X-Forwarded-User from session (#709)
    handle /chat/* {
        forward_auth chat:8080 {
            uri /chat/auth/verify
            copy_headers X-Forwarded-User
            header_up X-Forward-Auth-Secret {$FORWARD_AUTH_SECRET}
        }
        reverse_proxy chat:8080
    }
 }
 CADDYFILEEOF
--- a/lib/git-creds.sh
+++ b/lib/git-creds.sh
@ -1,173 +0,0 @@
 #!/usr/bin/env bash
 # git-creds.sh — Shared git credential helper configuration
 #
 # Configures a static credential helper for Forgejo password-based HTTP auth.
 # Forgejo 11.x rejects API tokens for git push (#361); password auth works.
 # This ensures all git operations (clone, fetch, push) use password auth
 # without needing tokens embedded in remote URLs (#604).
 #
 # Usage:
 #   source "${FACTORY_ROOT}/lib/git-creds.sh"
 #   configure_git_creds [HOME_DIR] [RUN_AS_CMD]
 #   repair_baked_cred_urls [--as RUN_AS_CMD] DIR [DIR ...]
 #
 # Globals expected:
 #   FORGE_PASS  — bot password for git HTTP auth
 #   FORGE_URL   — Forge instance URL (e.g. http://forgejo:3000)
 #   FORGE_TOKEN — API token (used to resolve bot username)
 set -euo pipefail
 # configure_git_creds [HOME_DIR] [RUN_AS_CMD]
 #   HOME_DIR    — home directory for the git user (default: $HOME or /home/agent)
 #   RUN_AS_CMD  — command prefix to run as another user (e.g. "gosu agent")
 #
 # Writes a credential helper script and configures git to use it globally.
 #
 # Globals read:    FORGE_PASS, FORGE_URL, FORGE_TOKEN, _GIT_CREDS_LOG_FN
 # Globals written: BOT_USER (exported once the username has been resolved)
 # External tools:  curl, jq, sed, awk, git
 # Returns:
 #   0 — nothing to do because FORGE_PASS or FORGE_URL is empty, or the helper
 #       was written and the final verification request succeeded
 #   1 — the bot username could not be resolved, or Forgejo rejected the
 #       username/password pair during verification
 configure_git_creds() {
  local home_dir="${1:-${HOME:-/home/agent}}"
  local run_as="${2:-}"
  # Without both a password and a forge URL there is nothing to configure;
  # this is treated as success, not as an error.
  if [ -z "${FORGE_PASS:-}" ] || [ -z "${FORGE_URL:-}" ]; then
    return 0
  fi
  # forge_host: FORGE_URL with the http(s):// scheme and everything from the
  # first "/" onward removed. forge_proto: the part of FORGE_URL before "://".
  local forge_host forge_proto
  forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||')
  forge_proto=$(printf '%s' "$FORGE_URL" | sed 's|://.*||')
  # Name of the command used for log output; callers may override it.
  local log_fn="${_GIT_CREDS_LOG_FN:-echo}"
  # Determine the bot username from FORGE_TOKEN identity with retry/backoff.
  # Never fall back to a hardcoded default — a wrong username paired with the
  # real password produces a cryptic 401 that's much harder to diagnose than
  # a missing credential helper (#741).
  # When FORGE_TOKEN is empty the loop is skipped entirely and the error
  # branch below is taken (its message still mentions 5 attempts).
  local bot_user=""
  if [ -n "${FORGE_TOKEN:-}" ]; then
    local attempt
    for attempt in 1 2 3 4 5; do
      bot_user=$(curl -sf --max-time 5 -H "Authorization: token ${FORGE_TOKEN}" \
        "${FORGE_URL}/api/v1/user" 2>/dev/null | jq -r '.login // empty') || bot_user=""
      if [ -n "$bot_user" ]; then
        break
      fi
      # The delay equals the attempt number in seconds (1s, 2s, ... 5s).
      $log_fn "WARNING: Forgejo not reachable (attempt ${attempt}/5) — retrying in ${attempt}s"
      sleep "$attempt"
    done
  fi
  if [ -z "$bot_user" ]; then
    $log_fn "ERROR: Could not determine bot username from FORGE_TOKEN after 5 attempts — credential helper NOT configured"
    $log_fn "ERROR: git push will fail until this is resolved. Restart the container after Forgejo is healthy."
    return 1
  fi
  # Export BOT_USER so downstream functions (e.g. configure_git_identity) can
  # reuse the resolved value without a redundant API call.
  export BOT_USER="$bot_user"
  local helper_path="${home_dir}/.git-credentials-helper"
  # Write a static credential helper script (git credential protocol)
  # The heredoc delimiter is unquoted: ${forge_proto}, ${forge_host} and
  # ${bot_user} are expanded now and baked into the file, while the escaped
  # \$1 and \$FORGE_PASS stay literal and are evaluated when the helper runs.
  cat > "$helper_path" <<CREDEOF
 #!/bin/sh
 # Auto-generated git credential helper for Forgejo password auth (#361, #604)
 # Reads \$FORGE_PASS from env at runtime — file is safe to read on disk.
 # Only respond to "get" action; ignore "store" and "erase".
 [ "\$1" = "get" ] || exit 0
 # Read and discard stdin (git sends protocol/host info)
 cat >/dev/null
 echo "protocol=${forge_proto}"
 echo "host=${forge_host}"
 echo "username=${bot_user}"
 echo "password=\$FORGE_PASS"
 CREDEOF
  chmod 755 "$helper_path"
  # Set ownership and configure git if running as a different user
  # The owner is taken from the last word of RUN_AS_CMD ("agent" in
  # "gosu agent"); a failing chown is ignored. $run_as is left unquoted so a
  # multi-word prefix splits into command plus arguments.
  if [ -n "$run_as" ]; then
    local target_user
    target_user=$(echo "$run_as" | awk '{print $NF}')
    chown "${target_user}:${target_user}" "$helper_path" 2>/dev/null || true
    $run_as bash -c "git config --global credential.helper '${helper_path}'"
  else
    git config --global credential.helper "$helper_path"
  fi
  # Set safe.directory to work around dubious ownership after container restart
  if [ -n "$run_as" ]; then
    $run_as bash -c "git config --global --add safe.directory '*'"
  else
    git config --global --add safe.directory '*'
  fi
  # Verify the credential helper actually authenticates (#741).
  # A helper that was written with a valid username but a mismatched password
  # would silently 401 on every push — catch it now.
  # NOTE(review): on failure the helper file is deleted, but the
  # credential.helper setting written above is not reverted — confirm that
  # leaving git pointed at a missing helper is intended.
  if ! curl -sf --max-time 5 -u "${bot_user}:${FORGE_PASS}" \
    "${FORGE_URL}/api/v1/user" >/dev/null 2>&1; then
    $log_fn "ERROR: credential helper verification failed — ${bot_user}:FORGE_PASS rejected by Forgejo"
    rm -f "$helper_path"
    return 1
  fi
  $log_fn "Git credential helper verified: ${bot_user}@${forge_host}"
 }
 # repair_baked_cred_urls [--as RUN_AS_CMD] DIR [DIR ...]
 #   Scans git repos under each DIR and rewrites remote URLs that contain
 #   embedded credentials (user:pass@host) to clean URLs.
 #   Logs each repair so operators can see the migration happened.
 #
 #   A DIR that itself contains .git is treated as one repo; otherwise its
 #   immediate subdirectories containing .git are used. Only the "origin"
 #   remote is inspected. DIRs that do not exist are skipped.
 #
 #   Optional --as flag runs git operations under the specified user wrapper
 #   (e.g. "gosu agent") to avoid dubious-ownership issues on user-owned repos.
 #
 # Set _GIT_CREDS_LOG_FN to a custom log function name (default: echo).
 #
 # Returns: 0 after scanning (including when no DIR was given),
 #          1 when --as is passed without a value.
 repair_baked_cred_urls() {
  local log_fn="${_GIT_CREDS_LOG_FN:-echo}"
  local run_as=""
  local -a dirs=()
  local -a repos=()
  # Every loop variable is function-local so nothing leaks into the caller.
  local dir sub repo url clean_url
  while [ $# -gt 0 ]; do
    case "$1" in
      --as)
        # Refuse a dangling --as instead of reading an unset $1.
        if [ $# -lt 2 ]; then
          $log_fn "ERROR: repair_baked_cred_urls: --as requires an argument"
          return 1
        fi
        run_as="$2"
        shift 2
        ;;
      *)
        dirs+=("$1")
        shift
        ;;
    esac
  done
  # Guard empty-array expansion (older bash errors on it under `set -u`).
  [ "${#dirs[@]}" -gt 0 ] || return 0
  for dir in "${dirs[@]}"; do
    [ -d "$dir" ] || continue
    # Find git repos: either dir itself or immediate subdirectories
    repos=()
    if [ -d "${dir}/.git" ]; then
      repos+=("$dir")
    else
      for sub in "$dir"/*/; do
        if [ -d "${sub}.git" ]; then
          repos+=("${sub%/}")
        fi
      done
    fi
    [ "${#repos[@]}" -gt 0 ] || continue
    for repo in "${repos[@]}"; do
      # $run_as is intentionally unquoted: it is a command prefix that must
      # word-split into command + arguments.
      if [ -n "$run_as" ]; then
        url=$($run_as git -C "$repo" config --get remote.origin.url 2>/dev/null || true)
      else
        url=$(git -C "$repo" config --get remote.origin.url 2>/dev/null || true)
      fi
      [ -n "$url" ] || continue
      # Check if URL contains embedded credentials: http(s)://user:pass@host
      if printf '%s' "$url" | grep -qE '^https?://[^/]+@'; then
        # Strip credentials: http(s)://user:pass@host/path -> http(s)://host/path
        clean_url=$(printf '%s' "$url" | sed -E 's|(https?://)[^@]+@|\1|')
        if [ -n "$run_as" ]; then
          $run_as git -C "$repo" remote set-url origin "$clean_url"
        else
          git -C "$repo" remote set-url origin "$clean_url"
        fi
        $log_fn "Repaired baked credentials in ${repo} (remote origin -> ${clean_url})"
      fi
    done
  done
  return 0
 }
--- a/lib/hvault.sh
+++ b/lib/hvault.sh
@ -1,279 +0,0 @@
 #!/usr/bin/env bash
 # hvault.sh — HashiCorp Vault helper module
 #
 # Typed, audited helpers for Vault KV v2 access so no script re-implements
 # `curl -H "X-Vault-Token: ..."` ad-hoc.
 #
 # Usage: source this file, then call any hvault_* function.
 #
 # Environment:
 #   VAULT_ADDR  — Vault server address (required, no default)
 #   VAULT_TOKEN — auth token (precedence: env > /etc/vault.d/root.token)
 #
 # All functions emit structured JSON errors to stderr on failure.
 set -euo pipefail
 # ── Internal helpers ─────────────────────────────────────────────────────────
 # _hvault_err FUNC MESSAGE [DETAIL]
 #   Report a failure as one JSON object on stderr, assembled by jq:
 #   {error:true, function:FUNC, message:MESSAGE, detail:DETAIL}.
 #   DETAIL falls back to the empty string when omitted.
 _hvault_err() {
  local origin="$1"
  local summary="$2"
  local extra="${3:-}"
  # Collect the jq options first so the filter stands alone on the call line.
  local -a jq_opts=(
    -n
    --arg func "$origin"
    --arg msg "$summary"
    --arg detail "$extra"
  )
  jq "${jq_opts[@]}" \
    '{error:true,function:$func,message:$msg,detail:$detail}' >&2
 }
 # _hvault_resolve_token — make sure VAULT_TOKEN is populated
 #   An already-set, non-empty VAULT_TOKEN is left untouched. Otherwise the
 #   contents of /etc/vault.d/root.token are loaded into VAULT_TOKEN and
 #   exported.
 #   Returns 0 when a token is available, 1 when neither source exists.
 _hvault_resolve_token() {
  local root_token_path="/etc/vault.d/root.token"
  # Environment wins over the token file.
  [ -z "${VAULT_TOKEN:-}" ] || return 0
  [ -f "$root_token_path" ] || return 1
  VAULT_TOKEN="$(cat "$root_token_path")"
  export VAULT_TOKEN
  return 0
 }
 # _hvault_check_prereqs CALLER
 #   Confirm that VAULT_ADDR is set and that a token can be resolved (via
 #   _hvault_resolve_token) before CALLER talks to Vault.
 #   CALLER is the name of the invoking function, used in the error report.
 #   Returns 1 after emitting a JSON error when either prerequisite is missing.
 _hvault_check_prereqs() {
  local who="$1"
  [ -n "${VAULT_ADDR:-}" ] || {
    _hvault_err "$who" "VAULT_ADDR is not set" "export VAULT_ADDR before calling $who"
    return 1
  }
  _hvault_resolve_token || {
    _hvault_err "$who" "VAULT_TOKEN is not set and /etc/vault.d/root.token not found" \
      "export VAULT_TOKEN or write token to /etc/vault.d/root.token"
    return 1
  }
 }
 # _hvault_request METHOD PATH [DATA]
 #   Send one request to ${VAULT_ADDR}/v1/PATH, authenticated with
 #   VAULT_TOKEN. DATA, when non-empty, is sent as the request body.
 #   Outputs: the response body on stdout for a 2xx status
 #   Returns: 0 on 2xx; 1 when curl fails or the status is anything else
 #            (a JSON error goes to stderr in both failure cases)
 _hvault_request() {
  local verb="$1" api_path="$2" payload="${3:-}"
  local endpoint="${VAULT_ADDR}/v1/${api_path}"
  local status reply
  # The body is written to a scratch file so that curl's stdout carries only
  # the status code produced by -w.
  local scratch
  scratch="$(mktemp)"
  local -a opts=(
    -s
    -w '%{http_code}'
    -H "X-Vault-Token: ${VAULT_TOKEN}"
    -H "Content-Type: application/json"
    -X "$verb"
    -o "$scratch"
  )
  [ -z "$payload" ] || opts+=(-d "$payload")
  if ! status="$(curl "${opts[@]}" "$endpoint")"; then
    _hvault_err "_hvault_request" "curl failed" "url=$endpoint"
    rm -f "$scratch"
    return 1
  fi
  reply="$(cat "$scratch")"
  rm -f "$scratch"
  # Anything outside 200-299 is reported with the body as the error detail.
  if [[ "$status" == 2[0-9][0-9] ]]; then
    printf '%s' "$reply"
    return 0
  fi
  _hvault_err "_hvault_request" "HTTP $status" "$reply"
  return 1
 }
 # ── Public API ───────────────────────────────────────────────────────────────
 # hvault_kv_get PATH [KEY]
 #   Fetch the KV v2 secret stored at secret/data/PATH.
 #   With KEY, print only that field's raw value; without it, print the whole
 #   data object as JSON.
 #   Returns 1 (after a JSON error on stderr) when PATH is missing, the
 #   prerequisite check or the request fails, or jq cannot produce the value.
 hvault_kv_get() {
  local secret_path="${1:-}" field="${2:-}"
  if [ -z "$secret_path" ]; then
    _hvault_err "hvault_kv_get" "PATH is required" "usage: hvault_kv_get PATH [KEY]"
    return 1
  fi
  _hvault_check_prereqs "hvault_kv_get" || return 1
  local reply
  reply="$(_hvault_request GET "secret/data/${secret_path}")" || return 1
  # No KEY: hand back the full data object.
  if [ -z "$field" ]; then
    if ! printf '%s' "$reply" | jq -e '.data.data' 2>/dev/null; then
      _hvault_err "hvault_kv_get" "failed to parse response" "path=$secret_path"
      return 1
    fi
    return 0
  fi
  # KEY given: jq -e exits non-zero when the selected value is null or false,
  # which is reported as "key not found".
  if ! printf '%s' "$reply" | jq -e -r --arg key "$field" '.data.data[$key]' 2>/dev/null; then
    _hvault_err "hvault_kv_get" "key not found" "key=$field path=$secret_path"
    return 1
  fi
 }
 # hvault_kv_put PATH KEY=VAL [KEY=VAL ...]
 #   Write a KV v2 secret at secret/data/PATH. Accepts one or more KEY=VAL
 #   pairs; everything after the first "=" is the value, so values may
 #   themselves contain "=". All values are stored as JSON strings.
 #   Returns 1 (after a JSON error on stderr) when PATH or the pairs are
 #   missing, a pair has no "=" or an empty KEY, or a prerequisite is unmet;
 #   otherwise returns the status of the POST request.
 hvault_kv_put() {
  local path="${1:-}"
  shift || true
  if [ -z "$path" ] || [ $# -eq 0 ]; then
    _hvault_err "hvault_kv_put" "PATH and at least one KEY=VAL required" \
      "usage: hvault_kv_put PATH KEY=VAL [KEY=VAL ...]"
    return 1
  fi
  _hvault_check_prereqs "hvault_kv_put" || return 1
  # Build JSON payload from KEY=VAL pairs entirely via jq
  local payload='{"data":{}}'
  # Loop variables are declared local so they do not leak into the caller.
  local kv k v
  for kv in "$@"; do
    k="${kv%%=*}"
    v="${kv#*=}"
    # k equals the whole argument when it contains no "="; an empty k means
    # the argument started with "=".
    if [ "$k" = "$kv" ] || [ -z "$k" ]; then
      _hvault_err "hvault_kv_put" "invalid KEY=VAL pair" "got: $kv"
      return 1
    fi
    payload="$(printf '%s' "$payload" | jq --arg k "$k" --arg v "$v" '.data[$k] = $v')"
  done
  _hvault_request POST "secret/data/${path}" "$payload" >/dev/null
 }
 # hvault_kv_list PATH
 #   Enumerate the keys below secret/metadata/PATH using a LIST request.
 #   Outputs: JSON array of key names
 #   Returns 1 (after a JSON error on stderr) when PATH is missing, a
 #   prerequisite is unmet, the request fails, or .data.keys cannot be read.
 hvault_kv_list() {
  local list_path="${1:-}"
  [ -n "$list_path" ] || {
    _hvault_err "hvault_kv_list" "PATH is required" "usage: hvault_kv_list PATH"
    return 1
  }
  _hvault_check_prereqs "hvault_kv_list" || return 1
  local listing
  listing="$(_hvault_request LIST "secret/metadata/${list_path}")" || return 1
  if ! printf '%s' "$listing" | jq -e '.data.keys' 2>/dev/null; then
    _hvault_err "hvault_kv_list" "failed to parse response" "path=$list_path"
    return 1
  fi
 }
 # hvault_policy_apply NAME FILE
 #   Upload the contents of FILE as ACL policy NAME with a PUT to
 #   sys/policies/acl/NAME, so repeated calls create or overwrite the policy.
 #   Returns 1 (after a JSON error on stderr) when an argument is missing,
 #   FILE does not exist, or a prerequisite is unmet; otherwise returns the
 #   status of the PUT request.
 hvault_policy_apply() {
  local name="${1:-}" file="${2:-}"
  if [ -z "$name" ] || [ -z "$file" ]; then
    _hvault_err "hvault_policy_apply" "NAME and FILE are required" \
      "usage: hvault_policy_apply NAME FILE"
    return 1
  fi
  [ -f "$file" ] || {
    _hvault_err "hvault_policy_apply" "policy file not found" "file=$file"
    return 1
  }
  _hvault_check_prereqs "hvault_policy_apply" || return 1
  # The policy text travels as a JSON string under the "policy" key.
  local policy_text request_body
  policy_text="$(cat "$file")"
  request_body="$(jq -n --arg policy "$policy_text" '{"policy": $policy}')"
  _hvault_request PUT "sys/policies/acl/${name}" "$request_body" >/dev/null
 }
 # hvault_jwt_login ROLE JWT
 #   Exchange a JWT for a short-lived Vault token by POSTing
 #   {"role": ROLE, "jwt": JWT} to ${VAULT_ADDR}/v1/auth/jwt/login.
 #   Only VAULT_ADDR is required; no existing VAULT_TOKEN is consulted, which
 #   is why this calls curl directly instead of going through _hvault_request.
 #   Outputs: client token string
 #   Returns: 0 on success; 1 (after a JSON error on stderr) when an argument
 #            or VAULT_ADDR is missing, curl fails, the status is not 2xx, or
 #            the response has no .auth.client_token.
 hvault_jwt_login() {
  local role="${1:-}"
  local jwt="${2:-}"
  if [ -z "$role" ] || [ -z "$jwt" ]; then
    _hvault_err "hvault_jwt_login" "ROLE and JWT are required" \
      "usage: hvault_jwt_login ROLE JWT"
    return 1
  fi
  # Only need VAULT_ADDR, not VAULT_TOKEN (we're obtaining a token)
  if [ -z "${VAULT_ADDR:-}" ]; then
    _hvault_err "hvault_jwt_login" "VAULT_ADDR is not set"
    return 1
  fi
  local url="${VAULT_ADDR}/v1/auth/jwt/login"
  local payload
  payload="$(jq -n --arg role "$role" --arg jwt "$jwt" \
    '{"role": $role, "jwt": $jwt}')"
  # The body goes to a temp file so that curl's stdout carries only the
  # status code produced by -w.
  local tmpfile http_code body
  tmpfile="$(mktemp)"
  http_code="$(curl -s -w '%{http_code}' \
    -H "Content-Type: application/json" \
    -X POST \
    -d "$payload" \
    -o "$tmpfile" \
    "$url")" || {
    # Include the URL in the detail, matching _hvault_request's curl error.
    _hvault_err "hvault_jwt_login" "curl failed" "url=$url"
    rm -f "$tmpfile"
    return 1
  }
  body="$(cat "$tmpfile")"
  rm -f "$tmpfile"
  case "$http_code" in
    2[0-9][0-9])
      printf '%s' "$body" | jq -e -r '.auth.client_token' 2>/dev/null || {
        _hvault_err "hvault_jwt_login" "failed to extract client_token" "$body"
        return 1
      }
      ;;
    *)
      _hvault_err "hvault_jwt_login" "HTTP $http_code" "$body"
      return 1
      ;;
  esac
 }
 # hvault_token_lookup
 #   Query auth/token/lookup-self for the current token and print a JSON
 #   object holding its ttl, policies, accessor and display_name.
 #   Returns 1 (after a JSON error on stderr for the parse case) when a
 #   prerequisite is unmet, the request fails, or jq cannot build the object.
 hvault_token_lookup() {
  _hvault_check_prereqs "hvault_token_lookup" || return 1
  local token_info
  token_info="$(_hvault_request GET "auth/token/lookup-self")" || return 1
  if printf '%s' "$token_info" | jq -e '{
    ttl: .data.ttl,
    policies: .data.policies,
    accessor: .data.accessor,
    display_name: .data.display_name
  }' 2>/dev/null; then
    return 0
  fi
  _hvault_err "hvault_token_lookup" "failed to parse token info"
  return 1
 }
--- a/lib/issue-lifecycle.sh
+++ b/lib/issue-lifecycle.sh
@ -79,27 +79,6 @@ _ilc_backlog_id()      { _ilc_ensure_label_id "backlog"     "#0075ca"; }
 _ilc_in_progress_id()  { _ilc_ensure_label_id "in-progress" "#1d76db"; }
 _ilc_blocked_id()      { _ilc_ensure_label_id "blocked"     "#e11d48"; }
 # ---------------------------------------------------------------------------
 # Labels that indicate an issue belongs to a non-dev agent workflow.
 # Any issue carrying one of these should NOT be touched by dev-poll's
 # stale-detection or orphan-recovery logic.  See issue #608.
 # ---------------------------------------------------------------------------
 _ILC_NON_DEV_LABELS="bug-report vision in-triage prediction/unreviewed prediction/dismissed action formula"
 # issue_is_dev_claimable COMMA_SEPARATED_LABELS
 # Decides whether the dev agent may take ownership of an issue, given its
 # labels as one comma-separated string (no spaces around the commas).
 # Returns 1 as soon as any entry of _ILC_NON_DEV_LABELS appears as a complete
 # label in that list, 0 when none does.
 issue_is_dev_claimable() {
  local labels="$1"
  # Wrap the list in commas so each label can be matched as ",name,".
  local haystack=",${labels},"
  local lbl
  for lbl in $_ILC_NON_DEV_LABELS; do
    case "$haystack" in
      *",${lbl},"*) return 1 ;;
    esac
  done
  return 0
 }
 # ---------------------------------------------------------------------------
 # issue_claim — assign issue to bot, add "in-progress" label, remove "backlog".
 # Args: issue_number
--- a/lib/load-project.sh
+++ b/lib/load-project.sh
@ -97,18 +97,28 @@ done <<< "$_PROJECT_VARS"
 # FORGE_URL: TOML forge_url > existing FORGE_URL > default
 export FORGE_URL="${FORGE_URL:-http://localhost:3000}"
 if [ -n "$FORGE_REPO" ]; then
-  export FORGE_API_BASE="${FORGE_URL}/api/v1"
+  export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
  export FORGE_API="${FORGE_API_BASE}/repos/${FORGE_REPO}"
  export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}"
  # Extract repo owner (first path segment of owner/repo)
  export FORGE_REPO_OWNER="${FORGE_REPO%%/*}"
 fi
-# PROJECT_REPO_ROOT and OPS_REPO_ROOT: no fallback derivation from USER/HOME.
+# Derive PROJECT_REPO_ROOT if not explicitly set
-# These must be set by the entrypoint (container) or the TOML (host CLI).
+if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
-# Inside the container, the entrypoint exports the correct paths before agent
+  export PROJECT_REPO_ROOT="/home/${USER}/${PROJECT_NAME}"
-# scripts source env.sh; the TOML's host-perspective paths are skipped by the
+fi
-# DISINTO_CONTAINER guard above.
+
 # Derive OPS_REPO_ROOT if not explicitly set
 if [ -z "${OPS_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
  export OPS_REPO_ROOT="/home/${USER}/${PROJECT_NAME}-ops"
 fi
 # Inside the container, always derive repo paths from PROJECT_NAME — the TOML
 # carries host-perspective paths that do not exist in the container filesystem.
 if [ "${DISINTO_CONTAINER:-}" = "1" ] && [ -n "${PROJECT_NAME:-}" ]; then
  export PROJECT_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}"
  export OPS_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}-ops"
 fi
 # Derive FORGE_OPS_REPO if not explicitly set
 if [ -z "${FORGE_OPS_REPO:-}" ] && [ -n "${FORGE_REPO:-}" ]; then
--- a/lib/mirrors.sh
+++ b/lib/mirrors.sh
@ -1,10 +1,8 @@
 #!/usr/bin/env bash
-# mirrors.sh — Mirror helpers: push to remotes + register pull mirrors via API.
+# mirrors.sh — Push primary branch + tags to configured mirror remotes.
 #
 # Usage: source lib/mirrors.sh; mirror_push
 #        source lib/mirrors.sh; mirror_pull_register <clone_url> <owner> <repo_name> [interval]
 # Requires: PROJECT_REPO_ROOT, PRIMARY_BRANCH, MIRROR_* vars from load-project.sh
 #           FORGE_API_BASE, FORGE_TOKEN for pull-mirror registration
 # shellcheck disable=SC2154  # globals set by load-project.sh / calling script
@ -39,73 +37,3 @@ mirror_push() {
    log "mirror: pushed to ${name} (pid $!)"
  done
 }
 # ---------------------------------------------------------------------------
 # mirror_pull_register — register a Forgejo pull mirror via the /repos/migrate API.
 #
 # Creates a new repo as a pull mirror of an external source.  Works against
 # empty target repos (the repo is created by the API call itself).
 #
 # Usage:
 #   mirror_pull_register <clone_url> <owner> <repo_name> [interval]
 #
 # Args:
 #   clone_url  — HTTPS URL of the source repo (e.g. https://codeberg.org/johba/disinto.git)
 #   owner      — Forgejo org or user that will own the mirror repo
 #   repo_name  — name of the new mirror repo on Forgejo
 #   interval   — sync interval (default: "8h0m0s"; Forgejo duration format)
 #
 # Requires:
 #   FORGE_API_BASE, FORGE_TOKEN (from env.sh)
 #
 # Returns 0 on success, 1 on failure.  Prints the new repo JSON to stdout.
 # Failures (missing env vars, missing arguments, curl transport error, or a
 # non-2xx HTTP status) are reported on stderr.
 # ---------------------------------------------------------------------------
 mirror_pull_register() {
  # Default every positional to "" so a missing argument reaches the usage
  # check below instead of aborting the shell under `set -u`.
  local clone_url="${1:-}"
  local owner="${2:-}"
  local repo_name="${3:-}"
  local interval="${4:-8h0m0s}"
  if [ -z "${FORGE_API_BASE:-}" ] || [ -z "${FORGE_TOKEN:-}" ]; then
    echo "ERROR: FORGE_API_BASE and FORGE_TOKEN must be set" >&2
    return 1
  fi
  if [ -z "$clone_url" ] || [ -z "$owner" ] || [ -z "$repo_name" ]; then
    echo "Usage: mirror_pull_register <clone_url> <owner> <repo_name> [interval]" >&2
    return 1
  fi
  local payload
  payload=$(jq -n \
    --arg clone_addr "$clone_url" \
    --arg repo_name  "$repo_name" \
    --arg repo_owner "$owner" \
    --arg interval   "$interval" \
    '{
      clone_addr:      $clone_addr,
      repo_name:       $repo_name,
      repo_owner:      $repo_owner,
      mirror:          true,
      mirror_interval: $interval,
      service:         "git"
    }')
  # curl appends "\n<status>" to the response body; the two are split below.
  local response http_code body
  response=$(curl -s -w "\n%{http_code}" -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API_BASE}/repos/migrate" \
    -d "$payload") || {
    echo "ERROR: mirror_pull_register: curl failed for ${FORGE_API_BASE}/repos/migrate" >&2
    return 1
  }
  http_code=$(printf '%s' "$response" | tail -n1)
  body=$(printf '%s' "$response" | sed '$d')
  # Pattern match rather than integer comparison, so an empty or non-numeric
  # status falls through to the error branch cleanly.
  case "$http_code" in
    2[0-9][0-9])
      printf '%s\n' "$body"
      return 0
      ;;
    *)
      echo "ERROR: mirror_pull_register failed (HTTP ${http_code}): ${body}" >&2
      return 1
      ;;
  esac
 }
--- a/lib/ops-setup.sh
+++ b/lib/ops-setup.sh
@ -5,10 +5,10 @@
 #   source "$(dirname "$0")/../lib/ops-setup.sh"
 #
 # Required globals: FORGE_URL, FORGE_TOKEN, FACTORY_ROOT
-# Optional: HUMAN_TOKEN (falls back to FORGE_TOKEN for admin operations)
+# Optional: admin_token (falls back to FORGE_TOKEN for admin operations)
 #
 # Functions:
-#   setup_ops_repo <forge_url> <ops_slug> <ops_root> [primary_branch] [admin_token]
+#   setup_ops_repo <forge_url> <ops_slug> <ops_root> [primary_branch]
 #     - Create ops repo on Forgejo if it doesn't exist
 #     - Configure bot collaborators with appropriate permissions
 #     - Clone or initialize ops repo locally
@ -26,7 +26,6 @@ set -euo pipefail
 setup_ops_repo() {
  local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}"
  local admin_token="${5:-${HUMAN_TOKEN:-${FORGE_TOKEN}}}"
  local org_name="${ops_slug%%/*}"
  local ops_name="${ops_slug##*/}"
@ -54,57 +53,30 @@ setup_ops_repo() {
  # If not found, try to create it in the configured namespace
  if [ -z "$actual_ops_slug" ]; then
    echo "Creating ops repo in namespace: ${org_name}"
-
+    # Create org if it doesn't exist
-    # Determine if target namespace is a user or an org
+    curl -sf -X POST \
-    local ns_type=""
+      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
    if curl -sf -H "Authorization: token ${admin_token}" \
      "${forge_url}/api/v1/users/${org_name}" >/dev/null 2>&1; then
      # User endpoint exists - check if it's an org
      if curl -sf -H "Authorization: token ${admin_token}" \
        "${forge_url}/api/v1/users/${org_name}" | grep -q '"is_org":true'; then
        ns_type="org"
      else
        ns_type="user"
      fi
    elif curl -sf -H "Authorization: token ${admin_token}" \
      "${forge_url}/api/v1/orgs/${org_name}" >/dev/null 2>&1; then
      # Org endpoint exists
      ns_type="org"
    fi
    local create_endpoint="" via_msg=""
    if [ "$ns_type" = "org" ]; then
      # Org namespace — use org API
      create_endpoint="/api/v1/orgs/${org_name}/repos"
      # Create org if it doesn't exist
      curl -sf -X POST \
        -H "Authorization: token ${admin_token}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/orgs" \
        -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true
    else
      # User namespace — use admin API (requires admin token)
      create_endpoint="/api/v1/admin/users/${org_name}/repos"
      via_msg=" (via admin API)"
    fi
    if curl -sf -X POST \
      -H "Authorization: token ${admin_token}" \
      -H "Content-Type: application/json" \
-      "${forge_url}${create_endpoint}" \
+      "${forge_url}/api/v1/orgs" \
      -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true
    if curl -sf -X POST \
      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs/${org_name}/repos" \
      -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then
      actual_ops_slug="${org_name}/${ops_name}"
-      echo "Ops repo: ${actual_ops_slug} created on Forgejo${via_msg}"
+      echo "Ops repo: ${actual_ops_slug} created on Forgejo"
    else
      # Fallback: use admin API to create repo under the target namespace
      http_code=$(curl -s -o /dev/null -w "%{http_code}" \
        -X POST \
-        -H "Authorization: token ${admin_token}" \
+        -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
        -H "Content-Type: application/json" \
-        "${forge_url}${create_endpoint}" \
+        "${forge_url}/api/v1/admin/users/${org_name}/repos" \
        -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" 2>/dev/null || echo "0")
      if [ "$http_code" = "201" ]; then
        actual_ops_slug="${org_name}/${ops_name}"
-        echo "Ops repo: ${actual_ops_slug} created on Forgejo${via_msg}"
+        echo "Ops repo: ${actual_ops_slug} created on Forgejo (via admin API)"
      else
        echo "Error: failed to create ops repo '${org_name}/${ops_name}' (HTTP ${http_code})" >&2
        return 1
@ -132,7 +104,7 @@ setup_ops_repo() {
  for bot_user in "${!bot_permissions[@]}"; do
    bot_perm="${bot_permissions[$bot_user]}"
    if curl -sf -X PUT \
-      -H "Authorization: token ${admin_token}" \
+      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/${bot_user}" \
      -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1; then
@ -144,7 +116,7 @@ setup_ops_repo() {
  # Add disinto-admin as admin collaborator
  if curl -sf -X PUT \
-    -H "Authorization: token ${admin_token}" \
+    -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
    -H "Content-Type: application/json" \
    "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/disinto-admin" \
    -d '{"permission":"admin"}' >/dev/null 2>&1; then
@ -153,10 +125,11 @@ setup_ops_repo() {
    echo "  ! disinto-admin = admin (already set or failed)"
  fi
-  # Clone ops repo locally if not present — use clean URL, credential helper
+  # Clone ops repo locally if not present
  # supplies auth (#604).
  if [ ! -d "${ops_root}/.git" ]; then
-    local clone_url="${forge_url}/${actual_ops_slug}.git"
+    local auth_url
    auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|")
    local clone_url="${auth_url}/${actual_ops_slug}.git"
    echo "Cloning: ops repo -> ${ops_root}"
    if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then
      echo "Ops repo: ${actual_ops_slug} cloned successfully"
@ -314,37 +287,30 @@ migrate_ops_repo() {
  echo "── Ops repo migration ───────────────────────────────────"
  echo "Checking ${ops_root} for missing directories and files..."
  # Change to ops_root directory to ensure all git operations use the correct repo
  # This prevents "fatal: not in a git directory" errors from stray git commands
  local orig_dir
  orig_dir=$(pwd)
  cd "$ops_root" || {
    echo "Error: failed to change to ${ops_root}" >&2
    return 1
  }
  local migrated=false
  # Canonical ops repo structure (post #407)
  # Directories to ensure exist with .gitkeep files
  local -a dir_keepfiles=(
-    "vault/pending/.gitkeep"
+    "${ops_root}/vault/pending/.gitkeep"
-    "vault/approved/.gitkeep"
+    "${ops_root}/vault/approved/.gitkeep"
-    "vault/fired/.gitkeep"
+    "${ops_root}/vault/fired/.gitkeep"
-    "vault/rejected/.gitkeep"
+    "${ops_root}/vault/rejected/.gitkeep"
-    "knowledge/.gitkeep"
+    "${ops_root}/knowledge/.gitkeep"
-    "evidence/engagement/.gitkeep"
+    "${ops_root}/evidence/engagement/.gitkeep"
-    "evidence/red-team/.gitkeep"
+    "${ops_root}/evidence/red-team/.gitkeep"
-    "evidence/holdout/.gitkeep"
+    "${ops_root}/evidence/holdout/.gitkeep"
-    "evidence/evolution/.gitkeep"
+    "${ops_root}/evidence/evolution/.gitkeep"
-    "evidence/user-test/.gitkeep"
+    "${ops_root}/evidence/user-test/.gitkeep"
-    "sprints/.gitkeep"
+    "${ops_root}/sprints/.gitkeep"
  )
  # Create missing directories and .gitkeep files
  for keepfile in "${dir_keepfiles[@]}"; do
    local dir
    dir=$(dirname "$keepfile")
    if [ ! -f "$keepfile" ]; then
-      mkdir -p "$(dirname "$keepfile")"
+      mkdir -p "$dir"
      touch "$keepfile"
      echo "  + Created: ${keepfile}"
      migrated=true
@ -353,9 +319,9 @@ migrate_ops_repo() {
  # Template files to create if missing (starter content)
  local -a template_files=(
-    "portfolio.md"
+    "${ops_root}/portfolio.md"
-    "prerequisites.md"
+    "${ops_root}/prerequisites.md"
-    "RESOURCES.md"
+    "${ops_root}/RESOURCES.md"
  )
  for tfile in "${template_files[@]}"; do
@ -377,33 +343,26 @@ migrate_ops_repo() {
  # Commit and push changes if any were made
  if [ "$migrated" = true ]; then
    # Auto-configure repo-local git identity if missing
-    if [ -z "$(git config user.name 2>/dev/null)" ]; then
+    if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then
-      git config user.name "disinto-admin"
+      git -C "$ops_root" config user.name "disinto-admin"
    fi
-    if [ -z "$(git config user.email 2>/dev/null)" ]; then
+    if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then
-      git config user.email "disinto-admin@localhost"
+      git -C "$ops_root" config user.email "disinto-admin@localhost"
    fi
-    git add -A
+    git -C "$ops_root" add -A
-    if ! git diff --cached --quiet 2>/dev/null; then
+    if ! git -C "$ops_root" diff --cached --quiet 2>/dev/null; then
-      if ! git commit -m "chore: migrate ops repo structure to canonical layout" -q; then
+      git -C "$ops_root" commit -m "chore: migrate ops repo structure to canonical layout" -q
        echo "Error: failed to commit migration changes" >&2
        cd "$orig_dir"
        return 1
      fi
      # Push if remote exists
-      if git remote get-url origin >/dev/null 2>&1; then
+      if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then
-        if ! git push origin "${primary_branch}" -q 2>/dev/null; then
+        if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then
          echo "Warning: failed to push migration to ops repo" >&2
        else
          echo "Migrated:  ops repo structure updated and pushed"
        else
          echo "Warning: failed to push migration to ops repo" >&2
        fi
      fi
    fi
  else
    echo "  (all directories and files already present)"
  fi
  # Return to original directory
  cd "$orig_dir"
 }
--- a/lib/release.sh
+++ b/lib/release.sh
@ -18,8 +18,8 @@
 # =============================================================================
 set -euo pipefail
-# Source action-vault.sh for _vault_log helper
+# Source vault.sh for _vault_log helper
-source "${FACTORY_ROOT}/lib/action-vault.sh"
+source "${FACTORY_ROOT}/lib/vault.sh"
 # Assert required globals are set before using this module.
 _assert_release_globals() {
--- a/lib/sprint-filer.sh
+++ b/lib/sprint-filer.sh
@ -1,585 +0,0 @@
 #!/usr/bin/env bash
 # =============================================================================
 # sprint-filer.sh — Parse merged sprint PRs and file sub-issues via filer-bot
 #
 # Invoked by the ops-filer Woodpecker pipeline after a sprint PR merges on the
 # ops repo main branch.  Parses each sprints/*.md file for a structured
 # ## Sub-issues block (filer:begin/end markers), then creates idempotent
 # Forgejo issues on the project repo using FORGE_FILER_TOKEN.
 #
 # Permission model (#764):
 #   filer-bot has issues:write on the project repo.
 #   architect-bot is read-only on the project repo.
 #
 # Usage:
 #   sprint-filer.sh <sprint-file.md>          — file sub-issues from one sprint
 #   sprint-filer.sh --all <sprints-dir>       — scan all sprint files in dir
 #
 # Environment:
 #   FORGE_FILER_TOKEN   — filer-bot API token (issues:write on project repo)
 #   FORGE_API           — project repo API base (e.g. http://forgejo:3000/api/v1/repos/org/repo)
 #   FORGE_API_BASE      — API base URL (e.g. http://forgejo:3000/api/v1)
 # =============================================================================
 set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Source env.sh only if not already loaded (allows standalone + sourced use)
 if [ -z "${FACTORY_ROOT:-}" ]; then
  FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
  # shellcheck source=env.sh
  source "$SCRIPT_DIR/env.sh"
 fi
 # ── Logging ──────────────────────────────────────────────────────────────
 LOG_AGENT="${LOG_AGENT:-filer}"
 # Write one timestamped log line to stderr.
 # Args: message words (expanded with "$*")
 # Output: "[<UTC timestamp>] <LOG_AGENT>: <message>" on stderr, nothing on stdout
 filer_log() {
  local stamp
  stamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
  printf '[%s] %s: %s\n' "$stamp" "$LOG_AGENT" "$*" >&2
 }
 # ── Validate required environment ────────────────────────────────────────
 : "${FORGE_FILER_TOKEN:?sprint-filer.sh requires FORGE_FILER_TOKEN}"
 : "${FORGE_API:?sprint-filer.sh requires FORGE_API}"
 # ── Paginated Forgejo API fetch ──────────────────────────────────────────
 # Thin wrapper around forge_api_all (from lib/env.sh) that always passes
 # FORGE_FILER_TOKEN as the second argument.
 # Args: api_path (e.g. /issues?state=all&type=issues)
 # Output: merged JSON array to stdout
 filer_api_all() {
  local api_path="$1"
  forge_api_all "$api_path" "$FORGE_FILER_TOKEN"
 }
 # ── Parse sub-issues block from a sprint markdown file ───────────────────
 # Extracts the YAML-in-markdown between <!-- filer:begin --> and <!-- filer:end -->
 # marker lines. The marker lines themselves are not part of the output; if the
 # file holds several marker pairs, their contents are concatenated in order.
 # Args: sprint_file_path
 # Output: the raw sub-issues block (YAML lines) to stdout
 # Returns: 0 if a non-empty block was found; 1 if the file is missing, has no
 #          filer:begin marker, has an unterminated block, or the block is empty
 parse_subissues_block() {
  local sprint_file="$1"
  if [ ! -f "$sprint_file" ]; then
    filer_log "ERROR: sprint file not found: ${sprint_file}"
    return 1
  fi
  local in_block=false
  local block=""
  local found=false
  local line
  # The "|| [ -n ... ]" part keeps a final line that has no trailing newline.
  # Without it a file ending in the filer:end marker (and no "\n") would be
  # reported as malformed because the end marker is never seen.
  while IFS= read -r line || [ -n "$line" ]; do
    if [[ "$line" == *"<!-- filer:begin -->"* ]]; then
      in_block=true
      found=true
      continue
    fi
    if [[ "$line" == *"<!-- filer:end -->"* ]]; then
      in_block=false
      continue
    fi
    if [ "$in_block" = true ]; then
      block+="${line}"$'\n'
    fi
  done < "$sprint_file"
  if [ "$found" = false ]; then
    filer_log "No filer:begin/end block found in ${sprint_file}"
    return 1
  fi
  # Still inside a block at end of file: the closing marker is missing.
  if [ "$in_block" = true ]; then
    filer_log "ERROR: malformed sub-issues block in ${sprint_file} — filer:begin without filer:end"
    return 1
  fi
  if [ -z "$block" ]; then
    filer_log "WARNING: empty sub-issues block in ${sprint_file}"
    return 1
  fi
  printf '%s' "$block"
 }
 # ── Extract vision issue number from sprint file ─────────────────────────
 # Looks for "#N" references specifically in the "## Vision issues" section
 # to avoid picking up cross-links or related-issue mentions earlier in the file.
 # Falls back to first #N in the file if the section is absent or holds no #N.
 # Matching is done with bash regexes instead of grep pipelines, so a line
 # without "#N" cannot produce a failing pipeline under "set -o pipefail".
 # Args: sprint_file_path
 # Output: first vision issue number found (digits only); empty if none
 # Returns: 0 when the file could be scanned (even if nothing was found),
 #          1 when the file does not exist
 extract_vision_issue() {
  local sprint_file="$1"
  local issue_ref_re='#([0-9]+)'
  local section_re='^##[[:space:]]+Vision[[:space:]]+issues'
  local heading_re='^##'
  local line
  local in_section=false
  local section_done=false
  local fallback=""
  [ -f "$sprint_file" ] || return 1
  while IFS= read -r line || [ -n "$line" ]; do
    # Remember the first #N seen anywhere in the file for the fallback path.
    if [ -z "$fallback" ] && [[ "$line" =~ $issue_ref_re ]]; then
      fallback="${BASH_REMATCH[1]}"
    fi
    if [ "$section_done" = true ]; then
      # Section is over; keep scanning only while a fallback is still needed.
      if [ -n "$fallback" ]; then
        break
      fi
      continue
    fi
    if [[ "$line" =~ $section_re ]]; then
      in_section=true
      continue
    fi
    if [ "$in_section" = true ]; then
      # Any other "##" heading ends the Vision issues section.
      if [[ "$line" =~ $heading_re ]]; then
        in_section=false
        section_done=true
        continue
      fi
      if [[ "$line" =~ $issue_ref_re ]]; then
        printf '%s' "${BASH_REMATCH[1]}"
        return 0
      fi
    fi
  done < "$sprint_file"
  # Fallback: first #N in the entire file (may be empty).
  printf '%s' "$fallback"
  return 0
 }
 # ── Extract sprint slug from file path ───────────────────────────────────
 # Strips the directory part and a trailing ".md" using basename.
 # Args: sprint_file_path
 # Output: slug (filename without .md)
 extract_sprint_slug() {
  local sprint_path="$1"
  local slug
  slug=$(basename "$sprint_path" .md) || return
  printf '%s\n' "$slug"
 }
 # ── Parse individual sub-issue entries from the block ────────────────────
 # The block is a simple YAML-like format:
 #   - id: foo
 #     title: "..."
 #     labels: [backlog, priority]
 #     depends_on: [bar]
 #     body: |
 #       multi-line body
 #
 # Every string that ends up in the JSON (id, title, list items, body) is
 # JSON-escaped. Blank lines between body lines are preserved; blank lines
 # before the first or after the last body line are dropped. Surrounding
 # double quotes are removed from the title and from list items.
 #
 # Args: raw_block (via stdin)
 # Output: JSON array of sub-issue objects with keys
 #         id, title, labels, depends_on, body ("[]" for empty input)
 parse_subissue_entries() {
  local block
  block=$(cat)
  # Use awk to parse the YAML-like structure into JSON
  printf '%s' "$block" | awk '
  # Escape a string for use inside a JSON string literal. Works one character
  # at a time because awk implementations disagree on how backslashes inside
  # a gsub() replacement string are interpreted.
  function json_escape(s,    out, i, n, c) {
    out = ""
    n = length(s)
    for (i = 1; i <= n; i++) {
      c = substr(s, i, 1)
      if (c == "\\") out = out "\\\\"
      else if (c == "\"") out = out "\\\""
      else if (c == "\t") out = out "\\t"
      else if (c == "\r") out = out "\\r"
      else if (c == "\n") out = out "\\n"
      else out = out c
    }
    return out
  }
  # Convert a flow list such as [a, b] into a JSON array of strings.
  # Empty items are skipped, so no stray commas can be produced.
  function to_json_array(s,    parts, n, i, item, out) {
    gsub(/\[/, "", s)
    gsub(/\]/, "", s)
    n = split(s, parts, /, */)
    out = ""
    for (i = 1; i <= n; i++) {
      item = parts[i]
      gsub(/^ */, "", item)
      gsub(/ *$/, "", item)
      sub(/^"/, "", item)
      sub(/"$/, "", item)
      if (item == "") continue
      if (out != "") out = out ","
      out = out "\"" json_escape(item) "\""
    }
    return "[" out "]"
  }
  # Print the entry collected so far (if any) and reset the per-entry state.
  function flush_entry() {
    if (id == "") return
    if (!first) printf ","
    first = 0
    # Every body line was appended with a newline; drop the final one.
    sub(/\n$/, "", body)
    printf "{\"id\":\"%s\",\"title\":\"%s\",\"labels\":%s,\"depends_on\":%s,\"body\":\"%s\"}", json_escape(id), json_escape(title), labels, depends, json_escape(body)
    id = ""; title = ""; labels = "[]"; depends = "[]"; body = ""
    inbody = 0
    blanks = 0
  }
  BEGIN {
    printf "["
    first = 1
    inbody = 0
    blanks = 0
    id = ""; title = ""; labels = "[]"; depends = "[]"; body = ""
  }
  /^- id:/ {
    flush_entry()
    sub(/^- id: */, "")
    id = $0
    next
  }
  /^  title:/ {
    inbody = 0
    sub(/^  title: */, "")
    title = $0
    # Remove surrounding quotes
    sub(/^"/, "", title)
    sub(/"$/, "", title)
    next
  }
  /^  labels:/ {
    inbody = 0
    sub(/^  labels: */, "")
    labels = to_json_array($0)
    next
  }
  /^  depends_on:/ {
    inbody = 0
    sub(/^  depends_on: */, "")
    depends = to_json_array($0)
    next
  }
  /^  body: *\|/ {
    inbody = 1
    body = ""
    blanks = 0
    next
  }
  inbody && /^    / {
    # Emit blank lines that were seen since the previous body line.
    while (blanks > 0) {
      body = body "\n"
      blanks--
    }
    sub(/^    /, "")
    body = body $0 "\n"
    next
  }
  inbody && /^$/ {
    # Hold blank lines back until another body line follows, so that
    # trailing blank lines never become part of the body.
    if (body != "") blanks++
    next
  }
  inbody {
    # Any other line that is not indented by four spaces ends the body.
    inbody = 0
  }
  END {
    flush_entry()
    printf "]"
  }
  '
 }
 # ── Check if sub-issue already exists (idempotency) ─────────────────────
 # Builds the decomposed-from marker for the given triple and looks for it in
 # the body of every issue returned by filer_api_all (all states).
 # Args: vision_issue_number sprint_slug subissue_id
 # Returns: 0 if an issue body contains the marker, 1 otherwise
 subissue_exists() {
  local vision_num="$1" slug="$2" sub_id="$3"
  local marker issues_json
  printf -v marker '<!-- decomposed-from: #%s, sprint: %s, id: %s -->' \
    "$vision_num" "$slug" "$sub_id"
  # Fetch all issues (paginated) and search them for the exact marker
  issues_json=$(filer_api_all "/issues?state=all&type=issues")
  if ! printf '%s' "$issues_json" | jq -e --arg marker "$marker" \
    '[.[] | select(.body // "" | contains($marker))] | length > 0' >/dev/null 2>&1; then
    return 1  # Does not exist
  fi
  return 0  # Already exists
 }
 # ── Resolve label names to IDs ───────────────────────────────────────────
 # Fetches ${FORGE_API}/labels once and looks up each requested name in the
 # response; names with no matching label contribute nothing. If the fetch
 # fails, an empty label list is used.
 # Args: label_names_json (JSON array of strings)
 # Output: JSON array of label IDs
 resolve_label_ids() {
  local wanted_json="$1"
  local repo_labels
  # Fetch all labels from project repo
  if ! repo_labels=$(curl -sf -H "Authorization: token ${FORGE_FILER_TOKEN}" \
    "${FORGE_API}/labels" 2>/dev/null); then
    repo_labels="[]"
  fi
  local name
  # One ID per line from the loop, folded into a JSON number array at the end
  printf '%s' "$wanted_json" \
    | jq -r '.[]' \
    | while IFS= read -r name; do
        if [ -n "$name" ]; then
          printf '%s' "$repo_labels" \
            | jq -r --arg name "$name" '.[] | select(.name == $name) | .id' 2>/dev/null
        fi
      done \
    | jq -Rs 'split("\n") | map(select(. != "") | tonumber)'
 }
 # ── Add in-progress label to vision issue ────────────────────────────────
 # Looks up the ID of the label named "in-progress" in ${FORGE_API}/labels and
 # POSTs it to the issue's labels endpoint.
 # Args: vision_issue_number
 # Returns: 0 if the label was added; 1 if the label list could not be
 #          fetched, the label does not exist, or the POST failed
 add_inprogress_label() {
  local vision_num="$1"
  local repo_labels inprogress_id request_body
  repo_labels=$(curl -sf -H "Authorization: token ${FORGE_FILER_TOKEN}" \
    "${FORGE_API}/labels" 2>/dev/null) || return 1
  # A jq failure is tolerated here; it simply leaves the ID empty.
  inprogress_id=$(printf '%s' "$repo_labels" \
    | jq -r '.[] | select(.name == "in-progress") | .id' 2>/dev/null) || true
  if [ -z "$inprogress_id" ]; then
    filer_log "WARNING: in-progress label not found"
    return 1
  fi
  request_body="{\"labels\": [${inprogress_id}]}"
  if ! curl -sf -X POST \
    -H "Authorization: token ${FORGE_FILER_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API}/issues/${vision_num}/labels" \
    -d "$request_body" >/dev/null 2>&1; then
    filer_log "WARNING: failed to add in-progress label to vision issue #${vision_num}"
    return 1
  fi
  filer_log "Added in-progress label to vision issue #${vision_num}"
  return 0
 }
 # ── File sub-issues from a sprint file ───────────────────────────────────
 # This is the main entry point. Parses the sprint file, extracts sub-issues,
 # and creates them idempotently via the Forgejo API.
 #
 # Steps, in order:
 #   1. Extract the vision issue number and the sprint slug from the file.
 #   2. Extract the filer:begin/end block and convert it to a JSON array.
 #   3. For each entry: skip it if an issue with the same decomposed-from
 #      marker already exists, otherwise POST a new issue whose body ends
 #      with that marker.
 #   4. Try to add the in-progress label to the vision issue (failure ignored).
 #
 # Only the id, title, body and labels of each entry are read here; the
 # depends_on field is not used by this function.
 # subissue_exists and resolve_label_ids are each called once per entry.
 #
 # Args: sprint_file_path
 # Returns: 0 on success, 1 on any error (fail-fast); a block with zero
 #          entries also returns 1
 file_subissues() {
  local sprint_file="$1"
  filer_log "Processing sprint file: ${sprint_file}"
  # Extract metadata
  local vision_issue sprint_slug
  vision_issue=$(extract_vision_issue "$sprint_file")
  sprint_slug=$(extract_sprint_slug "$sprint_file")
  if [ -z "$vision_issue" ]; then
    filer_log "ERROR: could not extract vision issue number from ${sprint_file}"
    return 1
  fi
  filer_log "Vision issue: #${vision_issue}, sprint slug: ${sprint_slug}"
  # Parse the sub-issues block
  local raw_block
  raw_block=$(parse_subissues_block "$sprint_file") || return 1
  # Parse individual entries
  local entries_json
  entries_json=$(printf '%s' "$raw_block" | parse_subissue_entries)
  # Validate parsing produced valid JSON
  if ! printf '%s' "$entries_json" | jq empty 2>/dev/null; then
    filer_log "ERROR: failed to parse sub-issues block as valid JSON in ${sprint_file}"
    return 1
  fi
  local entry_count
  entry_count=$(printf '%s' "$entries_json" | jq 'length')
  if [ "$entry_count" -eq 0 ]; then
    filer_log "WARNING: no sub-issue entries found in ${sprint_file}"
    return 1
  fi
  filer_log "Found ${entry_count} sub-issue(s) to file"
  # File each sub-issue (fail-fast on first error)
  local filed_count=0
  local i=0
  while [ "$i" -lt "$entry_count" ]; do
    local entry
    entry=$(printf '%s' "$entries_json" | jq ".[$i]")
    local subissue_id subissue_title subissue_body labels_json
    subissue_id=$(printf '%s' "$entry" | jq -r '.id')
    subissue_title=$(printf '%s' "$entry" | jq -r '.title')
    subissue_body=$(printf '%s' "$entry" | jq -r '.body')
    labels_json=$(printf '%s' "$entry" | jq -c '.labels')
    # jq -r prints the string "null" for a missing key, hence the extra check
    if [ -z "$subissue_id" ] || [ "$subissue_id" = "null" ]; then
      filer_log "ERROR: sub-issue entry at index ${i} has no id — aborting"
      return 1
    fi
    if [ -z "$subissue_title" ] || [ "$subissue_title" = "null" ]; then
      filer_log "ERROR: sub-issue '${subissue_id}' has no title — aborting"
      return 1
    fi
    # Idempotency check
    if subissue_exists "$vision_issue" "$sprint_slug" "$subissue_id"; then
      filer_log "Sub-issue '${subissue_id}' already exists — skipping"
      i=$((i + 1))
      continue
    fi
    # Append decomposed-from marker to body
    # (the line break inside the quotes below is part of the string)
    local marker="<!-- decomposed-from: #${vision_issue}, sprint: ${sprint_slug}, id: ${subissue_id} -->"
    local full_body="${subissue_body}
 ${marker}"
    # Resolve label names to IDs
    local label_ids
    label_ids=$(resolve_label_ids "$labels_json")
    # Build issue payload using jq for safe JSON construction
    local payload
    payload=$(jq -n \
      --arg title "$subissue_title" \
      --arg body "$full_body" \
      --argjson labels "$label_ids" \
      '{title: $title, body: $body, labels: $labels}')
    # Create the issue
    local response
    response=$(curl -sf -X POST \
      -H "Authorization: token ${FORGE_FILER_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}/issues" \
      -d "$payload" 2>/dev/null) || {
      filer_log "ERROR: failed to create sub-issue '${subissue_id}' — aborting (${filed_count}/${entry_count} filed so far)"
      return 1
    }
    # The issue number is used only for the log line; it is empty if the
    # response has no .number field
    local new_issue_num
    new_issue_num=$(printf '%s' "$response" | jq -r '.number // empty')
    filer_log "Filed sub-issue '${subissue_id}' as #${new_issue_num}: ${subissue_title}"
    filed_count=$((filed_count + 1))
    i=$((i + 1))
  done
  # Add in-progress label to the vision issue
  add_inprogress_label "$vision_issue" || true
  filer_log "Successfully filed ${filed_count}/${entry_count} sub-issue(s) for sprint ${sprint_slug}"
  return 0
 }
 # ── Vision lifecycle: close completed vision issues ──────────────────────
 # Checks open vision issues and closes any whose sub-issues are all closed.
 # Sub-issues are the issues whose body contains the marker prefix
 # "<!-- decomposed-from: #<vision>," — the comma after the number is part of
 # the match so that vision #12 does not also collect sub-issues of #120 or #123.
 # Vision issues without any sub-issue are left open.
 # A closing comment is posted best-effort; a vision issue is counted (and
 # reported) as closed only when the PATCH request succeeded.
 # Args: none
 check_and_close_completed_visions() {
  filer_log "Checking for vision issues with all sub-issues complete..."
  local vision_issues_json
  vision_issues_json=$(filer_api_all "/issues?labels=vision&state=open")
  if [ "$vision_issues_json" = "[]" ] || [ "$vision_issues_json" = "null" ]; then
    filer_log "No open vision issues found"
    return 0
  fi
  local all_issues
  all_issues=$(filer_api_all "/issues?state=all&type=issues")
  local vision_nums
  vision_nums=$(printf '%s' "$vision_issues_json" | jq -r '.[].number' 2>/dev/null) || return 0
  local closed_count=0
  local vid sub_issues sub_count open_count comment_body comment_payload
  while IFS= read -r vid; do
    [ -z "$vid" ] && continue
    # Find sub-issues with decomposed-from marker for this vision
    sub_issues=$(printf '%s' "$all_issues" | jq --arg vid "$vid" \
      '[.[] | select(.body // "" | contains("<!-- decomposed-from: #" + $vid + ","))]')
    sub_count=$(printf '%s' "$sub_issues" | jq 'length')
    # No sub-issues means not ready to close
    [ "$sub_count" -eq 0 ] && continue
    # Check if all are closed
    open_count=$(printf '%s' "$sub_issues" | jq '[.[] | select(.state != "closed")] | length')
    if [ "$open_count" -gt 0 ]; then
      continue
    fi
    # All sub-issues closed — close the vision issue
    filer_log "All ${sub_count} sub-issues for vision #${vid} are closed — closing vision"
    comment_body="## Vision Issue Completed
 All sub-issues have been implemented and merged. This vision issue is now closed.
 ---
 *Automated closure by filer-bot · $(date -u '+%Y-%m-%d %H:%M UTC')*"
    comment_payload=$(jq -n --arg body "$comment_body" '{body: $body}')
    # The comment is informational only; a failure must not block the close.
    curl -sf -X POST \
      -H "Authorization: token ${FORGE_FILER_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}/issues/${vid}/comments" \
      -d "$comment_payload" >/dev/null 2>&1 || true
    if curl -sf -X PATCH \
      -H "Authorization: token ${FORGE_FILER_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}/issues/${vid}" \
      -d '{"state":"closed"}' >/dev/null 2>&1; then
      closed_count=$((closed_count + 1))
    else
      filer_log "WARNING: failed to close vision issue #${vid}"
    fi
  done <<< "$vision_nums"
  if [ "$closed_count" -gt 0 ]; then
    filer_log "Closed ${closed_count} vision issue(s)"
  fi
 }
 # ── Main ─────────────────────────────────────────────────────────────────
 # Usage:
 #   main <sprint-file.md>        file sub-issues from one sprint file
 #   main --all <sprints-dir>     process every *.md in the directory that
 #                                contains a <!-- filer:begin --> marker
 # In both modes the vision lifecycle check runs after filing, also when
 # filing failed, and its own failure is ignored.
 # Returns: 0 if every processed sprint file was filed successfully,
 #          1 if any file failed or no argument was given
 main() {
  local exit_code=0
  if [ "${1:-}" = "--all" ]; then
    local sprints_dir="${2:?Usage: sprint-filer.sh --all <sprints-dir>}"
    local sprint_file
    for sprint_file in "${sprints_dir}"/*.md; do
      # An unmatched glob stays literal; skip anything that is not a file
      [ -f "$sprint_file" ] || continue
      # Only process files with filer:begin markers
      if ! grep -q '<!-- filer:begin -->' "$sprint_file"; then
        continue
      fi
      if ! file_subissues "$sprint_file"; then
        filer_log "ERROR: failed to process ${sprint_file}"
        exit_code=1
      fi
    done
  elif [ -n "${1:-}" ]; then
    # Record the failure instead of relying on errexit, so the status is
    # reported the same way as in --all mode.
    if ! file_subissues "$1"; then
      filer_log "ERROR: failed to process $1"
      exit_code=1
    fi
  else
    echo "Usage: sprint-filer.sh <sprint-file.md>" >&2
    echo "       sprint-filer.sh --all <sprints-dir>" >&2
    return 1
  fi
  # Run vision lifecycle check after filing
  check_and_close_completed_visions || true
  return "$exit_code"
 }
 # Run main only when executed directly (not when sourced for testing)
 if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
 fi
--- a/lib/action-vault.sh
+++ b/lib/action-vault.sh
@ -1,9 +1,9 @@
 #!/usr/bin/env bash
-# action-vault.sh — Helper for agents to create vault PRs on ops repo
+# vault.sh — Helper for agents to create vault PRs on ops repo
 #
 # Source after lib/env.sh:
 #   source "$(dirname "$0")/../lib/env.sh"
-#   source "$(dirname "$0")/lib/action-vault.sh"
+#   source "$(dirname "$0")/lib/vault.sh"
 #
 # Required globals: FORGE_TOKEN, FORGE_URL, FORGE_REPO, FORGE_OPS_REPO
 # Optional: OPS_REPO_ROOT (local path for ops repo)
@ -12,7 +12,7 @@
 #   vault_request <action_id> <toml_content>  — Create vault PR, return PR number
 #
 # The function:
-# 1. Validates TOML content using validate_vault_action() from action-vault/vault-env.sh
+# 1. Validates TOML content using validate_vault_action() from vault/vault-env.sh
 # 2. Creates a branch on the ops repo: vault/<action-id>
 # 3. Writes TOML to vault/actions/<action-id>.toml on that branch
 # 4. Creates PR targeting main with title "vault: <action-id>"
@ -133,7 +133,7 @@ vault_request() {
  printf '%s' "$toml_content" > "$tmp_toml"
  # Source vault-env.sh for validate_vault_action
-  local vault_env="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/action-vault/vault-env.sh"
+  local vault_env="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/vault/vault-env.sh"
  if [ ! -f "$vault_env" ]; then
    echo "ERROR: vault-env.sh not found at $vault_env" >&2
    return 1
@ -161,7 +161,7 @@ vault_request() {
  ops_api="$(_vault_ops_api)"
  # Classify the action to determine if PR bypass is allowed
-  local classify_script="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/action-vault/classify.sh"
+  local classify_script="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/vault/classify.sh"
  local vault_tier
  vault_tier=$("$classify_script" "${VAULT_ACTION_FORMULA:-}" "${VAULT_BLAST_RADIUS_OVERRIDE:-}") || {
    # Classification failed, default to high tier (require PR)
--- a/planner/AGENTS.md
+++ b/planner/AGENTS.md
@ -1,4 +1,4 @@
-<!-- last-reviewed: c363ee0aea2ae447daab28c2c850d6abefc8c6b5 -->
+<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
 # Planner Agent
 **Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints),
@ -34,9 +34,7 @@ will then sections) and marks the prerequisite as blocked-on-vault in the tree.
 Deduplication: checks pending/ + approved/ + fired/ before creating.
 Phase 4 (journal-and-memory): write updated prerequisite tree + daily journal
 entry (committed to ops repo) and update `$OPS_REPO_ROOT/knowledge/planner-memory.md`.
-Phase 5 (commit-ops): commit all ops repo changes to a `planner/run-YYYY-MM-DD`
+Phase 5 (commit-ops): commit all ops repo changes, push directly.
 branch, then create a PR and walk it to merge via review-bot (`pr_create` →
 `pr_walk_to_merge`), mirroring the architect's ops flow. No direct push to main.
 AGENTS.md maintenance is handled by the Gardener.
 **Artifacts use `$OPS_REPO_ROOT`**: All planner artifacts (journal,
@ -57,7 +55,7 @@ nervous system component, not work.
  creates tmux session, injects formula prompt, monitors phase file, handles crash recovery, cleans up
 - `formulas/run-planner.toml` — Execution spec: six steps (preflight,
  prediction-triage, update-prerequisite-tree, file-at-constraints,
-  journal-and-memory, commit-ops-changes) with `needs` dependencies. Claude
+  journal-and-memory, commit-and-pr) with `needs` dependencies. Claude
  executes all steps in a single interactive session with tool access
 - `formulas/groom-backlog.toml` — Grooming formula for backlog triage and
  grooming. (Note: the planner no longer dispatches breakdown mode — complex
--- a/planner/planner-run.sh
+++ b/planner/planner-run.sh
@ -10,9 +10,7 @@
 #   2. Load formula (formulas/run-planner.toml)
 #   3. Context: VISION.md, AGENTS.md, ops:RESOURCES.md, structural graph,
 #      planner memory, journal entries
-#   4. Create ops branch planner/run-YYYY-MM-DD for changes
+#   4. agent_run(worktree, prompt) → Claude plans, may push knowledge updates
 #   5. agent_run(worktree, prompt) → Claude plans, commits to ops branch
 #   6. If ops branch has commits: pr_create → pr_walk_to_merge (review-bot)
 #
 # Usage:
 #   planner-run.sh [projects/disinto.toml]   # project config (default: disinto)
@ -24,11 +22,10 @@ FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
 # Accept project config from argument; default to disinto (planner is disinto infrastructure)
 export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}"
 # Set override BEFORE sourcing env.sh so it survives any later re-source of
 # env.sh from nested shells / claude -p tools (#762, #747)
 export FORGE_TOKEN_OVERRIDE="${FORGE_PLANNER_TOKEN:-}"
 # shellcheck source=../lib/env.sh
 source "$FACTORY_ROOT/lib/env.sh"
 # Use planner-bot's own Forgejo identity (#747)
 FORGE_TOKEN="${FORGE_PLANNER_TOKEN:-${FORGE_TOKEN}}"
 # shellcheck source=../lib/formula-session.sh
 source "$FACTORY_ROOT/lib/formula-session.sh"
 # shellcheck source=../lib/worktree.sh
@ -37,10 +34,6 @@ source "$FACTORY_ROOT/lib/worktree.sh"
 source "$FACTORY_ROOT/lib/guard.sh"
 # shellcheck source=../lib/agent-sdk.sh
 source "$FACTORY_ROOT/lib/agent-sdk.sh"
 # shellcheck source=../lib/ci-helpers.sh
 source "$FACTORY_ROOT/lib/ci-helpers.sh"
 # shellcheck source=../lib/pr-lifecycle.sh
 source "$FACTORY_ROOT/lib/pr-lifecycle.sh"
 LOG_FILE="${DISINTO_LOG_DIR}/planner/planner.log"
 # shellcheck disable=SC2034  # consumed by agent-sdk.sh
@ -96,9 +89,6 @@ fi
 log "sha=${CURRENT_SHA:0:8} ops=${CURRENT_OPS_SHA:0:8} unreviewed=${unreviewed_count} vision=${vision_open}"
 # ── Resolve forge remote for git operations ─────────────────────────────
 # Run git operations from the project checkout, not the baked code dir
 cd "$PROJECT_REPO_ROOT"
 resolve_forge_remote
 # ── Resolve agent identity for .profile repo ────────────────────────────
@ -152,69 +142,12 @@ ${PROMPT_FOOTER}"
 # ── Create worktree ──────────────────────────────────────────────────────
 formula_worktree_setup "$WORKTREE"
 # ── Prepare ops branch for PR-based merge (#765) ────────────────────────
 PLANNER_OPS_BRANCH="planner/run-$(date -u +%Y-%m-%d)"
 (
  cd "$OPS_REPO_ROOT"
  git fetch origin "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
  git checkout "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
  git pull --ff-only origin "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
  # Create (or reset to) a fresh branch from PRIMARY_BRANCH
  git checkout -B "$PLANNER_OPS_BRANCH" "origin/${PRIMARY_BRANCH}" --quiet 2>/dev/null || \
    git checkout -b "$PLANNER_OPS_BRANCH" --quiet 2>/dev/null || true
 )
 log "ops branch: ${PLANNER_OPS_BRANCH}"
 # ── Run agent ─────────────────────────────────────────────────────────────
 export CLAUDE_MODEL="opus"
 agent_run --worktree "$WORKTREE" "$PROMPT"
 log "agent_run complete"
 # ── PR lifecycle: create PR on ops repo and walk to merge (#765) ─────────
 OPS_FORGE_API="${FORGE_API_BASE}/repos/${FORGE_OPS_REPO}"
 ops_has_commits=false
 if ! git -C "$OPS_REPO_ROOT" diff --quiet "origin/${PRIMARY_BRANCH}..${PLANNER_OPS_BRANCH}" 2>/dev/null; then
  ops_has_commits=true
 fi
 if [ "$ops_has_commits" = "true" ]; then
  log "ops branch has commits — creating PR"
  # Push the branch to the ops remote
  git -C "$OPS_REPO_ROOT" push origin "$PLANNER_OPS_BRANCH" --quiet 2>/dev/null || \
    git -C "$OPS_REPO_ROOT" push --force-with-lease origin "$PLANNER_OPS_BRANCH" 2>/dev/null
  # Temporarily point FORGE_API at the ops repo for pr-lifecycle functions
  ORIG_FORGE_API="$FORGE_API"
  export FORGE_API="$OPS_FORGE_API"
  # Ops repo typically has no Woodpecker CI — skip CI polling
  ORIG_WOODPECKER_REPO_ID="${WOODPECKER_REPO_ID:-2}"
  export WOODPECKER_REPO_ID="0"
  PR_NUM=$(pr_create "$PLANNER_OPS_BRANCH" \
    "chore: planner run $(date -u +%Y-%m-%d)" \
    "Automated planner run — updates prerequisite tree, memory, and vault items." \
    "${PRIMARY_BRANCH}" \
    "$OPS_FORGE_API") || true
  if [ -n "$PR_NUM" ]; then
    log "ops PR #${PR_NUM} created — walking to merge"
    SESSION_ID=$(cat "$SID_FILE" 2>/dev/null || echo "planner-$$")
    pr_walk_to_merge "$PR_NUM" "$SESSION_ID" "$OPS_REPO_ROOT" 1 2 || {
      log "ops PR #${PR_NUM} walk finished: ${_PR_WALK_EXIT_REASON:-unknown}"
    }
    log "ops PR #${PR_NUM} result: ${_PR_WALK_EXIT_REASON:-unknown}"
  else
    log "WARNING: failed to create ops PR for branch ${PLANNER_OPS_BRANCH}"
  fi
  # Restore original FORGE_API
  export FORGE_API="$ORIG_FORGE_API"
  export WOODPECKER_REPO_ID="$ORIG_WOODPECKER_REPO_ID"
 else
  log "no ops changes — skipping PR creation"
 fi
 # Persist watermarks so next run can skip if nothing changed
 mkdir -p "$FACTORY_ROOT/state"
 echo "$CURRENT_SHA" > "$LAST_SHA_FILE"
--- a/predictor/AGENTS.md
+++ b/predictor/AGENTS.md
@ -1,4 +1,4 @@
-<!-- last-reviewed: c363ee0aea2ae447daab28c2c850d6abefc8c6b5 -->
+<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
 # Predictor Agent
 **Role**: Abstract adversary (the "goblin"). Runs a 2-step formula
--- a/predictor/predictor-run.sh
+++ b/predictor/predictor-run.sh
@ -23,11 +23,10 @@ FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
 # Accept project config from argument; default to disinto
 export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}"
 # Set override BEFORE sourcing env.sh so it survives any later re-source of
 # env.sh from nested shells / claude -p tools (#762, #747)
 export FORGE_TOKEN_OVERRIDE="${FORGE_PREDICTOR_TOKEN:-}"
 # shellcheck source=../lib/env.sh
 source "$FACTORY_ROOT/lib/env.sh"
 # Use predictor-bot's own Forgejo identity (#747)
 FORGE_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}"
 # shellcheck source=../lib/formula-session.sh
 source "$FACTORY_ROOT/lib/formula-session.sh"
 # shellcheck source=../lib/worktree.sh
@ -64,9 +63,6 @@ memory_guard 2000
 log "--- Predictor run start ---"
 # ── Resolve forge remote for git operations ─────────────────────────────
 # Run git operations from the project checkout, not the baked code dir
 cd "$PROJECT_REPO_ROOT"
 resolve_forge_remote
 # ── Resolve agent identity for .profile repo ────────────────────────────
--- a/projects/disinto.toml.example
+++ b/projects/disinto.toml.example
@ -23,33 +23,18 @@ check_prs            = true
 check_dev_agent      = true
 check_pipeline_stall = false
-# Agent scheduling configuration
+# Agent scheduling — configure gardener and architect polling intervals
 # in the docker-compose.yml environment section (or .env file).
 # Values are in seconds, defaults are 21600 (6 hours) for both.
 #
-# These values are passed to the agents container as environment variables.
+# For active development on the disinto factory itself, consider:
-# The default values (6 hours each) work well for stable production projects.
+#   GARDENER_INTERVAL=3600    # 1 hour
-# For active development on the disinto factory itself, you may want to
+#   ARCHITECT_INTERVAL=600    # 10 minutes
 # configure shorter intervals:
 #
 #   GARDENER_INTERVAL=3600    # 1 hour (default: 21600 = 6 hours)
 #   ARCHITECT_INTERVAL=540    # 9 minutes (default: 21600 = 6 hours)
 #   PLANNER_INTERVAL=660      # 11 minutes (default: 43200 = 12 hours)
 #
 # These can be set in docker-compose.yml environment section or in a .env file.
 #
 # [agents.schedule]
 #   gardener_interval = 21600  # seconds (default: 21600 = 6 hours)
 #   architect_interval  = 21600  # seconds (default: 21600 = 6 hours)
 #   planner_interval    = 43200  # seconds (default: 43200 = 12 hours)
 # Local-model agents (optional) — configure to use llama-server or similar
 # for local LLM inference. Each agent gets its own container with isolated
 # credentials and configuration.
 #
 # When enabled, `disinto init` automatically:
 #   1. Creates a Forgejo bot user matching agents.llama.forge_user
 #   2. Generates FORGE_TOKEN_<BOT> and FORGE_PASS_<BOT> (stored in .env.enc)
 #   3. Adds the bot user as a write collaborator on the project repo
 #
 # [agents.llama]
 #   base_url = "http://10.10.10.1:8081"
 #   model = "unsloth/Qwen3.5-35B-A3B"
--- a/review/AGENTS.md
+++ b/review/AGENTS.md
@ -1,4 +1,4 @@
-<!-- last-reviewed: c363ee0aea2ae447daab28c2c850d6abefc8c6b5 -->
+<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
 # Review Agent
 **Role**: AI-powered PR review — post structured findings and formal
--- a/review/review-pr.sh
+++ b/review/review-pr.sh
@ -227,7 +227,6 @@ PROMPT=$(cat "${REVIEW_TMPDIR}/prompt.md")
 status "running review"
 rm -f "$OUTPUT_FILE"
 export CLAUDE_MODEL="sonnet"
 export CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-900}"   # 15 min — reviews shouldn't take longer
 if [ "$IS_RE_REVIEW" = true ] && [ -n "$_AGENT_SESSION_ID" ]; then
  agent_run --resume "$_AGENT_SESSION_ID" --worktree "$WORKTREE" "$PROMPT"
--- a/site/collect-engagement.sh
+++ b/site/collect-engagement.sh
@ -59,21 +59,6 @@ fi
 mkdir -p "$EVIDENCE_DIR"
 # Verify input is Caddy JSON format (not Combined Log Format or other)
 first_line=$(grep -m1 '.' "$CADDY_LOG" || true)
 if [ -z "$first_line" ]; then
  log "WARN: Caddy access log is empty at ${CADDY_LOG}"
  echo "WARN: Caddy access log is empty — nothing to parse." >&2
  exit 0
 fi
 if ! printf '%s\n' "$first_line" | jq empty 2>/dev/null; then
  preview="${first_line:0:200}"
  log "ERROR: Input file is not Caddy JSON format (expected structured JSON access log). Got: ${preview}"
  echo "ERROR: Input file is not Caddy JSON format (expected structured JSON access log)." >&2
  echo "Got: ${preview}" >&2
  exit 1
 fi
 # ── Parse access log ────────────────────────────────────────────────────────
 log "Parsing ${CADDY_LOG} for entries since $(date -u -d "@${CUTOFF_TS}" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || echo "${CUTOFF_TS}")"
@ -137,8 +122,7 @@ PAGES=$(printf '%s\n' "$PARSED" | jq -c '
 ')
 TOTAL_REQUESTS=$(printf '%s\n' "$PARSED" | wc -l | tr -d ' ')
-PAGE_VIEWS=$(printf '%s\n' "$PAGES" | grep -c . || true)
+PAGE_VIEWS=$(printf '%s\n' "$PAGES" | grep -c . || echo 0)
 PAGE_VIEWS="${PAGE_VIEWS:-0}"
 UNIQUE_VISITORS=$(printf '%s\n' "$PAGES" | jq -r '.ip' | sort -u | wc -l | tr -d ' ')
 # Top pages by hit count
--- a/supervisor/AGENTS.md
+++ b/supervisor/AGENTS.md
@ -1,4 +1,4 @@
-<!-- last-reviewed: c363ee0aea2ae447daab28c2c850d6abefc8c6b5 -->
+<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
 # Supervisor Agent
 **Role**: Health monitoring and auto-remediation, executed as a formula-driven
@ -7,11 +7,13 @@ then runs an interactive Claude session (sonnet) that assesses health, auto-fixe
 issues, and writes a daily journal. When blocked on external
 resources or human decisions, files vault items instead of escalating directly.
-**Trigger**: `supervisor-run.sh` is invoked by two polling loops:
+**Trigger**: `supervisor-run.sh` is invoked by the polling loop in `docker/edge/entrypoint-edge.sh`
- **Agents container** (`docker/agents/entrypoint.sh`): every `SUPERVISOR_INTERVAL` seconds (default 1200 = 20 min). Controlled by the `supervisor` role in `AGENT_ROLES` (included in the default seven-role set since P1/#801). Logs to `supervisor.log` in the agents container.
+every 20 minutes (line 50-53). Sources `lib/guard.sh` and calls `check_active supervisor` first
- **Edge container** (`docker/edge/entrypoint-edge.sh`): separate loop in the edge container (line 169-172). Runs independently of the agents container's polling schedule.
+— skips if `$FACTORY_ROOT/state/.supervisor-active` is absent. Then runs `claude -p` via
-
+`agent-sdk.sh`, injects `formulas/run-supervisor.toml` with pre-collected metrics as context,
-Both invoke the same `supervisor-run.sh`. Sources `lib/guard.sh` and calls `check_active supervisor` first — skips if `$FACTORY_ROOT/state/.supervisor-active` is absent. Then runs `claude -p` via `agent-sdk.sh`, injects `formulas/run-supervisor.toml` with pre-collected metrics as context, and cleans up on completion or timeout.
+and cleans up on completion or timeout (20 min max session). Note: the supervisor runs in the
 **edge container** (`entrypoint-edge.sh`), not the agent container — this distinction matters
 for operators debugging the factory.
 **Key files**:
 - `supervisor/supervisor-run.sh` — Polling loop participant + orchestrator: lock, memory guard,
@ -37,7 +39,6 @@ P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping).
 **Environment variables consumed**:
 - `FORGE_TOKEN`, `FORGE_SUPERVISOR_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`, `OPS_REPO_ROOT`
 - `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by supervisor-run.sh)
 - `SUPERVISOR_INTERVAL` — polling interval in seconds for agents container (default 1200 = 20 min)
 - `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries
 **Degraded mode (Issue #544)**: When `OPS_REPO_ROOT` is not set or the directory doesn't exist, the supervisor runs in degraded mode:
--- a/supervisor/supervisor-run.sh
+++ b/supervisor/supervisor-run.sh
@ -25,11 +25,10 @@ FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
 # Accept project config from argument; default to disinto
 export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}"
 # Set override BEFORE sourcing env.sh so it survives any later re-source of
 # env.sh from nested shells / claude -p tools (#762, #747)
 export FORGE_TOKEN_OVERRIDE="${FORGE_SUPERVISOR_TOKEN:-}"
 # shellcheck source=../lib/env.sh
 source "$FACTORY_ROOT/lib/env.sh"
 # Use supervisor-bot's own Forgejo identity (#747)
 FORGE_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}"
 # shellcheck source=../lib/formula-session.sh
 source "$FACTORY_ROOT/lib/formula-session.sh"
 # shellcheck source=../lib/worktree.sh
@ -86,8 +85,7 @@ memory_guard 2000
 log "--- Supervisor run start ---"
 # ── Resolve forge remote for git operations ─────────────────────────────
-# Run git operations from the project checkout, not the baked code dir
+resolve_forge_remote
 cd "$PROJECT_REPO_ROOT"
 # ── Housekeeping: clean up stale crashed worktrees (>24h) ────────────────
 cleanup_stale_crashed_worktrees 24
--- a/tests/lib-hvault.bats
+++ b/tests/lib-hvault.bats
@ -1,215 +0,0 @@
 #!/usr/bin/env bats
 # tests/lib-hvault.bats — Unit tests for lib/hvault.sh
 #
 # Runs against a dev-mode Vault server (single binary, no LXC needed).
 # CI launches vault server -dev inline before running these tests.
 VAULT_BIN="${VAULT_BIN:-vault}"
 # One-time fixture for this file: start a dev-mode Vault server in the
 # background and block until its health endpoint answers.
 # Exports: TEST_DIR, VAULT_DEV_PORT, VAULT_ADDR, VAULT_PID, VAULT_TOKEN.
 # Returns: 0 once the health check succeeds; 1 (after dumping the server log
 #          to stderr) if it never does.
 setup_file() {
  local vault_log="${BATS_FILE_TMPDIR}/vault.log"
  local tries

  # Parent of the directory that holds this test file
  TEST_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
  export TEST_DIR

  # Random port in 18200-18299; VAULT_ADDR points at it
  VAULT_DEV_PORT="$(shuf -i 18200-18299 -n 1)"
  export VAULT_DEV_PORT
  export VAULT_ADDR="http://127.0.0.1:${VAULT_DEV_PORT}"

  "$VAULT_BIN" server -dev \
    -dev-listen-address="127.0.0.1:${VAULT_DEV_PORT}" \
    -dev-root-token-id="test-root-token" \
    -dev-no-store-token \
    >"$vault_log" 2>&1 &
  VAULT_PID=$!
  export VAULT_PID
  export VAULT_TOKEN="test-root-token"

  # Probe the health endpoint up to 20 times, 0.5s apart (about 10s in total)
  for ((tries = 0; tries < 20; tries++)); do
    if curl -sf "${VAULT_ADDR}/v1/sys/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.5
  done

  echo "Vault failed to start. Log:" >&2
  cat "$vault_log" >&2
  return 1
 }
 # File-level cleanup: stop the background process recorded in VAULT_PID.
 # An unset or empty VAULT_PID is a no-op; kill/wait failures are ignored,
 # so this always returns 0.
 teardown_file() {
  [ -n "${VAULT_PID:-}" ] || return 0
  kill "$VAULT_PID" 2>/dev/null || :
  wait "$VAULT_PID" 2>/dev/null || :
 }
 # Per-test fixture: load lib/hvault.sh into the current shell, then mark the
 # Vault connection variables for export.
 setup() {
  # Module under test
  . "${TEST_DIR}/lib/hvault.sh"
  export VAULT_ADDR
  export VAULT_TOKEN
 }
 # ── hvault_kv_put + hvault_kv_get ────────────────────────────────────────────
# Round trip: write two fields to one path, read the whole secret back, and
# check each field of the returned JSON with jq.
@test "hvault_kv_put writes and hvault_kv_get reads a secret" {
  run hvault_kv_put "test/myapp" "username=admin" "password=s3cret"
  [ "$status" -eq 0 ]
  run hvault_kv_get "test/myapp"
  [ "$status" -eq 0 ]
  # jq -e exits non-zero when the comparison yields false/null
  echo "$output" | jq -e '.username == "admin"'
  echo "$output" | jq -e '.password == "s3cret"'
 }
# With a second argument, hvault_kv_get is expected to print only that key's
# value (compared here as a plain string, not JSON).
@test "hvault_kv_get extracts a single key" {
  hvault_kv_put "test/single" "foo=bar" "baz=qux"
  run hvault_kv_get "test/single" "foo"
  [ "$status" -eq 0 ]
  [ "$output" = "bar" ]
 }
# The path exists but the requested key does not: must fail.
@test "hvault_kv_get fails for missing key" {
  hvault_kv_put "test/keymiss" "exists=yes"
  run hvault_kv_get "test/keymiss" "nope"
  [ "$status" -ne 0 ]
 }
# Epoch-seconds suffix — presumably so the path has never been written before.
@test "hvault_kv_get fails for missing path" {
  run hvault_kv_get "test/does-not-exist-$(date +%s)"
  [ "$status" -ne 0 ]
 }
# A path with no KEY=VAL pairs must fail and report an '"error":true' marker.
@test "hvault_kv_put fails without KEY=VAL" {
  run hvault_kv_put "test/bad"
  [ "$status" -ne 0 ]
  # NOTE(review): $stderr is presumably only populated by `run --separate-stderr`;
  # with the plain `run` above the second grep likely can never match — confirm.
  echo "$output" | grep -q '"error":true' || echo "$stderr" | grep -q '"error":true'
 }
# An argument without '=' is not a valid KEY=VAL pair.
@test "hvault_kv_put rejects malformed pair (no =)" {
  run hvault_kv_put "test/bad2" "noequals"
  [ "$status" -ne 0 ]
 }
# No arguments at all: must fail.
@test "hvault_kv_get fails without PATH" {
  run hvault_kv_get
  [ "$status" -ne 0 ]
 }
 # ── hvault_kv_list ───────────────────────────────────────────────────────────
# Write two secrets under a common prefix, then list that prefix.
@test "hvault_kv_list lists keys at a path" {
  hvault_kv_put "test/listdir/a" "k=1"
  hvault_kv_put "test/listdir/b" "k=2"
  run hvault_kv_list "test/listdir"
  [ "$status" -eq 0 ]
  # The filters below treat $output as a JSON array containing "a" and "b"
  # (">= 2" tolerates extra entries under the same prefix).
  echo "$output" | jq -e '. | length >= 2'
  echo "$output" | jq -e 'index("a")'
  echo "$output" | jq -e 'index("b")'
 }
# Epoch-seconds suffix — presumably so the path has never been written before.
@test "hvault_kv_list fails on nonexistent path" {
  run hvault_kv_list "test/no-such-path-$(date +%s)"
  [ "$status" -ne 0 ]
 }
# No arguments at all: must fail.
@test "hvault_kv_list fails without PATH" {
  run hvault_kv_list
  [ "$status" -ne 0 ]
 }
 # ── hvault_policy_apply ──────────────────────────────────────────────────────
# Write a small read-only policy file, apply it as "test-reader", then read the
# policy back through Vault's HTTP API to confirm it was stored.
@test "hvault_policy_apply creates a policy" {
  local pfile="${BATS_TEST_TMPDIR}/test-policy.hcl"
  cat > "$pfile" <<'HCL'
 path "secret/data/test/*" {
  capabilities = ["read"]
 }
 HCL
  run hvault_policy_apply "test-reader" "$pfile"
  [ "$status" -eq 0 ]
  # Verify the policy exists via Vault API
  run curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \
    "${VAULT_ADDR}/v1/sys/policies/acl/test-reader"
  [ "$status" -eq 0 ]
  # The stored policy text must mention the path written above
  echo "$output" | jq -e '.data.policy' | grep -q "secret/data/test"
 }
# Applying the same policy name and file twice must succeed both times.
@test "hvault_policy_apply is idempotent" {
  local pfile="${BATS_TEST_TMPDIR}/idem-policy.hcl"
  printf 'path "secret/*" { capabilities = ["list"] }\n' > "$pfile"
  run hvault_policy_apply "idem-policy" "$pfile"
  [ "$status" -eq 0 ]
  # Apply again — should succeed
  run hvault_policy_apply "idem-policy" "$pfile"
  [ "$status" -eq 0 ]
 }
# A policy file path that does not exist must be rejected.
@test "hvault_policy_apply fails with missing file" {
  run hvault_policy_apply "bad-policy" "/nonexistent/policy.hcl"
  [ "$status" -ne 0 ]
 }
# No arguments at all: must fail.
@test "hvault_policy_apply fails without args" {
  run hvault_policy_apply
  [ "$status" -ne 0 ]
 }
 # ── hvault_token_lookup ──────────────────────────────────────────────────────
# Lookup with the exported VAULT_TOKEN: the JSON result must carry non-null
# .policies and .accessor and contain a "ttl" key (its value is not checked).
@test "hvault_token_lookup returns token info" {
  run hvault_token_lookup
  [ "$status" -eq 0 ]
  echo "$output" | jq -e '.policies'
  echo "$output" | jq -e '.accessor'
  echo "$output" | jq -e 'has("ttl")'
 }
# Remove the token for this test only, then expect the lookup to fail.
@test "hvault_token_lookup fails without VAULT_TOKEN" {
  unset VAULT_TOKEN
  run hvault_token_lookup
  [ "$status" -ne 0 ]
 }
# Remove the server address for this test only, then expect the lookup to fail.
@test "hvault_token_lookup fails without VAULT_ADDR" {
  unset VAULT_ADDR
  run hvault_token_lookup
  [ "$status" -ne 0 ]
 }
 # ── hvault_jwt_login ─────────────────────────────────────────────────────────
# Only failure paths are exercised for hvault_jwt_login in this file.
# Missing server address: must fail.
@test "hvault_jwt_login fails without VAULT_ADDR" {
  unset VAULT_ADDR
  run hvault_jwt_login "myrole" "fakejwt"
  [ "$status" -ne 0 ]
 }
# No role / JWT arguments: must fail.
@test "hvault_jwt_login fails without args" {
  run hvault_jwt_login
  [ "$status" -ne 0 ]
 }
# Role and a fake token supplied, but the server has no JWT auth configured.
@test "hvault_jwt_login returns error for unconfigured jwt auth" {
  # JWT auth backend is not enabled in dev mode by default — expect failure
  run hvault_jwt_login "myrole" "eyJhbGciOiJSUzI1NiJ9.fake.sig"
  [ "$status" -ne 0 ]
 }
 # ── Env / prereq errors ─────────────────────────────────────────────────────
# Call each listed function with two placeholder arguments while VAULT_ADDR is
# unset and require a non-zero status from every one.
# NOTE(review): the title promises a "structured JSON error", but only $status
# is asserted; $output is never inspected — confirm whether that is intended.
@test "all functions fail with structured JSON error when VAULT_ADDR unset" {
  unset VAULT_ADDR
  for fn in hvault_kv_get hvault_kv_put hvault_kv_list hvault_policy_apply hvault_token_lookup; do
    run $fn "dummy" "dummy"
    [ "$status" -ne 0 ]
  done
 }
--- a/tests/mock-forgejo.py
+++ b/tests/mock-forgejo.py
@ -505,9 +505,8 @@ class ForgejoHandler(BaseHTTPRequestHandler):
        require_token(self)
        parts = self.path.split("/")
-        # /api/v1/admin/users/{username}/repos → parts[5] is the username
+        if len(parts) >= 6:
-        if len(parts) >= 7:
+            target_user = parts[4]
            target_user = parts[5]
        else:
            json_response(self, 400, {"message": "username required"})
            return
--- a/tests/smoke-credentials.sh
+++ b/tests/smoke-credentials.sh
@ -1,131 +0,0 @@
 #!/usr/bin/env bash
 # tests/smoke-credentials.sh — Verify no git remote URL contains embedded credentials
 #
 # Scans all shell scripts that construct git URLs and verifies:
 #   1. No source file embeds credentials in remote URLs (static check)
 #   2. The repair_baked_cred_urls function correctly strips credentials
 #   3. configure_git_creds writes a working credential helper
 #
 # Required tools: bash, git, grep
 set -euo pipefail
 FACTORY_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
 FAILED=0
 # Report a failed check on stderr and mark the whole run as failed.
 fail() {
  printf 'FAIL: %s\n' "$*" >&2
  FAILED=1
 }
 # Report a passed check on stdout.
 pass() {
  printf 'PASS: %s\n' "$*"
 }
 # ── 1. Static check: no credential embedding in URL construction ──────────
 echo "=== 1/3 Static check: no credential embedding in URL construction ==="
 # Patterns that embed credentials into git URLs:
 #   sed "s|://|://user:pass@|"   — the classic injection pattern
 #   ://.*:.*@                     — literal user:pass@ in a URL string
 # Allowlist: git-creds.sh itself (it writes the credential helper, not URLs),
 # and this test file.
 cred_embed_pattern='s\|://\|://.*:.*\$\{.*\}@'
 offending_files=()
 while IFS= read -r f; do
  # Skip allowlisted files:
  #   git-creds.sh          — writes the credential helper, not URLs
  #   smoke-credentials.sh  — this test file
  #   hire-agent.sh         — one-shot setup: clones as newly-created user, clone dir deleted immediately
  case "$f" in
    */git-creds.sh|*/smoke-credentials.sh|*/hire-agent.sh) continue ;;
  esac
  # ls-files (run with -C) prints paths relative to FACTORY_ROOT, so anchor the
  # lookup there. Grepping the bare relative path only works when the caller's
  # cwd happens to be FACTORY_ROOT; elsewhere grep errors out (hidden by
  # 2>/dev/null) and the check passes without scanning anything.
  if grep -qE "$cred_embed_pattern" "$FACTORY_ROOT/$f" 2>/dev/null; then
    offending_files+=("$f")
  fi
 done < <(git -C "$FACTORY_ROOT" ls-files '*.sh')
 if [ ${#offending_files[@]} -eq 0 ]; then
  pass "No shell scripts embed credentials in git remote URLs"
 else
  # Report every offender together with its first three matching lines
  for f in "${offending_files[@]}"; do
    fail "Credential embedding found in: $f"
    grep -nE "$cred_embed_pattern" "$FACTORY_ROOT/$f" 2>/dev/null | head -3
  done
 fi
 # ── 2. Unit test: repair_baked_cred_urls strips credentials ───────────────
 echo "=== 2/3 Unit test: repair_baked_cred_urls ==="
 # Load the library under test
 # shellcheck source=lib/git-creds.sh
 . "${FACTORY_ROOT}/lib/git-creds.sh"
 # Scratch directory with one throw-away repo; removed when the script exits
 test_dir=$(mktemp -d)
 trap 'rm -rf "$test_dir"' EXIT
 scratch_repo="${test_dir}/repo"
 mkdir -p "$scratch_repo"
 git -C "$scratch_repo" init -q
 git -C "$scratch_repo" config user.email "test@test"
 git -C "$scratch_repo" config user.name "test"
 git -C "$scratch_repo" commit --allow-empty -m "init" -q
 # Origin URL carries user:token@ — this is what the repair must remove
 git -C "$scratch_repo" remote add origin "http://dev-bot:secret-token@forgejo:3000/org/repo.git"
 # First pass: repair the baked-credential URL
 _GIT_CREDS_LOG_FN="echo" repair_baked_cred_urls "${test_dir}"
 repaired_url=$(git -C "$scratch_repo" config --get remote.origin.url)
 case "$repaired_url" in
  "http://forgejo:3000/org/repo.git")
    pass "repair_baked_cred_urls correctly stripped credentials"
    ;;
  *)
    fail "repair_baked_cred_urls result: '${repaired_url}' (expected 'http://forgejo:3000/org/repo.git')"
    ;;
 esac
 # Second pass: an already-clean URL must come through unchanged
 git -C "$scratch_repo" remote set-url origin "http://forgejo:3000/org/repo.git"
 _GIT_CREDS_LOG_FN="echo" repair_baked_cred_urls "${test_dir}"
 clean_url=$(git -C "$scratch_repo" config --get remote.origin.url)
 case "$clean_url" in
  "http://forgejo:3000/org/repo.git")
    pass "repair_baked_cred_urls leaves clean URLs untouched"
    ;;
  *)
    fail "repair_baked_cred_urls modified a clean URL: '${clean_url}'"
    ;;
 esac
 # ── 3. Unit test: configure_git_creds writes a credential helper ──────────
 echo "=== 3/3 Unit test: configure_git_creds ==="
 cred_home=$(mktemp -d)
 # cred_home used to be removed only by the final rm -rf, so any abort under
 # set -e left it behind. Re-register the EXIT trap to cover both scratch
 # directories (the earlier trap only knows about test_dir).
 trap 'rm -rf "$test_dir" "$cred_home"' EXIT
 # Export required globals
 export FORGE_PASS="test-password-123"
 export FORGE_URL="http://forgejo:3000"
 export FORGE_TOKEN=""  # skip API call in test
 # Turn a non-zero return into a reported failure rather than a silent set -e abort
 configure_git_creds "$cred_home" || fail "configure_git_creds exited non-zero"
 if [ -x "${cred_home}/.git-credentials-helper" ]; then
  pass "Credential helper script created and executable"
 else
  fail "Credential helper script not found or not executable at ${cred_home}/.git-credentials-helper"
 fi
 # Verify the helper outputs correct credentials.
 # A missing or failing helper is recorded via fail() so the remaining checks
 # and the summary still run.
 helper_output=$(echo "" | "${cred_home}/.git-credentials-helper" get 2>/dev/null) \
  || fail "Credential helper exited non-zero"
 if printf '%s' "$helper_output" | grep -q "password=test-password-123"; then
  pass "Credential helper outputs correct password"
 else
  fail "Credential helper output missing password: ${helper_output}"
 fi
 if printf '%s' "$helper_output" | grep -q "host=forgejo:3000"; then
  pass "Credential helper outputs correct host"
 else
  fail "Credential helper output missing host: ${helper_output}"
 fi
 rm -rf "$cred_home"
 # ── Summary ───────────────────────────────────────────────────────────────
 # Exit 1 when any check called fail(); otherwise announce success.
 echo ""
 [ "$FAILED" -ne 0 ] && {
  echo "=== SMOKE-CREDENTIALS TEST FAILED ==="
  exit 1
 }
 echo "=== SMOKE-CREDENTIALS TEST PASSED ==="
--- a/tests/smoke-init.sh
+++ b/tests/smoke-init.sh
@ -28,9 +28,7 @@ cleanup() {
  # Kill any leftover mock-forgejo.py processes by name
  pkill -f "mock-forgejo.py" 2>/dev/null || true
  rm -rf "$MOCK_BIN" /tmp/smoke-test-repo \
-         "${FACTORY_ROOT}/projects/smoke-repo.toml" \
+         "${FACTORY_ROOT}/projects/smoke-repo.toml"
         /tmp/smoke-claude-shared /tmp/smoke-home-claude \
         /tmp/smoke-env-before-rerun /tmp/smoke-env-before-dryrun
  # Restore .env only if we created the backup
  if [ -f "${FACTORY_ROOT}/.env.smoke-backup" ]; then
    mv "${FACTORY_ROOT}/.env.smoke-backup" "${FACTORY_ROOT}/.env"
@ -163,8 +161,6 @@ git commit --quiet -m "Initial commit"
 export SMOKE_FORGE_URL="$FORGE_URL"
 export FORGE_URL
 # Required for non-interactive init (issue #620)
 export FORGE_ADMIN_PASS="smoke-test-password-123"
 # Skip push to mock server (no git support)
 export SKIP_PUSH=true
@ -179,30 +175,8 @@ else
  fail "disinto init exited non-zero"
 fi
-# ── Dry-run test: must not modify state ────────────────────────────────────
+# ── Idempotency test: run init again ───────────────────────────────────────
 echo "=== Dry-run test ==="
 cp "${FACTORY_ROOT}/.env" /tmp/smoke-env-before-dryrun
 if bash "${FACTORY_ROOT}/bin/disinto" init \
  "${TEST_SLUG}" \
  --bare --yes --dry-run \
  --forge-url "$FORGE_URL" \
  --repo-root "/tmp/smoke-test-repo" 2>&1 | grep -q "Dry run complete"; then
  pass "disinto init --dry-run exited successfully"
 else
  fail "disinto init --dry-run did not complete"
 fi
 # Verify --dry-run did not modify .env
 if diff -q /tmp/smoke-env-before-dryrun "${FACTORY_ROOT}/.env" >/dev/null 2>&1; then
  pass "dry-run: .env unchanged"
 else
  fail "dry-run: .env was modified (should be read-only)"
 fi
 rm -f /tmp/smoke-env-before-dryrun
 # ── Idempotency test: run init again, verify .env is stable ────────────────
 echo "=== Idempotency test: running disinto init again ==="
 cp "${FACTORY_ROOT}/.env" /tmp/smoke-env-before-rerun
 if bash "${FACTORY_ROOT}/bin/disinto" init \
  "${TEST_SLUG}" \
  --bare --yes \
@ -213,29 +187,6 @@ else
  fail "disinto init (re-run) exited non-zero"
 fi
 # Verify .env is stable across re-runs (no token churn)
 if diff -q /tmp/smoke-env-before-rerun "${FACTORY_ROOT}/.env" >/dev/null 2>&1; then
  pass "idempotency: .env unchanged on re-run"
 else
  fail "idempotency: .env changed on re-run (token churn detected)"
  diff /tmp/smoke-env-before-rerun "${FACTORY_ROOT}/.env" >&2 || true
 fi
 rm -f /tmp/smoke-env-before-rerun
 # Verify FORGE_ADMIN_TOKEN is stored in .env
 if grep -q '^FORGE_ADMIN_TOKEN=' "${FACTORY_ROOT}/.env"; then
  pass ".env contains FORGE_ADMIN_TOKEN"
 else
  fail ".env missing FORGE_ADMIN_TOKEN"
 fi
 # Verify HUMAN_TOKEN is stored in .env
 if grep -q '^HUMAN_TOKEN=' "${FACTORY_ROOT}/.env"; then
  pass ".env contains HUMAN_TOKEN"
 else
  fail ".env missing HUMAN_TOKEN"
 fi
 # ── 4. Verify Forgejo state ─────────────────────────────────────────────────
 echo "=== 4/6 Verifying Forgejo state ==="
@ -333,96 +284,6 @@ else
  fi
 fi
 # ── 7. Verify CLAUDE_CONFIG_DIR setup ─────────────────────────────────────
 echo "=== 7/7 Verifying CLAUDE_CONFIG_DIR setup ==="
 # .env should contain CLAUDE_SHARED_DIR and CLAUDE_CONFIG_DIR
 if grep -q '^CLAUDE_SHARED_DIR=' "$env_file"; then
  pass ".env contains CLAUDE_SHARED_DIR"
 else
  fail ".env missing CLAUDE_SHARED_DIR"
 fi
 if grep -q '^CLAUDE_CONFIG_DIR=' "$env_file"; then
  pass ".env contains CLAUDE_CONFIG_DIR"
 else
  fail ".env missing CLAUDE_CONFIG_DIR"
 fi
 # Test migration path with a temporary HOME
 echo "--- Testing claude config migration ---"
 ORIG_HOME="$HOME"
 ORIG_CLAUDE_SHARED_DIR="${CLAUDE_SHARED_DIR:-}"
 ORIG_CLAUDE_CONFIG_DIR="${CLAUDE_CONFIG_DIR:-}"
 export HOME="/tmp/smoke-home-claude"
 export CLAUDE_SHARED_DIR="/tmp/smoke-claude-shared"
 export CLAUDE_CONFIG_DIR="${CLAUDE_SHARED_DIR}/config"
 mkdir -p "$HOME"
 # Source claude-config.sh for setup_claude_config_dir
 source "${FACTORY_ROOT}/lib/claude-config.sh"
 # Sub-test 1: fresh install (no ~/.claude, no config dir)
 rm -rf "$HOME/.claude" "$CLAUDE_SHARED_DIR"
 setup_claude_config_dir "true"
 if [ -d "$CLAUDE_CONFIG_DIR" ]; then
  pass "Fresh install: CLAUDE_CONFIG_DIR created"
 else
  fail "Fresh install: CLAUDE_CONFIG_DIR not created"
 fi
 if [ -L "$HOME/.claude" ]; then
  pass "Fresh install: ~/.claude symlink created"
 else
  fail "Fresh install: ~/.claude symlink not created"
 fi
 # Sub-test 2: migration (pre-existing ~/.claude with content)
 rm -rf "$HOME/.claude" "$CLAUDE_SHARED_DIR"
 mkdir -p "$HOME/.claude"
 echo "test-token" > "$HOME/.claude/.credentials.json"
 setup_claude_config_dir "true"
 if [ -f "$CLAUDE_CONFIG_DIR/.credentials.json" ]; then
  pass "Migration: .credentials.json moved to CLAUDE_CONFIG_DIR"
 else
  fail "Migration: .credentials.json not found in CLAUDE_CONFIG_DIR"
 fi
 if [ -L "$HOME/.claude" ]; then
  link_target=$(readlink -f "$HOME/.claude")
  config_real=$(readlink -f "$CLAUDE_CONFIG_DIR")
  if [ "$link_target" = "$config_real" ]; then
    pass "Migration: ~/.claude is symlink to CLAUDE_CONFIG_DIR"
  else
    fail "Migration: ~/.claude symlink points to wrong target"
  fi
 else
  fail "Migration: ~/.claude is not a symlink"
 fi
 # Sub-test 3: idempotency (re-run after migration)
 setup_claude_config_dir "true"
 if [ -L "$HOME/.claude" ] && [ -f "$CLAUDE_CONFIG_DIR/.credentials.json" ]; then
  pass "Idempotency: re-run is a no-op"
 else
  fail "Idempotency: re-run broke the layout"
 fi
 # Sub-test 4: both non-empty — must abort
 rm -rf "$HOME/.claude" "$CLAUDE_SHARED_DIR"
 mkdir -p "$HOME/.claude" "$CLAUDE_CONFIG_DIR"
 echo "home-data" > "$HOME/.claude/home.txt"
 echo "config-data" > "$CLAUDE_CONFIG_DIR/config.txt"
 if setup_claude_config_dir "true" 2>/dev/null; then
  fail "Both non-empty: should have aborted but didn't"
 else
  pass "Both non-empty: correctly aborted"
 fi
 # Restore
 export HOME="$ORIG_HOME"
 export CLAUDE_SHARED_DIR="$ORIG_CLAUDE_SHARED_DIR"
 export CLAUDE_CONFIG_DIR="$ORIG_CLAUDE_CONFIG_DIR"
 rm -rf /tmp/smoke-claude-shared /tmp/smoke-home-claude
 # ── Summary ──────────────────────────────────────────────────────────────────
 echo ""
 if [ "$FAILED" -ne 0 ]; then
--- a/tests/smoke-load-secret.sh
+++ b/tests/smoke-load-secret.sh
@ -1,162 +0,0 @@
 #!/usr/bin/env bash
 # tests/smoke-load-secret.sh — Unit tests for load_secret() precedence chain
 #
 # Covers the 4 precedence cases:
 #   1. /secrets/<NAME>.env  (Nomad template)
 #   2. Current environment
 #   3. secrets/<NAME>.enc   (age-encrypted per-key file)
 #   4. Default / empty fallback
 #
 # Required tools: bash, age (for case 3)
 set -euo pipefail
 FACTORY_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
 # Report a failed check on stderr and flag the whole run as failed.
 fail() {
  >&2 printf 'FAIL: %s\n' "$*"
  FAILED=1
 }
 # Report a passed check on stdout.
 pass() {
  printf 'PASS: %s\n' "$*"
 }
 FAILED=0
 # Set up a temp workspace and fake HOME so age key paths work
 test_dir=$(mktemp -d)
 fake_home=$(mktemp -d)
 trap 'rm -rf "$test_dir" "$fake_home"' EXIT
 # Minimal env for sourcing env.sh's load_secret function without the full boot
 # We source the function definition directly to isolate the unit under test.
 # shellcheck disable=SC2034
 export USER="${USER:-test}"
 export HOME="$fake_home"
 # Source env.sh to get load_secret (and FACTORY_ROOT)
 source "${FACTORY_ROOT}/lib/env.sh"
 # ── Case 4: Default / empty fallback ────────────────────────────────────────
 echo "=== 1/5 Case 4: default fallback ==="
 unset TEST_SECRET_FALLBACK 2>/dev/null || true
 val=$(load_secret TEST_SECRET_FALLBACK "my-default")
 if [ "$val" = "my-default" ]; then
  pass "load_secret returns default when nothing is set"
 else
  fail "Expected 'my-default', got '${val}'"
 fi
 val=$(load_secret TEST_SECRET_FALLBACK)
 if [ -z "$val" ]; then
  pass "load_secret returns empty when no default and nothing set"
 else
  fail "Expected empty, got '${val}'"
 fi
 # ── Case 2: Environment variable already set ────────────────────────────────
 echo "=== 2/5 Case 2: environment variable ==="
 export TEST_SECRET_ENV="from-environment"
 val=$(load_secret TEST_SECRET_ENV "ignored-default")
 if [ "$val" = "from-environment" ]; then
  pass "load_secret returns env value over default"
 else
  fail "Expected 'from-environment', got '${val}'"
 fi
 unset TEST_SECRET_ENV
 # ── Case 3: Age-encrypted per-key file ──────────────────────────────────────
 echo "=== 3/5 Case 3: age-encrypted secret ==="
 if command -v age &>/dev/null && command -v age-keygen &>/dev/null; then
  # Generate a test age key
  age_key_dir="${fake_home}/.config/sops/age"
  mkdir -p "$age_key_dir"
  age-keygen -o "${age_key_dir}/keys.txt" 2>/dev/null
  pub_key=$(age-keygen -y "${age_key_dir}/keys.txt")
  # Create encrypted secret
  secrets_dir="${FACTORY_ROOT}/secrets"
  mkdir -p "$secrets_dir"
  printf 'age-test-value' | age -r "$pub_key" -o "${secrets_dir}/TEST_SECRET_AGE.enc"
  unset TEST_SECRET_AGE 2>/dev/null || true
  val=$(load_secret TEST_SECRET_AGE "fallback")
  if [ "$val" = "age-test-value" ]; then
    pass "load_secret decrypts age-encrypted secret"
  else
    fail "Expected 'age-test-value', got '${val}'"
  fi
  # Verify caching: call load_secret directly (not in subshell) so export propagates
  unset TEST_SECRET_AGE 2>/dev/null || true
  load_secret TEST_SECRET_AGE >/dev/null
  if [ "${TEST_SECRET_AGE:-}" = "age-test-value" ]; then
    pass "load_secret caches decrypted value in environment (direct call)"
  else
    fail "Decrypted value not cached in environment"
  fi
  # Clean up test secret
  rm -f "${secrets_dir}/TEST_SECRET_AGE.enc"
  rmdir "$secrets_dir" 2>/dev/null || true
  unset TEST_SECRET_AGE
 else
  echo "SKIP: age/age-keygen not found — skipping age decryption test"
 fi
 # ── Case 1: Nomad template path ────────────────────────────────────────────
 echo "=== 4/5 Case 1: Nomad template (/secrets/<NAME>.env) ==="
 nomad_dir="/secrets"
 if [ -w "$(dirname "$nomad_dir")" ] 2>/dev/null || [ -w "$nomad_dir" ] 2>/dev/null; then
  mkdir -p "$nomad_dir"
  printf 'TEST_SECRET_NOMAD=from-nomad-template\n' > "${nomad_dir}/TEST_SECRET_NOMAD.env"
  # Even with env set, Nomad path takes precedence
  export TEST_SECRET_NOMAD="from-env-should-lose"
  val=$(load_secret TEST_SECRET_NOMAD "default")
  if [ "$val" = "from-nomad-template" ]; then
    pass "load_secret prefers Nomad template over env"
  else
    fail "Expected 'from-nomad-template', got '${val}'"
  fi
  rm -f "${nomad_dir}/TEST_SECRET_NOMAD.env"
  rmdir "$nomad_dir" 2>/dev/null || true
  unset TEST_SECRET_NOMAD
 else
  echo "SKIP: /secrets not writable — skipping Nomad template test (needs root or container)"
 fi
 # ── Precedence: env beats age ────────────────────────────────────────────
 echo "=== 5/5 Precedence: env beats age-encrypted ==="
 if command -v age &>/dev/null && command -v age-keygen &>/dev/null; then
  age_key_dir="${fake_home}/.config/sops/age"
  mkdir -p "$age_key_dir"
  [ -f "${age_key_dir}/keys.txt" ] || age-keygen -o "${age_key_dir}/keys.txt" 2>/dev/null
  pub_key=$(age-keygen -y "${age_key_dir}/keys.txt")
  secrets_dir="${FACTORY_ROOT}/secrets"
  mkdir -p "$secrets_dir"
  printf 'age-value-should-lose' | age -r "$pub_key" -o "${secrets_dir}/TEST_SECRET_PREC.enc"
  export TEST_SECRET_PREC="env-value-wins"
  val=$(load_secret TEST_SECRET_PREC "default")
  if [ "$val" = "env-value-wins" ]; then
    pass "load_secret prefers env over age-encrypted file"
  else
    fail "Expected 'env-value-wins', got '${val}'"
  fi
  rm -f "${secrets_dir}/TEST_SECRET_PREC.enc"
  rmdir "$secrets_dir" 2>/dev/null || true
  unset TEST_SECRET_PREC
 else
  echo "SKIP: age not found — skipping precedence test"
 fi
 # ── Summary ───────────────────────────────────────────────────────────────
 echo ""
 if [ "$FAILED" -ne 0 ]; then
  echo "=== SMOKE-LOAD-SECRET TEST FAILED ==="
  exit 1
 fi
 echo "=== SMOKE-LOAD-SECRET TEST PASSED ==="
--- a/tools/edge-control/README.md
+++ b/tools/edge-control/README.md
@ -1,284 +0,0 @@
 # Edge Control Plane
 SSH-forced-command control plane for managing reverse tunnels to edge hosts.
 ## Overview
 This control plane runs on the public edge host (Debian DO box) and provides:
 - **Self-service tunnel registration**: Projects run `disinto edge register` to get an assigned port and FQDN
 - **SSH forced commands**: Uses `restrict,command="..."` authorized_keys entries — no new HTTP daemon
 - **Hot-patched Caddy routing**: `<project>.disinto.ai` → `127.0.0.1:<port>` via Caddy admin API
 - **Port allocator**: Manages ports in `20000-29999` range with flock-based concurrency control
 ## Architecture
 ```
 ┌─────────────────────────────────────────────────────────────────────────────┐
 │                           Edge Host (Debian DO)                              │
 │                                                                              │
 │  ┌──────────────────┐    ┌───────────────────────────────────────────────┐  │
 │  │  disinto-register│    │  /var/lib/disinto/                            │  │
 │  │  (authorized_keys│    │  ├── registry.json (source of truth)          │  │
 │  │   forced cmd)    │    │  ├── registry.lock (flock)                    │  │
 │  │                  │    │  └── authorized_keys (rebuildable)            │  │
 │  └────────┬─────────┘    └───────────────────────────────────────────────┘  │
 │           │                                                                   │
 │           ▼                                                                   │
 │  ┌─────────────────────────────────────────────────────────────────────┐     │
 │  │  register.sh (forced command handler)                                │     │
 │  │  ──────────────────────────────────────────────────────────────────  │     │
 │  │  • Parses SSH_ORIGINAL_COMMAND                                       │     │
 │  │  • Dispatches to register|deregister|list                            │     │
 │  │  • Returns JSON on stdout                                            │     │
 │  └─────────────────────────────────────────────────────────────────────┘     │
 │           │                                                                   │
 │           │ lib/                                                              │
 │           ├─ ports.sh    → port allocator (20000-29999)                      │
 │           ├─ authorized_keys.sh → rebuild authorized_keys from registry     │
 │           └─ caddy.sh    → Caddy admin API (127.0.0.1:2019)                  │
 │                                                                              │
 │  ┌─────────────────────────────────────────────────────────────────────┐     │
 │  │  Caddy (with Gandi DNS plugin)                                       │     │
 │  │  ──────────────────────────────────────────────────────────────────  │     │
 │  │  • Admin API on 127.0.0.1:2019                                       │     │
 │  │  • Wildcard *.disinto.ai cert (DNS-01 via Gandi)                     │     │
 │  │  • Site blocks hot-patched via admin API                             │     │
 │  └─────────────────────────────────────────────────────────────────────┘     │
 │                                                                              │
 │  ┌─────────────────────────────────────────────────────────────────────┐     │
 │  │  disinto-tunnel (no shell, no password)                              │     │
 │  │  ──────────────────────────────────────────────────────────────────  │     │
 │  │  • Receives reverse tunnels only                                     │     │
 │  │  • authorized_keys: permitlisten="127.0.0.1:<port>"                  │     │
 │  └─────────────────────────────────────────────────────────────────────┘     │
 └─────────────────────────────────────────────────────────────────────────────┘
 ```
 ## Installation
 ### Prerequisites
 - Fresh Debian 12 (Bookworm) system
 - Root or sudo access
 - Domain `disinto.ai` hosted at Gandi with API token
 ### One-Click Install
 ```bash
 # Download and run installer
 curl -sL https://raw.githubusercontent.com/disinto-admin/disinto/fix/issue-621/tools/edge-control/install.sh | bash -s -- --gandi-token YOUR_GANDI_API_TOKEN
 # You'll be prompted to paste your admin pubkey for the disinto-register user
 ```
 ### What install.sh Does
 1. **Creates users**:
   - `disinto-register` — owns registry, runs Caddy admin API calls
   - `disinto-tunnel` — no password, no shell, only receives reverse tunnels
 2. **Creates data directory**:
   - `/var/lib/disinto/` with `registry.json`, `registry.lock`
   - Permissions: `root:disinto-register 0750`
 3. **Installs Caddy**:
   - Backs up any pre-existing `/etc/caddy/Caddyfile` to `/etc/caddy/Caddyfile.pre-disinto`
   - Downloads Caddy with the Gandi DNS plugin
   - Enables the admin API on `127.0.0.1:2019`
   - Configures a wildcard cert for `*.disinto.ai` via DNS-01
   - Creates `/etc/caddy/extra.d/` for operator-owned site blocks
   - Emitted Caddyfile ends with `import /etc/caddy/extra.d/*.caddy`
 4. **Sets up SSH**:
   - Creates `disinto-register` authorized_keys with forced command
   - Creates `disinto-tunnel` authorized_keys (rebuildable from registry)
 5. **Installs control plane scripts**:
   - `/opt/disinto-edge/register.sh` — forced command handler
   - `/opt/disinto-edge/lib/*.sh` — helper libraries
 ## Operator-Owned Site Blocks
 Edge-control owns the top-level `/etc/caddy/Caddyfile` and dynamic `<project>.<DOMAIN_SUFFIX>` routes injected via the Caddy admin API. Operators own everything under `/etc/caddy/extra.d/`.
 To serve non-tunnel content (apex domain, www redirect, static sites), drop `.caddy` files into `/etc/caddy/extra.d/`:
 ```bash
 # Example: /etc/caddy/extra.d/landing.caddy
 disinto.ai {
  root * /home/debian/disinto-site
  file_server
 }
 # Example: /etc/caddy/extra.d/www-redirect.caddy
 www.disinto.ai {
  redir https://disinto.ai{uri} permanent
 }
 ```
 These files survive across `install.sh` re-runs. The `--extra-caddyfile <path>` flag overrides the default import glob (`/etc/caddy/extra.d/*.caddy`) if needed.
 ## Usage
 ### Register a Tunnel (from dev box)
 ```bash
 # First-time setup (generates tunnel keypair)
 disinto edge register myproject
 # Subsequent runs are idempotent
 disinto edge register myproject  # returns same port/FQDN
 ```
 Response:
 ```json
 {"port":23456,"fqdn":"myproject.disinto.ai"}
 ```
 These values are written to `.env` as:
 ```
 EDGE_TUNNEL_HOST=edge.disinto.ai
 EDGE_TUNNEL_PORT=23456
 EDGE_TUNNEL_FQDN=myproject.disinto.ai
 ```
 ### Deregister a Tunnel
 ```bash
 disinto edge deregister myproject
 ```
 This:
 - Removes the authorized_keys entry for the tunnel
 - Removes the Caddy site block
 - Frees the port in the registry
 ### Check Status
 ```bash
 disinto edge status
 ```
 Shows all registered tunnels with their ports and FQDNs.
 ## Registry Schema
 `/var/lib/disinto/registry.json`:
 ```json
 {
  "version": 1,
  "projects": {
    "myproject": {
      "port": 23456,
      "fqdn": "myproject.disinto.ai",
      "pubkey": "ssh-ed25519 AAAAC3Nza... operator@devbox",
      "registered_at": "2026-04-10T14:30:00Z"
    }
  }
 }
 ```
 ## Recovery
 ### After State Loss
 If `registry.json` is lost but Caddy config persists:
 ```bash
 # Rebuild from existing Caddy config
 ssh disinto-register@edge.disinto.ai '
  /opt/disinto-edge/lib/rebuild-registry-from-caddy.sh
 '
 ```
 ### Rebuilding authorized_keys
 If `authorized_keys` is corrupted:
 ```bash
 ssh disinto-register@edge.disinto.ai '
  /opt/disinto-edge/lib/rebuild-authorized-keys.sh
 '
 ```
 ### Rotating Admin Key
 To rotate the `disinto-register` admin pubkey:
 ```bash
 # On edge host, remove old pubkey from authorized_keys
 # Add new pubkey with the forced command: echo 'restrict,command="/opt/disinto-edge/register.sh" <new-pubkey>' >> /home/disinto-register/.ssh/authorized_keys
 # Trigger rebuild: /opt/disinto-edge/lib/rebuild-authorized-keys.sh
 ```
 ### Adding a Second Edge Host
 For high availability, add a second edge host:
 1. Run `install.sh` on the second host
 2. Configure Caddy to use the same registry (NFS or shared storage)
 3. Update `EDGE_HOST` in `.env` to load-balance between hosts
 4. Use a reverse proxy (HAProxy, Traefik) in front of both edge hosts
 ## Security
 ### What's Protected
 - **No new attack surface**: sshd is already the only listener; control plane is a forced command
 - **Restricted tunnel user**: `disinto-tunnel` cannot shell in, only receive reverse tunnels
 - **Port validation**: Tunnel connections outside allocated ports are refused
 - **Forced command**: `disinto-register` can only execute `register.sh`
 ### Certificate Strategy
 - Single wildcard `*.disinto.ai` cert via DNS-01 through Gandi
 - Caddy handles automatic renewal
 - No per-project cert work needed
 ### Future Considerations
 - Long-term "shop" vision could layer an HTTP API on top
 - forward_auth / OAuth is out of scope (handled per-project inside edge container)
 ## Testing
 ### Verify Tunnel User Restrictions
 ```bash
 # Should hang (no command given)
 ssh -i tunnel_key disinto-tunnel@edge.disinto.ai
 # Should fail (port outside allocation)
 ssh -R 127.0.0.1:9999:localhost:80 disinto-tunnel@edge.disinto.ai
 # Should succeed (port within allocation)
 ssh -R 127.0.0.1:23456:localhost:80 disinto-tunnel@edge.disinto.ai
 ```
 ### Verify Admin User Restrictions
 ```bash
 # Should fail (not a valid command)
 ssh disinto-register@edge.disinto.ai "random command"
 # Should succeed (valid command)
 ssh disinto-register@edge.disinto.ai "register myproject $(cat ~/.ssh/id_ed25519.pub)"
 ```
 ## Files
 - `install.sh` — One-shot installer for fresh Debian DO box
 - `register.sh` — Forced-command handler (dispatches to `register|deregister|list`)
 - `lib/ports.sh` — Port allocator over `20000-29999`, jq-based, serialized with `flock`
 - `lib/authorized_keys.sh` — Deterministic rebuild of `disinto-tunnel` authorized_keys
 - `lib/caddy.sh` — POST to Caddy admin API for route mapping
 ## Dependencies
 - `bash` — All scripts are bash
 - `jq` — JSON parsing for registry
 - `flock` — Concurrency control for registry updates
 - `caddy` — Web server with admin API and Gandi DNS plugin
 - `ssh` — OpenSSH for forced commands and reverse tunnels
--- a/tools/edge-control/install.sh
+++ b/tools/edge-control/install.sh
@ -1,407 +0,0 @@
 #!/usr/bin/env bash
 # =============================================================================
 # install.sh — One-shot installer for edge control plane on Debian DO box
 #
 # Usage:
 #   curl -sL https://raw.githubusercontent.com/disinto-admin/disinto/fix/issue-621/tools/edge-control/install.sh | bash -s -- --gandi-token YOUR_TOKEN
 #
 # What it does:
 #   1. Creates users: disinto-register, disinto-tunnel
 #   2. Creates /var/lib/disinto/ with registry.json, registry.lock
 #   3. Installs Caddy with Gandi DNS plugin
 #   4. Sets up SSH authorized_keys for both users
 #   5. Installs control plane scripts to /opt/disinto-edge/
 #
 # Requirements:
 #   - Fresh Debian 12 (Bookworm)
 #   - Root or sudo access
 #   - Gandi API token (for wildcard cert)
 # =============================================================================
 set -euo pipefail
 # Colors for output (ANSI escape sequences; expanded by printf's %b below)
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m' # No Color
 # Logging helpers. Each takes the message as $1 and prints it after a colored
 # level tag. Only the color codes are escape-expanded (%b); the message is
 # printed verbatim (%s), so backslashes in paths or keys are not mangled.
 # Informational output goes to stdout; warnings and errors go to stderr.
 log_info() {
  printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$1"
 }
 log_warn() {
  printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1" >&2
 }
 log_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
 }
 # Parse arguments
 GANDI_TOKEN=""
 INSTALL_DIR="/opt/disinto-edge"
 REGISTRY_DIR="/var/lib/disinto"
 CADDY_VERSION="2.8.4"
 DOMAIN_SUFFIX="disinto.ai"
 EXTRA_CADDYFILE="/etc/caddy/extra.d/*.caddy"
 # Print command-line help and exit.
 # Arguments: $1 - exit status (optional; defaults to 1 so error paths that
 #                 call plain `usage` still fail the script)
 # Globals:   CADDY_VERSION (read, shown as the default version)
 usage() {
  local status="${1:-1}"
  printf '%s\n' \
    "Usage: $0 [options]" \
    "" \
    "Options:" \
    "  --gandi-token <token>       Gandi API token for wildcard cert (required)" \
    "  --install-dir <dir>         Install directory (default: /opt/disinto-edge)" \
    "  --registry-dir <dir>        Registry directory (default: /var/lib/disinto)" \
    "  --caddy-version <ver>       Caddy version to install (default: ${CADDY_VERSION})" \
    "  --domain-suffix <suffix>    Domain suffix for tunnels (default: disinto.ai)" \
    "  --extra-caddyfile <path>    Import path for operator-owned Caddy config" \
    "                              (default: /etc/caddy/extra.d/*.caddy)" \
    "  -h, --help                  Show this help" \
    "" \
    "Example:" \
    "  $0 --gandi-token YOUR_GANDI_API_TOKEN"
  exit "$status"
 }
 # Parse options. Every value-taking option is checked for a following argument
 # first: under `set -u` a bare trailing option would otherwise abort with an
 # "unbound variable" error instead of a usable message.
 while [[ $# -gt 0 ]]; do
  case "$1" in
    --gandi-token|--install-dir|--registry-dir|--caddy-version|--domain-suffix|--extra-caddyfile)
      if [[ $# -lt 2 ]]; then
        log_error "Option $1 requires a value"
        usage
      fi
      case "$1" in
        --gandi-token)     GANDI_TOKEN="$2" ;;
        --install-dir)     INSTALL_DIR="$2" ;;
        --registry-dir)    REGISTRY_DIR="$2" ;;
        --caddy-version)   CADDY_VERSION="$2" ;;
        --domain-suffix)   DOMAIN_SUFFIX="$2" ;;
        --extra-caddyfile) EXTRA_CADDYFILE="$2" ;;
      esac
      shift 2
      ;;
    -h|--help)
      # Help was requested explicitly, so this is not an error
      usage 0
      ;;
    *)
      log_error "Unknown option: $1"
      usage
      ;;
  esac
 done
 # Validate required arguments
 if [ -z "$GANDI_TOKEN" ]; then
  log_error "Gandi API token is required (--gandi-token)"
  usage
 fi
 log_info "Starting edge control plane installation..."
 # =============================================================================
 # Step 1: Create users
 # =============================================================================
 log_info "Creating users..."
 # Create disinto-register user.
 # sshd runs a forced command (command="..." in authorized_keys) through the
 # account's login shell, so this account needs a real shell: with
 # /usr/sbin/nologin every `ssh disinto-register@host ...` is refused before
 # register.sh starts. The restrict,command= options on the key still limit
 # what a caller can run.
 REGISTER_SHELL="/bin/bash"
 if ! id "disinto-register" &>/dev/null; then
  # Fail loudly: later steps chown files to this user and would break anyway
  if ! useradd -r -s "$REGISTER_SHELL" -m -d /home/disinto-register "disinto-register"; then
    log_error "Failed to create user: disinto-register"
    exit 1
  fi
  log_info "Created user: disinto-register"
 else
  log_info "User already exists: disinto-register"
  # Repair accounts created with a non-shell, which cannot run forced commands
  CURRENT_SHELL="$(getent passwd disinto-register | cut -d: -f7)"
  case "$CURRENT_SHELL" in
    */nologin|*/false)
      usermod -s "$REGISTER_SHELL" disinto-register
      log_warn "Changed disinto-register login shell from ${CURRENT_SHELL} to ${REGISTER_SHELL} (required for forced commands)"
      ;;
  esac
 fi
 # Create disinto-tunnel user (no shell, no home directory created by useradd)
 if ! id "disinto-tunnel" &>/dev/null; then
  if ! useradd -r -s /usr/sbin/nologin -M "disinto-tunnel"; then
    log_error "Failed to create user: disinto-tunnel"
    exit 1
  fi
  log_info "Created user: disinto-tunnel"
 else
  log_info "User already exists: disinto-tunnel"
 fi
 # =============================================================================
 # Step 2: Create registry directory
 # =============================================================================
 log_info "Creating registry directory..."
 mkdir -p "$REGISTRY_DIR"
 chown root:disinto-register "$REGISTRY_DIR"
 chmod 0750 "$REGISTRY_DIR"
 # Initialize registry.json
 REGISTRY_FILE="${REGISTRY_DIR}/registry.json"
 if [ ! -f "$REGISTRY_FILE" ]; then
  echo '{"version":1,"projects":{}}' > "$REGISTRY_FILE"
  chmod 0644 "$REGISTRY_FILE"
  log_info "Initialized registry: ${REGISTRY_FILE}"
 fi
 # Create lock file
 LOCK_FILE="${REGISTRY_DIR}/registry.lock"
 touch "$LOCK_FILE"
 chmod 0644 "$LOCK_FILE"
 # =============================================================================
 # Step 3: Install Caddy with Gandi DNS plugin
 # =============================================================================
 log_info "Installing Caddy ${CADDY_VERSION} with Gandi DNS plugin..."
 # Create Caddy config directory
 CADDY_CONFIG_DIR="/etc/caddy"
 CADDY_DATA_DIR="/var/lib/caddy"
 mkdir -p "$CADDY_CONFIG_DIR" "$CADDY_DATA_DIR"
 chmod 755 "$CADDY_CONFIG_DIR" "$CADDY_DATA_DIR"
 # Download Caddy binary with Gandi plugin
 CADDY_BINARY="/usr/bin/caddy"
 # Build Caddy with Gandi plugin using caddy build command
 if ! command -v caddy &>/dev/null; then
  log_info "Installing Caddy builder..."
  go install github.com/caddyserver/caddy/v2/cmd/caddy@latest 2>/dev/null || {
    log_warn "Go not available, trying system package..."
    if apt-get update -qq && apt-get install -y -qq caddy 2>/dev/null; then
      :
    fi || true
  }
 fi
 # Download Caddy with Gandi DNS plugin using Caddy's download API
 # The API returns a binary with specified plugins baked in
 # NOTE(review): CADDY_VERSION is not part of this URL, so the requested
 # version is not pinned by the download — confirm whether that is intended.
 CADDY_DOWNLOAD_API="https://caddyserver.com/api/download?os=linux&arch=amd64&p=github.com/caddy-dns/gandi"
 log_info "Downloading Caddy with Gandi DNS plugin..."
 # Download to a private, unpredictable path: a fixed /tmp/caddy could be
 # pre-created or swapped by another local user before it is installed.
 CADDY_TMP="$(mktemp)"
 # -f makes curl fail on HTTP errors instead of saving the error page as the binary
 if ! curl -fsSL "$CADDY_DOWNLOAD_API" -o "$CADDY_TMP"; then
  log_error "Failed to download Caddy from ${CADDY_DOWNLOAD_API}"
  rm -f "$CADDY_TMP"
  exit 1
 fi
 chmod 0755 "$CADDY_TMP"
 # Verify it works
 if ! "$CADDY_TMP" version &>/dev/null; then
  log_error "Caddy binary verification failed"
  rm -f "$CADDY_TMP"
  exit 1
 fi
 # Check for Gandi plugin. `caddy version` only prints the version string;
 # compiled-in plugins are reported by `caddy list-modules`. The output is
 # captured first so that grep -q closing the pipe early cannot fail the
 # pipeline under pipefail.
 CADDY_MODULES="$("$CADDY_TMP" list-modules 2>/dev/null || true)"
 if ! grep -q 'dns\.providers\.gandi' <<<"$CADDY_MODULES"; then
  log_warn "Gandi plugin not found in Caddy binary - DNS-01 challenge will fail"
 fi
 mv "$CADDY_TMP" "$CADDY_BINARY"
 log_info "Installed Caddy: $CADDY_BINARY"
 # Create Caddy systemd service
 # - EnvironmentFile loads GANDI_API_KEY into the service; without it the
 #   {env.GANDI_API_KEY} placeholder in the Caddyfile expands to nothing.
 # - AmbientCapabilities lets the unprivileged service user bind :80/:443.
 # NOTE(review): the unit runs as nobody, which may have no writable directory
 # for Caddy's certificate storage — confirm.
 CADDY_SERVICE="/etc/systemd/system/caddy.service"
 GANDI_ENV="/etc/caddy/gandi.env"
 cat > "$CADDY_SERVICE" <<EOF
 [Unit]
 Description=Caddy HTTP/HTTPS web server
 Documentation=https://caddyserver.com/docs/
 After=network.target network-online.target
 Wants=network-online.target
 [Service]
 User=nobody
 Group=nogroup
 Type=notify
 EnvironmentFile=${GANDI_ENV}
 AmbientCapabilities=CAP_NET_BIND_SERVICE
 ExecStart=/usr/bin/caddy run --config /etc/caddy/Caddyfile --adapter caddyfile
 ExecReload=/usr/bin/caddy reload --config /etc/caddy/Caddyfile --adapter caddyfile
 Restart=on-failure
 RestartSec=5
 [Install]
 WantedBy=multi-user.target
 EOF
 systemctl daemon-reload
 systemctl enable caddy 2>/dev/null || true
 # Create Gandi environment file.
 # The restrictive umask applies before the file exists, so the token is never
 # readable by other users, not even briefly before the chmod below.
 (
  umask 077
  cat > "$GANDI_ENV" <<EOF
 GANDI_API_KEY=${GANDI_TOKEN}
 EOF
 )
 chmod 600 "$GANDI_ENV"
 # Create Caddyfile with admin API and wildcard cert
 # Note: Caddy auto-generates server names (srv0, srv1, …). lib/caddy.sh
 # discovers the server name dynamically via _discover_server_name() so we
 # don't need to name the server here.
 CADDYFILE="/etc/caddy/Caddyfile"
 # Back up existing Caddyfile before overwriting
 if [ -f "$CADDYFILE" ] && [ ! -f "${CADDYFILE}.pre-disinto" ]; then
  cp "$CADDYFILE" "${CADDYFILE}.pre-disinto"
  log_info "Backed up existing Caddyfile to ${CADDYFILE}.pre-disinto"
 fi
 # Create extra.d directory for operator-owned site blocks
 EXTRA_DIR="/etc/caddy/extra.d"
 mkdir -p "$EXTRA_DIR"
 chmod 0755 "$EXTRA_DIR"
 if getent group caddy >/dev/null 2>&1; then
  chown root:caddy "$EXTRA_DIR"
 else
  log_warn "Group 'caddy' does not exist; extra.d owned by root:root"
 fi
 log_info "Created ${EXTRA_DIR} for operator-owned Caddy config"
 cat > "$CADDYFILE" <<CADDYEOF
 # Caddy configuration for edge control plane
 # Admin API enabled on 127.0.0.1:2019
 {
  admin localhost:2019
 }
 # Default site (reverse proxy for edge tunnels will be added dynamically)
 :80, :443 {
  tls {
    dns gandi {env.GANDI_API_KEY}
  }
 }
 # Operator-owned site blocks (apex, www, static content, etc.)
 import ${EXTRA_CADDYFILE}
 CADDYEOF
 # Start Caddy
 systemctl restart caddy 2>/dev/null || {
  log_warn "Could not start Caddy service (may need manual start)"
  # Try running directly for testing
  /usr/bin/caddy run --config /etc/caddy/Caddyfile --adapter caddyfile &
  sleep 2
 }
 log_info "Caddy configured with admin API on 127.0.0.1:2019"
 # =============================================================================
 # Step 4: Install control plane scripts
 # =============================================================================
 log_info "Installing control plane scripts to ${INSTALL_DIR}..."
 mkdir -p "${INSTALL_DIR}/lib"
 # Copy scripts (overwrite existing to ensure idempotent updates)
 # Resolve the directory that holds this installer. "${BASH_SOURCE%/*}" is
 # wrong when the script is started as `bash install.sh` (no slash to strip)
 # and empty when it is piped into bash, so use dirname and verify the result.
 SCRIPT_SRC_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 if [ ! -f "${SCRIPT_SRC_DIR}/register.sh" ] || [ ! -d "${SCRIPT_SRC_DIR}/lib" ]; then
  log_error "Control plane sources not found next to the installer (looked in ${SCRIPT_SRC_DIR})"
  log_error "Run install.sh from a checkout of tools/edge-control/ so register.sh and lib/ are available"
  exit 1
 fi
 cp "${SCRIPT_SRC_DIR}/register.sh" "${INSTALL_DIR}/"
 cp "${SCRIPT_SRC_DIR}/lib/ports.sh" "${INSTALL_DIR}/lib/"
 cp "${SCRIPT_SRC_DIR}/lib/authorized_keys.sh" "${INSTALL_DIR}/lib/"
 cp "${SCRIPT_SRC_DIR}/lib/caddy.sh" "${INSTALL_DIR}/lib/"
 chmod +x "${INSTALL_DIR}/register.sh"
 chmod +x "${INSTALL_DIR}/lib/"*.sh
 chown -R root:disinto-register "${INSTALL_DIR}"
 chmod 750 "${INSTALL_DIR}"
 chmod 750 "${INSTALL_DIR}/lib"
 log_info "Control plane scripts installed"
 # =============================================================================
 # Step 5: Set up SSH authorized_keys
 # =============================================================================
 log_info "Setting up SSH authorized_keys..."
 # Create .ssh directories
 mkdir -p /home/disinto-register/.ssh
 mkdir -p /home/disinto-tunnel/.ssh
 # Set permissions
 chmod 700 /home/disinto-register/.ssh
 chmod 700 /home/disinto-tunnel/.ssh
 chown -R disinto-register:disinto-register /home/disinto-register/.ssh
 chown -R disinto-tunnel:disinto-tunnel /home/disinto-tunnel/.ssh
 # Prompt for admin pubkey (for disinto-register user)
 log_info "Please paste your admin SSH public key for the disinto-register user."
 log_info "Paste the entire key (e.g., 'ssh-ed25519 AAAAC3Nza... user@host') and press Enter."
 log_info "Paste key (or press Enter to skip): "
 # When the installer itself is being read from stdin (curl ... | bash),
 # BASH_SOURCE[0] is empty and a plain `read` would swallow the next line of
 # the script, so the answer is taken from the controlling terminal instead.
 # EOF or a missing terminal counts as "skip" rather than aborting under set -e.
 ADMIN_PUBKEY=""
 if [ -n "${BASH_SOURCE[0]:-}" ]; then
  read -r ADMIN_PUBKEY || ADMIN_PUBKEY=""
 else
  { read -r ADMIN_PUBKEY </dev/tty; } 2>/dev/null || ADMIN_PUBKEY=""
 fi
 REGISTER_AUTH_KEYS="/home/disinto-register/.ssh/authorized_keys"
 if [ -n "$ADMIN_PUBKEY" ]; then
  # Write the key with its forced command from the start, so it is never
  # installed unrestricted even if a later step fails.
  printf 'restrict,command="%s" %s\n' "${INSTALL_DIR}/register.sh" "$ADMIN_PUBKEY" > "$REGISTER_AUTH_KEYS"
  log_info "Admin pubkey added to disinto-register"
 else
  log_warn "No admin pubkey provided - SSH access will be restricted"
  echo "# No admin pubkey configured" > "$REGISTER_AUTH_KEYS"
 fi
 # Same mode and owner on both paths
 chmod 600 "$REGISTER_AUTH_KEYS"
 chown disinto-register:disinto-register "$REGISTER_AUTH_KEYS"
 # Create initial authorized_keys for tunnel user
 # Source the library and call the function directly (not as subprocess)
 source "${INSTALL_DIR}/lib/ports.sh"
 source "${INSTALL_DIR}/lib/authorized_keys.sh"
 rebuild_authorized_keys
 # =============================================================================
 # Step 6: Configure forced command for disinto-register
 # =============================================================================
 log_info "Configuring forced command for disinto-register..."
 # Update authorized_keys with forced command
 # Note: This replaces the pubkey line with a restricted version
 if [ -n "$ADMIN_PUBKEY" ]; then
  # Extract key type and key
  KEY_TYPE="${ADMIN_PUBKEY%% *}"
  KEY_DATA="${ADMIN_PUBKEY#* }"
  # Create forced command entry
  FORCED_CMD="restrict,command=\"${INSTALL_DIR}/register.sh\" ${KEY_TYPE} ${KEY_DATA}"
  # Replace the pubkey line
  echo "$FORCED_CMD" > /home/disinto-register/.ssh/authorized_keys
  chmod 600 /home/disinto-register/.ssh/authorized_keys
  chown disinto-register:disinto-register /home/disinto-register/.ssh/authorized_keys
  log_info "Forced command configured: ${INSTALL_DIR}/register.sh"
 fi
 # =============================================================================
 # Step 7: Final configuration
 # =============================================================================
 log_info "Configuring domain suffix: ${DOMAIN_SUFFIX}"
 # Reload systemd if needed
 systemctl daemon-reload 2>/dev/null || true
 # =============================================================================
 # Summary
 # =============================================================================
 echo ""
 log_info "Installation complete!"
 echo ""
 echo "Edge control plane is now running on this host."
 echo ""
 echo "Configuration:"
 echo "  Install directory: ${INSTALL_DIR}"
 echo "  Registry: ${REGISTRY_FILE}"
 echo "  Caddy admin API: http://127.0.0.1:2019"
 echo "  Operator site blocks: ${EXTRA_DIR}/ (import ${EXTRA_CADDYFILE})"
 echo ""
 echo "Users:"
 echo "  disinto-register - SSH forced command (runs ${INSTALL_DIR}/register.sh)"
 echo "  disinto-tunnel   - Reverse tunnel receiver (no shell)"
 echo ""
 echo "Next steps:"
 echo "  1. Verify Caddy is running: systemctl status caddy"
 echo "  2. Test SSH access: ssh disinto-register@localhost 'list'"
 echo "  3. From a dev box, register a tunnel:"
 echo "     disinto edge register <project>"
 echo ""
 echo "To test:"
 echo "  ssh disinto-register@$(hostname) 'list'"
 echo ""
--- a/tools/edge-control/lib/authorized_keys.sh
+++ b/tools/edge-control/lib/authorized_keys.sh
@ -1,99 +0,0 @@
 #!/usr/bin/env bash
 # =============================================================================
 # lib/authorized_keys.sh — Rebuild authorized_keys from registry
 #
 # Rebuilds disinto-tunnel's authorized_keys file from the registry.
 # Each entry has:
 #   - restrict flag (no shell, no X11 forwarding, etc.)
 #   - permitlisten for allowed reverse tunnel ports
 #   - command="/bin/false" to prevent arbitrary command execution
 #
 # Functions:
 #   rebuild_authorized_keys → rebuilds /home/disinto-tunnel/.ssh/authorized_keys
 #   get_tunnel_authorized_keys → prints the generated authorized_keys content
 # =============================================================================
 set -euo pipefail
 # Source ports library (SCRIPT_DIR is this file's directory, so lib/ports.sh is adjacent)
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/ports.sh"
 # Tunnel user home directory
 TUNNEL_USER="disinto-tunnel"
 TUNNEL_SSH_DIR="/home/${TUNNEL_USER}/.ssh"
 TUNNEL_AUTH_KEYS="${TUNNEL_SSH_DIR}/authorized_keys"
 # Ensure the tunnel user and its ~/.ssh directory exist.
 # Globals: TUNNEL_USER (read), TUNNEL_SSH_DIR (read)
 # The directory is created on every call, not only when the user is new: an
 # existing account without ~/.ssh would otherwise make the authorized_keys
 # write fail.
 _ensure_tunnel_user() {
  if ! id "$TUNNEL_USER" &>/dev/null; then
    # Best effort: the account may be managed outside this script
    useradd -r -s /usr/sbin/nologin -M "$TUNNEL_USER" 2>/dev/null || true
  fi
  mkdir -p "$TUNNEL_SSH_DIR"
  chmod 700 "$TUNNEL_SSH_DIR"
 }
 # Generate the authorized_keys content from registry
 # Reads one JSON object per line from list_ports (fields used: .port, .pubkey).
 # Output: one authorized_keys line per valid registered project, or a single
 #         comment line when there are none.
 # Entries are skipped when:
 #   - port is missing/null or not a plain number (jq -r would otherwise yield
 #     the literal string "null" and produce permitlisten="127.0.0.1:null")
 #   - pubkey is missing/null, or contains a line break (which would inject an
 #     extra, unrestricted line into authorized_keys)
 generate_authorized_keys_content() {
  local auth_lines=()
  local entry port pubkey
  # Get all projects from registry
  while IFS= read -r entry; do
    [ -z "$entry" ] && continue
    port=$(jq -r '.port // empty' <<<"$entry") || continue
    pubkey=$(jq -r '.pubkey // empty' <<<"$entry") || continue
    [[ "$port" =~ ^[0-9]+$ ]] || continue
    [ -n "$pubkey" ] || continue
    case "$pubkey" in
      *$'\n'*|*$'\r'*) continue ;;
    esac
    # Format: restrict,port-forwarding,permitlisten="127.0.0.1:<port>",command="/bin/false" <key-type> <key>
    auth_lines+=("restrict,port-forwarding,permitlisten=\"127.0.0.1:${port}\",command=\"/bin/false\" ${pubkey}")
  done < <(list_ports)
  if [ "${#auth_lines[@]}" -eq 0 ]; then
    # No projects registered, create empty file
    echo "# No tunnels registered"
  else
    printf '%s\n' "${auth_lines[@]}"
  fi
 }
 # Rebuild authorized_keys file
 # Usage: rebuild_authorized_keys
 # Globals: TUNNEL_AUTH_KEYS, TUNNEL_SSH_DIR, TUNNEL_USER (read)
 # Outputs: a one-line summary on stderr
 rebuild_authorized_keys() {
  _ensure_tunnel_user
  local content entries
  content=$(generate_authorized_keys_content)
  # Write to file
  printf '%s\n' "$content" > "$TUNNEL_AUTH_KEYS"
  chmod 600 "$TUNNEL_AUTH_KEYS"
  chown -R "$TUNNEL_USER":"$TUNNEL_USER" "$TUNNEL_SSH_DIR"
  # Count generated entries by their fixed "restrict," prefix so every key type
  # is counted. grep -c already prints 0 when nothing matches but exits 1, so
  # only the status is neutralised (no second "0" is printed).
  entries=$(printf '%s\n' "$content" | grep -c '^restrict,' || true)
  echo "Rebuilt authorized_keys for ${TUNNEL_USER} (entries: ${entries})" >&2
 }
 # Print the tunnel user's authorized_keys (for verification).
 # Usage: get_tunnel_authorized_keys
 # Falls back to freshly generated content when the file does not exist yet.
 get_tunnel_authorized_keys() {
  if [ ! -f "$TUNNEL_AUTH_KEYS" ]; then
    generate_authorized_keys_content
    return
  fi
  cat "$TUNNEL_AUTH_KEYS"
 }
--- a/tools/edge-control/lib/caddy.sh
+++ b/tools/edge-control/lib/caddy.sh
@ -1,188 +0,0 @@
 #!/usr/bin/env bash
 # =============================================================================
 # lib/caddy.sh — Caddy admin API wrapper
 #
 # Interacts with Caddy admin API on 127.0.0.1:2019 to:
 # - Add site blocks for <project>.disinto.ai → reverse_proxy 127.0.0.1:<port>
 # - Remove site blocks when deregistering
 #
 # Functions:
 #   add_route <project> <port> → adds Caddy site block
 #   remove_route <project> → removes Caddy site block
 #   reload_caddy → sends POST /reload to apply changes
 # =============================================================================
 set -euo pipefail
 # Caddy admin API endpoint
 CADDY_ADMIN_URL="${CADDY_ADMIN_URL:-http://127.0.0.1:2019}"
 # Domain suffix for projects
 DOMAIN_SUFFIX="${DOMAIN_SUFFIX:-disinto.ai}"
 # Find the name (config key) of the Caddy HTTP server whose listen
 # addresses end in :80 or :443.
 # Usage: _discover_server_name
 # Prints the first matching server key on stdout; returns 1 with a message
 # on stderr when the admin API query fails or no server matches.
 _discover_server_name() {
  local discovered
  if ! discovered=$(curl -sS "${CADDY_ADMIN_URL}/config/apps/http/servers" \
    | jq -r 'to_entries | map(select(.value.listen[]? | test(":(80|443)$"))) | .[0].key // empty'); then
    echo "Error: could not query Caddy admin API for servers" >&2
    return 1
  fi
  if [ -n "$discovered" ]; then
    echo "$discovered"
    return 0
  fi
  echo "Error: could not find a Caddy server listening on :80/:443" >&2
  return 1
 }
 # Add a reverse-proxy route for a project.
 # Usage: add_route <project> <port>
 #   project  name used to form the host <project>.<DOMAIN_SUFFIX>
 #   port     local port the route proxies to (127.0.0.1:<port>); digits only
 # Appends the route to the server found by _discover_server_name via the
 # Caddy admin API. Returns 0 on success, 1 on any failure (message on stderr).
 # NOTE(review): this always appends; calling it twice for the same project
 # looks like it would leave two routes for the same host — confirm whether
 # callers rely on that.
 add_route() {
  local project="$1"
  local port="$2"
  local fqdn="${project}.${DOMAIN_SUFFIX}"
  # Refuse to create a route to "127.0.0.1:" or to a non-numeric port
  if ! [[ "$port" =~ ^[0-9]+$ ]]; then
    echo "Error: invalid port '${port}' for ${fqdn}" >&2
    return 1
  fi
  local server_name
  server_name=$(_discover_server_name) || return 1
  # Build the route configuration with jq so the host and dial values are
  # JSON-escaped instead of being pasted into a template.
  local route_config
  route_config=$(jq -cn --arg host "$fqdn" --arg dial "127.0.0.1:${port}" '{
      match: [{host: [$host]}],
      handle: [{
        handler: "subroute",
        routes: [{
          handle: [{handler: "reverse_proxy", upstreams: [{dial: $dial}]}]
        }]
      }]
    }') || {
    echo "Error: failed to build route config for ${fqdn}" >&2
    return 1
  }
  # Append route via admin API; -w puts the HTTP status on its own last line
  local response status body
  response=$(curl -sS -w '\n%{http_code}' -X POST \
    "${CADDY_ADMIN_URL}/config/apps/http/servers/${server_name}/routes" \
    -H "Content-Type: application/json" \
    -d "$route_config") || {
    echo "Error: failed to add route for ${fqdn}" >&2
    return 1
  }
  status=$(tail -n1 <<<"$response")
  body=$(sed '$d' <<<"$response")
  if [ "$status" -ge 400 ]; then
    echo "Error: Caddy admin API returned ${status}: ${body}" >&2
    return 1
  fi
  echo "Added route: ${fqdn} → 127.0.0.1:${port}" >&2
 }
 # Remove the route(s) for a project.
 # Usage: remove_route <project>
 # Fetches the server's route list from the Caddy admin API and deletes every
 # route whose host matcher contains <project>.<DOMAIN_SUFFIX>.
 # Returns 0 when the routes were removed or none existed (a warning is
 # printed in that case), 1 on any API failure. Messages go to stderr.
 remove_route() {
  local project="$1"
  local fqdn="${project}.${DOMAIN_SUFFIX}"
  local server_name
  server_name=$(_discover_server_name) || return 1
  local routes_url="${CADDY_ADMIN_URL}/config/apps/http/servers/${server_name}/routes"
  # First, get current routes; -w puts the HTTP status on its own last line
  local response status body
  response=$(curl -sS -w '\n%{http_code}' "$routes_url") || {
    echo "Error: failed to get current routes" >&2
    return 1
  }
  status=$(tail -n1 <<<"$response")
  body=$(sed '$d' <<<"$response")
  if [ "$status" -ge 400 ]; then
    echo "Error: Caddy admin API returned ${status}: ${body}" >&2
    return 1
  fi
  # Collect the indices of all matching routes, highest first, so deleting
  # one never shifts the position of another still to be deleted. The fqdn is
  # passed with --arg (never spliced into the jq program) and a null route
  # list is treated as empty.
  local indices
  indices=$(jq -r --arg fqdn "$fqdn" \
    '(. // []) | to_entries | map(select(any(.value.match[]?.host[]?; . == $fqdn)) | .key) | reverse | .[]' \
    <<<"$body" 2>/dev/null) || indices=""
  if [ -z "$indices" ]; then
    echo "Warning: route for ${fqdn} not found" >&2
    return 0
  fi
  # Delete each matching route, checking HTTP status every time
  local route_index
  while IFS= read -r route_index; do
    response=$(curl -sS -w '\n%{http_code}' -X DELETE \
      "${routes_url}/${route_index}" \
      -H "Content-Type: application/json") || {
      echo "Error: failed to remove route for ${fqdn}" >&2
      return 1
    }
    status=$(tail -n1 <<<"$response")
    body=$(sed '$d' <<<"$response")
    if [ "$status" -ge 400 ]; then
      echo "Error: Caddy admin API returned ${status}: ${body}" >&2
      return 1
    fi
  done <<<"$indices"
  echo "Removed route: ${fqdn}" >&2
 }
 # Ask Caddy to reload by POSTing to ${CADDY_ADMIN_URL}/reload.
 # Usage: reload_caddy
 # Returns 0 when the request succeeds with a status below 400, 1 otherwise
 # (message on stderr).
 # NOTE(review): Caddy's documented admin endpoints include /load and
 # /config/...; confirm that /reload exists on the deployed version.
 reload_caddy() {
  local raw http_code payload
  if ! raw=$(curl -sS -w '\n%{http_code}' -X POST \
    "${CADDY_ADMIN_URL}/reload"); then
    echo "Error: failed to reload Caddy" >&2
    return 1
  fi
  # Last line is the status written by -w; everything before it is the body
  http_code=$(tail -n1 <<<"$raw")
  payload=$(sed '$d' <<<"$raw")
  if [ "$http_code" -ge 400 ]; then
    echo "Error: Caddy reload returned ${http_code}: ${payload}" >&2
    return 1
  fi
  echo "Caddy reloaded" >&2
 }
 # Dump the full Caddy configuration from the admin API (debugging aid).
 # Usage: get_caddy_config
 # Prints whatever ${CADDY_ADMIN_URL}/config returns; curl runs silently.
 get_caddy_config() {
  local config_url="${CADDY_ADMIN_URL}/config"
  curl -s "$config_url"
 }
 # Probe the Caddy admin API root and report whether it answers with HTTP 200.
 # Usage: check_caddy_health
 # Returns 0 on a 200 response; otherwise prints the observed status to
 # stderr and returns 1. A curl failure is reported as status 000.
 check_caddy_health() {
  local http_code
  if ! http_code=$(curl -s -o /dev/null -w "%{http_code}" \
    "${CADDY_ADMIN_URL}/" 2>/dev/null); then
    http_code="000"
  fi
  [ "$http_code" != "200" ] || return 0
  echo "Caddy admin API not reachable (HTTP ${http_code})" >&2
  return 1
 }
--- a/tools/edge-control/lib/ports.sh
+++ b/tools/edge-control/lib/ports.sh
@ -1,202 +0,0 @@
 #!/usr/bin/env bash
 # =============================================================================
 # lib/ports.sh — Port allocator for edge control plane
 #
 # Manages port allocation in the range 20000-29999.
 # Uses flock-based concurrency control over registry.json.
 #
 # Functions:
 #   allocate_port <project> <pubkey> <fqdn> → writes to registry, returns port
 #   free_port <project> → removes project from registry
 #   get_port <project> → returns assigned port or empty
 #   list_ports → prints all projects with port/FQDN
 # =============================================================================
 set -euo pipefail
 # Directory containing registry files
 REGISTRY_DIR="${REGISTRY_DIR:-/var/lib/disinto}"
 REGISTRY_FILE="${REGISTRY_DIR}/registry.json"
 LOCK_FILE="${REGISTRY_DIR}/registry.lock"
 # Port range
 PORT_MIN=20000
 PORT_MAX=29999
 # Make sure the registry directory and its lock file exist.
 # Creates $REGISTRY_DIR (mode 0750, owner root:disinto-register) and
 # $LOCK_FILE (mode 0644) only when they are missing; existing ones are left
 # untouched.
 _ensure_registry_dir() {
  [ -d "$REGISTRY_DIR" ] || {
    mkdir -p "$REGISTRY_DIR"
    chmod 0750 "$REGISTRY_DIR"
    chown root:disinto-register "$REGISTRY_DIR"
  }
  [ -f "$LOCK_FILE" ] || {
    touch "$LOCK_FILE"
    chmod 0644 "$LOCK_FILE"
  }
 }
 # Print the current registry JSON.
 # When $REGISTRY_FILE does not exist, prints an empty version-1 registry
 # skeleton instead, so callers always receive a JSON document.
 _registry_read() {
  if [ ! -f "$REGISTRY_FILE" ]; then
    echo '{"version":1,"projects":{}}'
    return
  fi
  cat "$REGISTRY_FILE"
 }
 # Write the registry atomically (write to a temp file, then mv into place).
 # Usage: _registry_write <json>
 # Returns 0 on success; 1 (leaving $REGISTRY_FILE untouched and no temp file
 # behind) when the content is empty or any step fails.
 _registry_write() {
  # An empty document would wipe every registration; callers that lost a
  # failed jq result to command substitution end up here with "".
  if [ -z "${1:-}" ]; then
    echo "Error: refusing to write empty registry" >&2
    return 1
  fi
  local tmp_file
  tmp_file=$(mktemp "${REGISTRY_DIR}/registry.XXXXXX") || return 1
  # Set the final mode before the rename so readers never see the file with
  # mktemp's restrictive default permissions.
  if ! printf '%s\n' "$1" > "$tmp_file" || ! chmod 0644 "$tmp_file"; then
    rm -f -- "$tmp_file"
    return 1
  fi
  if ! mv -f "$tmp_file" "$REGISTRY_FILE"; then
    rm -f -- "$tmp_file"
    return 1
  fi
 }
 # Allocate a port for a project.
 # Usage: allocate_port <project> <pubkey> <fqdn>
 # Returns: port number on stdout; 1 with a message on stderr when the range
 #          PORT_MIN..PORT_MAX is exhausted or the registry cannot be updated
 # Writes: registry.json with the project entry (port, fqdn, pubkey,
 #         registered_at)
 # A project that is already registered keeps its existing port; its stored
 # entry is returned as-is and not rewritten.
 # Requires bash 4+ (associative array).
 allocate_port() {
  local project="$1"
  local pubkey="$2"
  local fqdn="$3"
  _ensure_registry_dir
  # Take an exclusive lock on fd 200; it is held until that fd is closed
  # (i.e. until the calling shell or subshell exits).
  exec 200>"$LOCK_FILE"
  flock -x 200
  local registry
  registry=$(_registry_read)
  # Check if project already has a port assigned. The name is passed with
  # --arg so it is always treated as a key, never as jq program text.
  local existing_port
  existing_port=$(jq -r --arg project "$project" \
    '.projects[$project].port // empty' <<<"$registry" 2>/dev/null) || existing_port=""
  if [ -n "$existing_port" ]; then
    echo "$existing_port"
    return 0
  fi
  # Index the ports already in use once, instead of scanning per candidate
  local used_ports
  used_ports=$(jq -r '.projects | to_entries | map(.value.port) | .[]' \
    <<<"$registry" 2>/dev/null) || used_ports=""
  local -A taken=()
  local used
  while IFS= read -r used; do
    if [ -n "$used" ]; then
      taken["$used"]=1
    fi
  done <<<"$used_ports"
  # Pick the lowest free port in the range
  local port="" candidate
  for ((candidate = PORT_MIN; candidate <= PORT_MAX; candidate++)); do
    if [ -z "${taken[$candidate]:-}" ]; then
      port=$candidate
      break
    fi
  done
  if [ -z "$port" ]; then
    echo "Error: no available ports in range ${PORT_MIN}-${PORT_MAX}" >&2
    return 1
  fi
  # Get current timestamp (UTC, ISO 8601)
  local timestamp
  timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  # Add project to registry. The result is checked explicitly: this function
  # is normally called inside $(...), where errexit does not apply, and an
  # unchecked failure would write an empty registry.
  local new_registry
  new_registry=$(jq --arg project "$project" \
    --argjson port "$port" \
    --arg pubkey "$pubkey" \
    --arg fqdn "$fqdn" \
    --arg timestamp "$timestamp" \
    '.projects[$project] = {
      "port": $port,
      "fqdn": $fqdn,
      "pubkey": $pubkey,
      "registered_at": $timestamp
    }' <<<"$registry") || new_registry=""
  if [ -z "$new_registry" ]; then
    echo "Error: failed to update registry for project '$project'" >&2
    return 1
  fi
  _registry_write "$new_registry" || return 1
  echo "$port"
 }
 # Free a port (remove project from registry).
 # Usage: free_port <project>
 # Returns: 0 and the freed port on stdout on success; 1 (message on stderr)
 #          if the project is not registered or the registry cannot be updated
 free_port() {
  local project="$1"
  _ensure_registry_dir
  # Take an exclusive lock on fd 200; held until that fd is closed
  exec 200>"$LOCK_FILE"
  flock -x 200
  local registry
  registry=$(_registry_read)
  # Check if project exists. The name is passed with --arg so it is always
  # treated as a key, never as jq program text.
  local existing_port
  existing_port=$(jq -r --arg project "$project" \
    '.projects[$project].port // empty' <<<"$registry" 2>/dev/null) || existing_port=""
  if [ -z "$existing_port" ]; then
    echo "Error: project '$project' not found in registry" >&2
    return 1
  fi
  # Remove project from registry; never write back a failed/empty result
  local new_registry
  new_registry=$(jq --arg project "$project" 'del(.projects[$project])' <<<"$registry") || new_registry=""
  if [ -z "$new_registry" ]; then
    echo "Error: failed to update registry for project '$project'" >&2
    return 1
  fi
  _registry_write "$new_registry" || return 1
  echo "$existing_port"
 }
 # Get the port for a project.
 # Usage: get_port <project>
 # Returns: port number on stdout, or an empty string when the project is
 #          not registered or the registry cannot be parsed
 get_port() {
  local project="$1"
  _ensure_registry_dir
  local registry
  registry=$(_registry_read)
  # --arg keeps the (possibly untrusted) project name out of the jq program
  jq -r --arg project "$project" '.projects[$project].port // empty' \
    <<<"$registry" 2>/dev/null || echo ""
 }
 # List all registered projects.
 # Usage: list_ports
 # Returns: one compact JSON object per line on stdout (not a JSON array),
 #          each with the fields name, port, fqdn and pubkey. pubkey is
 #          included because generate_authorized_keys_content reads it from
 #          these records. Prints nothing when no project is registered.
 list_ports() {
  _ensure_registry_dir
  local registry
  registry=$(_registry_read)
  jq -r '.projects | to_entries | map({name: .key, port: .value.port, fqdn: .value.fqdn, pubkey: .value.pubkey}) | .[] | @json' \
    <<<"$registry" 2>/dev/null
 }
 # Get full project info from the registry.
 # Usage: get_project_info <project>
 # Returns: the project's registry entry as one compact JSON object on
 #          stdout, or an empty string when the project is not registered or
 #          the registry cannot be parsed
 get_project_info() {
  local project="$1"
  _ensure_registry_dir
  local registry
  registry=$(_registry_read)
  # --arg keeps the (possibly untrusted) project name out of the jq program
  jq -c --arg project "$project" '.projects[$project] // empty' \
    <<<"$registry" 2>/dev/null || echo ""
 }
--- a/tools/edge-control/register.sh
+++ b/tools/edge-control/register.sh
@ -1,196 +0,0 @@
 #!/usr/bin/env bash
 # =============================================================================
 # register.sh — SSH forced-command handler for edge control plane
 #
 # This script runs as a forced command for the disinto-register SSH user.
 # It parses SSH_ORIGINAL_COMMAND and dispatches to register|deregister|list.
 #
 # Usage (via SSH):
 #   ssh disinto-register@edge "register <project> <pubkey>"
 #   ssh disinto-register@edge "deregister <project>"
 #   ssh disinto-register@edge "list"
 #
 # Output: JSON on stdout
 # =============================================================================
 set -euo pipefail
 # Script directory
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Source libraries
 source "${SCRIPT_DIR}/lib/ports.sh"
 source "${SCRIPT_DIR}/lib/caddy.sh"
 source "${SCRIPT_DIR}/lib/authorized_keys.sh"
 # Domain suffix
 DOMAIN_SUFFIX="${DOMAIN_SUFFIX:-disinto.ai}"
 # Print usage to stderr and exit with status 1.
 # The help text goes to stderr because stdout is reserved for the JSON
 # response that clients of this forced command parse.
 usage() {
  printf '%s\n' \
    'Usage:' \
    '  register <project> <pubkey>       Register a new tunnel' \
    '  deregister <project>              Remove a tunnel' \
    '  list                              List all registered tunnels' \
    'Example:' \
    '  ssh disinto-register@edge "register myproject ssh-ed25519 AAAAC3..."' \
    >&2
  exit 1
 }
 # TODO(#713): Subdomain fallback — if subpath routing (#704/#708) fails, this
 # function would need to register additional routes for forge.<project>,
 # ci.<project>, chat.<project> subdomains (or accept a --subdomain parameter).
 # See docs/edge-routing-fallback.md for the full pivot plan.
 # Register a new tunnel.
 # Usage: do_register <project> <pubkey>
 #   project  letters, digits, hyphens and underscores only
 #   pubkey   "<key-type> <base64-key> [comment]"; the comment is dropped
 # On success prints {"port":<n>,"fqdn":"<project>.<DOMAIN_SUFFIX>"} on
 # stdout. On invalid input prints a JSON error object and exits 1.
 do_register() {
  local project="$1"
  local pubkey="$2"
  # Validate project name (alphanumeric, hyphens, underscores)
  if ! [[ "$project" =~ ^[a-zA-Z0-9_-]+$ ]]; then
    echo '{"error":"invalid project name"}'
    exit 1
  fi
  # Split the pubkey into type and key body (format: "ssh-ed25519 AAAAC3...");
  # only the first line is considered and any trailing comment is ignored.
  local key_type="" key="" _comment=""
  read -r key_type key _comment <<<"$pubkey" || true
  if [ -z "$key_type" ] || [ -z "$key" ]; then
    echo '{"error":"invalid pubkey format"}'
    exit 1
  fi
  # Validate key type
  if ! [[ "$key_type" =~ ^(ssh-ed25519|ssh-rsa|ecdsa-sha2-nistp256|ecdsa-sha2-nistp384|ecdsa-sha2-nistp521)$ ]]; then
    echo '{"error":"unsupported key type"}'
    exit 1
  fi
  # The key body is written verbatim into authorized_keys, so accept only
  # the base64 alphabet (with optional padding).
  if ! [[ "$key" =~ ^[A-Za-z0-9+/]+={0,3}$ ]]; then
    echo '{"error":"invalid pubkey format"}'
    exit 1
  fi
  # Full pubkey for registry
  local full_pubkey="${key_type} ${key}"
  # Allocate port (idempotent - returns existing if already registered)
  local port
  port=$(allocate_port "$project" "$full_pubkey" "${project}.${DOMAIN_SUFFIX}")
  # Add Caddy route
  add_route "$project" "$port"
  # Rebuild authorized_keys for tunnel user
  rebuild_authorized_keys
  # Reload Caddy
  reload_caddy
  # Return JSON response
  echo "{\"port\":${port},\"fqdn\":\"${project}.${DOMAIN_SUFFIX}\"}"
 }
 # Deregister a tunnel.
 # Usage: do_deregister <project>
 # On success prints {"removed":true,"port":<n>,"fqdn":"..."} on stdout.
 # Prints a JSON error object and exits 1 when the name is invalid or the
 # project is not registered.
 do_deregister() {
  local project="$1"
  # Apply the same name rule as do_register: the value comes straight from
  # the SSH command line and is used for registry lookups and the FQDN.
  if ! [[ "$project" =~ ^[a-zA-Z0-9_-]+$ ]]; then
    echo '{"error":"invalid project name"}'
    exit 1
  fi
  # Get current port before removing
  local port
  port=$(get_port "$project")
  if [ -z "$port" ]; then
    echo '{"error":"project not found"}'
    exit 1
  fi
  # Remove from registry
  free_port "$project" >/dev/null
  # Remove Caddy route
  remove_route "$project"
  # Rebuild authorized_keys for tunnel user
  rebuild_authorized_keys
  # Reload Caddy
  reload_caddy
  # Return JSON response
  echo "{\"removed\":true,\"port\":${port},\"fqdn\":\"${project}.${DOMAIN_SUFFIX}\"}"
 }
 # List all registered tunnels.
 # Usage: do_list
 # Prints a single JSON object {"tunnels":[...]} on stdout whose elements are
 # the non-empty lines produced by list_ports, in order.
 do_list() {
  local -a entries=()
  local entry
  while IFS= read -r entry; do
    if [ -n "$entry" ]; then
      entries+=("$entry")
    fi
  done < <(list_ports)
  # Join the collected objects with commas
  local joined=""
  if [ "${#entries[@]}" -gt 0 ]; then
    printf -v joined '%s,' "${entries[@]}"
    joined="${joined%,}"
  fi
  echo "{\"tunnels\":[${joined}]}"
 }
 # Main dispatch.
 # Reads the client's request from SSH_ORIGINAL_COMMAND, splits it into a verb
 # and its arguments at the first space, and hands off to do_register,
 # do_deregister or do_list. Every error path prints a JSON error object on
 # stdout and exits 1.
 main() {
  # Get the SSH_ORIGINAL_COMMAND
  local command="${SSH_ORIGINAL_COMMAND:-}"
  if [ -z "$command" ]; then
    echo '{"error":"no command provided"}'
    exit 1
  fi
  # Parse command. With no space in the input, "${command#* }" would return
  # the verb itself, so the arguments are only extracted when something
  # follows the verb.
  local cmd="${command%% *}"
  local args=""
  if [ "$command" != "$cmd" ]; then
    args="${command#* }"
  fi
  # Handle commands
  case "$cmd" in
    register)
      # register <project> <pubkey> — the pubkey is everything after the
      # first space, since it contains spaces itself
      local project="${args%% *}"
      local pubkey=""
      if [ "$args" != "$project" ]; then
        pubkey="${args#* }"
      fi
      if [ -z "$project" ] || [ -z "$pubkey" ]; then
        echo '{"error":"register requires <project> <pubkey>"}'
        exit 1
      fi
      do_register "$project" "$pubkey"
      ;;
    deregister)
      # deregister <project>
      local project="$args"
      if [ -z "$project" ]; then
        echo '{"error":"deregister requires <project>"}'
        exit 1
      fi
      do_deregister "$project"
      ;;
    list)
      do_list
      ;;
    *)
      # The verb is client-controlled: let jq escape it so the error stays
      # valid JSON; fall back to a fixed message if jq is unavailable.
      jq -cn --arg msg "unknown command: ${cmd}" '{error: $msg}' 2>/dev/null \
        || echo '{"error":"unknown command"}'
      usage
      ;;
  esac
 }
 main "$@"
--- a/tools/edge-control/verify-chat-sandbox.sh
+++ b/tools/edge-control/verify-chat-sandbox.sh
@ -1,113 +0,0 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # verify-chat-sandbox.sh — One-shot sandbox verification for disinto-chat (#706)
 #
 # Runs against a live compose project and asserts hardening constraints.
 # Exit 0 if all pass, non-zero otherwise.
 CONTAINER="disinto-chat"
 PASS=0
 FAIL=0
 # Report a passed check on stdout and bump the PASS counter.
 pass() {
    printf '  ✓ %s\n' "$1"
    PASS=$((PASS + 1))
 }
 # Report a failed check on stdout and bump the FAIL counter.
 fail() {
    printf '  ✗ %s\n' "$1"
    FAIL=$((FAIL + 1))
 }
 echo "=== disinto-chat sandbox verification ==="
 echo
 # --- docker inspect checks ---
 # Capture the container's inspect document once; every HostConfig assertion
 # below reads from this snapshot.
 if ! inspect_json=$(docker inspect "$CONTAINER" 2>/dev/null); then
    echo "ERROR: container '$CONTAINER' not found or not running"
    exit 1
 fi
 # Print HostConfig.<field> as rendered by Python's print()
 # (so JSON true/null show up as True/None).
 _hostconfig_value() {
    echo "$inspect_json" | python3 -c "import sys,json; print(json.load(sys.stdin)[0]['HostConfig']['$1'])"
 }
 # Print the list-valued HostConfig.<field> joined by single spaces; a null
 # field prints as an empty line.
 _hostconfig_words() {
    echo "$inspect_json" | python3 -c "import sys,json; items=json.load(sys.stdin)[0]['HostConfig']['$1'] or []; print(' '.join(items))"
 }
 # ReadonlyRootfs
 readonly_rootfs=$(_hostconfig_value ReadonlyRootfs)
 case "$readonly_rootfs" in
    True) pass "ReadonlyRootfs=true" ;;
    *) fail "ReadonlyRootfs expected true, got $readonly_rootfs" ;;
 esac
 # CapAdd — should be null or empty
 cap_add=$(_hostconfig_value CapAdd)
 case "$cap_add" in
    None | "[]") pass "CapAdd=null (no extra capabilities)" ;;
    *) fail "CapAdd expected null, got $cap_add" ;;
 esac
 # CapDrop — should contain ALL
 cap_drop=$(_hostconfig_words CapDrop)
 if [[ "$cap_drop" == *ALL* ]]; then
    pass "CapDrop contains ALL"
 else
    fail "CapDrop expected ALL, got: $cap_drop"
 fi
 # PidsLimit
 pids_limit=$(_hostconfig_value PidsLimit)
 case "$pids_limit" in
    128) pass "PidsLimit=128" ;;
    *) fail "PidsLimit expected 128, got $pids_limit" ;;
 esac
 # Memory limit (512MB = 536870912 bytes)
 mem_limit=$(_hostconfig_value Memory)
 case "$mem_limit" in
    536870912) pass "Memory=512m" ;;
    *) fail "Memory expected 536870912, got $mem_limit" ;;
 esac
 # SecurityOpt — must contain no-new-privileges
 sec_opt=$(_hostconfig_words SecurityOpt)
 if [[ "$sec_opt" == *no-new-privileges* ]]; then
    pass "SecurityOpt contains no-new-privileges"
 else
    fail "SecurityOpt missing no-new-privileges (got: $sec_opt)"
 fi
 # No docker.sock bind mount (the dot is a regex wildcard, as in the grep
 # pattern this check has always used)
 binds=$(_hostconfig_words Binds)
 if [[ "$binds" =~ docker.sock ]]; then
    fail "docker.sock is bind-mounted"
 else
    pass "No docker.sock mount"
 fi
 echo
 # --- runtime exec checks ---
 # Each probe runs a command inside the container and treats a zero exit as a
 # failed hardening check. Any non-zero exit — including `docker exec` itself
 # failing — lands in the else branch and is counted as a pass.
 # touch /root/x should fail (read-only rootfs + unprivileged user)
 if docker exec "$CONTAINER" touch /root/x 2>/dev/null; then
    fail "touch /root/x succeeded (should fail)"
 else
    pass "touch /root/x correctly denied"
 fi
 # /var/run/docker.sock must not exist
 # (ls output is not suppressed: if the socket exists its path is printed)
 if docker exec "$CONTAINER" ls /var/run/docker.sock 2>/dev/null; then
    fail "/var/run/docker.sock is accessible"
 else
    pass "/var/run/docker.sock not accessible"
 fi
 # /etc/shadow should not be readable
 # (cat output is not suppressed: a readable file is echoed to stdout)
 if docker exec "$CONTAINER" cat /etc/shadow 2>/dev/null; then
    fail "cat /etc/shadow succeeded (should fail)"
 else
    pass "cat /etc/shadow correctly denied"
 fi
 echo
 # Summary: exit 1 if any check failed, 0 otherwise
 echo "=== Results: $PASS passed, $FAIL failed ==="
 if [ "$FAIL" -gt 0 ]; then
    exit 1
 fi
 exit 0
--- a/action-vault/SCHEMA.md
+++ b/action-vault/SCHEMA.md
@ -50,7 +50,7 @@ blast_radius = "low"       # optional: overrides policy.toml tier ("low"|"medium
 ## Secret Names
-Secret names must have a corresponding `secrets/<NAME>.enc` file (age-encrypted). The vault validates that requested secrets exist in the allowlist before execution.
+Secret names must be defined in `.env.vault.enc` on the ops repo. The vault validates that requested secrets exist in the allowlist before execution.
 Common secret names:
 - `CLAWHUB_TOKEN` - Token for ClawHub skill publishing
--- a/action-vault/classify.sh
+++ b/action-vault/classify.sh
--- a/action-vault/examples/promote.toml
+++ b/action-vault/examples/promote.toml
--- a/action-vault/examples/publish.toml
+++ b/action-vault/examples/publish.toml
--- a/action-vault/examples/release.toml
+++ b/action-vault/examples/release.toml
--- a/action-vault/examples/webhook-call.toml
+++ b/action-vault/examples/webhook-call.toml
--- a/action-vault/policy.toml
+++ b/action-vault/policy.toml
--- a/Show more
+++ b/Show more