Compare commits

..

2 commits

Author SHA1 Message Date
Claude
bcc8397e52 ci: retrigger smoke-init pipeline
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
2026-03-28 15:36:46 +00:00
Claude
e74723982d fix: fix: smoke-init should only run on pull_request events, not push (#21)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline failed
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 15:26:08 +00:00
137 changed files with 7387 additions and 14562 deletions

View file

@ -1,20 +0,0 @@
# Secrets — prevent .env files from being baked into the image
.env
.env.enc
.env.vault
.env.vault.enc
# Version control — .git is huge and not needed in image
.git
# Archives — not needed at runtime
*.tar.gz
# Prometheus data — large, ephemeral data
prometheus-data/
# Compose files — only needed at runtime via volume mount
docker-compose.yml
# Project TOML files — gitignored anyway, won't be in build context
projects/*.toml

View file

@ -19,43 +19,21 @@ FORGE_URL=http://localhost:3000 # [CONFIG] local Forgejo instance
# ── Auth tokens ─────────────────────────────────────────────────────────── # ── Auth tokens ───────────────────────────────────────────────────────────
# Each agent has its own Forgejo account and API token (#747). # Each agent has its own Forgejo account and API token (#747).
# Per-agent tokens fall back to FORGE_TOKEN if not set. # Per-agent tokens fall back to FORGE_TOKEN if not set.
#
# Tokens and passwords are auto-generated by `disinto init` and stored in .env.
# Each bot user gets:
# - FORGE_TOKEN_<BOT> = API token for REST calls (user identity via /api/v1/user)
# - FORGE_PASS_<BOT> = password for git HTTP push (#361, Forgejo 11.x limitation)
#
# Local-model agents (agents-llama) use FORGE_TOKEN_LLAMA / FORGE_PASS_LLAMA
# with FORGE_BOT_USER_LLAMA=dev-qwen to ensure correct attribution (#563).
FORGE_TOKEN= # [SECRET] dev-bot API token (default for all agents) FORGE_TOKEN= # [SECRET] dev-bot API token (default for all agents)
FORGE_PASS= # [SECRET] dev-bot password for git HTTP push (#361)
FORGE_TOKEN_LLAMA= # [SECRET] dev-qwen API token (for agents-llama)
FORGE_PASS_LLAMA= # [SECRET] dev-qwen password for git HTTP push
FORGE_REVIEW_TOKEN= # [SECRET] review-bot API token FORGE_REVIEW_TOKEN= # [SECRET] review-bot API token
FORGE_REVIEW_PASS= # [SECRET] review-bot password for git HTTP push
FORGE_PLANNER_TOKEN= # [SECRET] planner-bot API token FORGE_PLANNER_TOKEN= # [SECRET] planner-bot API token
FORGE_PLANNER_PASS= # [SECRET] planner-bot password for git HTTP push
FORGE_GARDENER_TOKEN= # [SECRET] gardener-bot API token FORGE_GARDENER_TOKEN= # [SECRET] gardener-bot API token
FORGE_GARDENER_PASS= # [SECRET] gardener-bot password for git HTTP push
FORGE_VAULT_TOKEN= # [SECRET] vault-bot API token FORGE_VAULT_TOKEN= # [SECRET] vault-bot API token
FORGE_VAULT_PASS= # [SECRET] vault-bot password for git HTTP push
FORGE_SUPERVISOR_TOKEN= # [SECRET] supervisor-bot API token FORGE_SUPERVISOR_TOKEN= # [SECRET] supervisor-bot API token
FORGE_SUPERVISOR_PASS= # [SECRET] supervisor-bot password for git HTTP push
FORGE_PREDICTOR_TOKEN= # [SECRET] predictor-bot API token FORGE_PREDICTOR_TOKEN= # [SECRET] predictor-bot API token
FORGE_PREDICTOR_PASS= # [SECRET] predictor-bot password for git HTTP push FORGE_ACTION_TOKEN= # [SECRET] action-bot API token
FORGE_ARCHITECT_TOKEN= # [SECRET] architect-bot API token FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,action-bot
FORGE_ARCHITECT_PASS= # [SECRET] architect-bot password for git HTTP push
FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot
# ── Backwards compatibility ─────────────────────────────────────────────── # ── Backwards compatibility ───────────────────────────────────────────────
# If CODEBERG_TOKEN is set but FORGE_TOKEN is not, env.sh falls back to # If CODEBERG_TOKEN is set but FORGE_TOKEN is not, env.sh falls back to
# CODEBERG_TOKEN automatically (same for REVIEW_BOT_TOKEN, CODEBERG_REPO, # CODEBERG_TOKEN automatically (same for REVIEW_BOT_TOKEN, CODEBERG_REPO,
# CODEBERG_BOT_USERNAMES). No action needed for existing deployments. # CODEBERG_BOT_USERNAMES). No action needed for existing deployments.
# Per-agent tokens default to FORGE_TOKEN when unset (single-token setups). # Per-agent tokens default to FORGE_TOKEN when unset (single-token setups).
#
# Note: `disinto init` auto-generates all bot tokens/passwords when you
# configure [agents.llama] in a project TOML. The credentials are stored
# in .env.enc (encrypted) or .env (plaintext fallback).
# ── Woodpecker CI ───────────────────────────────────────────────────────── # ── Woodpecker CI ─────────────────────────────────────────────────────────
WOODPECKER_TOKEN= # [SECRET] Woodpecker API token WOODPECKER_TOKEN= # [SECRET] Woodpecker API token
@ -71,7 +49,7 @@ WOODPECKER_DB_NAME=woodpecker # [CONFIG] Postgres database name
# ── Vault-only secrets (DO NOT put these in .env) ──────────────────────── # ── Vault-only secrets (DO NOT put these in .env) ────────────────────────
# These tokens grant access to external systems (GitHub, ClawHub, deploy targets). # These tokens grant access to external systems (GitHub, ClawHub, deploy targets).
# They live ONLY in .env.vault.enc and are injected into the ephemeral runner # They live ONLY in .env.vault.enc and are injected into the ephemeral vault-runner
# container at fire time (#745). lib/env.sh explicitly unsets them so agents # container at fire time (#745). lib/env.sh explicitly unsets them so agents
# can never hold them directly — all external actions go through vault dispatch. # can never hold them directly — all external actions go through vault dispatch.
# #
@ -80,7 +58,7 @@ WOODPECKER_DB_NAME=woodpecker # [CONFIG] Postgres database name
# (deploy keys) — SSH keys for deployment targets # (deploy keys) — SSH keys for deployment targets
# #
# To manage vault secrets: disinto secrets edit-vault # To manage vault secrets: disinto secrets edit-vault
# (vault redesign in progress: PR-based approval, see #73-#77) # See also: vault/vault-run-action.sh, vault/vault-fire.sh
# ── Project-specific secrets ────────────────────────────────────────────── # ── Project-specific secrets ──────────────────────────────────────────────
# Store all project secrets here so formulas reference env vars, never hardcode. # Store all project secrets here so formulas reference env vars, never hardcode.

6
.gitignore vendored
View file

@ -22,9 +22,3 @@ metrics/supervisor-metrics.jsonl
.DS_Store .DS_Store
dev/ci-fixes-*.json dev/ci-fixes-*.json
gardener/dust.jsonl gardener/dust.jsonl
# Individual encrypted secrets (managed by disinto secrets add)
secrets/
# Pre-built binaries for Docker builds (avoid network calls during build)
docker/agents/bin/

View file

@ -6,6 +6,8 @@
# 2. Every custom function called by agent scripts is defined in lib/ or the script itself # 2. Every custom function called by agent scripts is defined in lib/ or the script itself
# #
# Fast (<10s): no network, no tmux, no Claude needed. # Fast (<10s): no network, no tmux, no Claude needed.
# Would have caught: kill_tmux_session (renamed), create_agent_session (missing),
# read_phase (missing from dev-agent.sh scope)
set -euo pipefail set -euo pipefail
@ -19,16 +21,12 @@ FAILED=0
# Uses awk instead of grep -Eo for busybox/Alpine compatibility (#296). # Uses awk instead of grep -Eo for busybox/Alpine compatibility (#296).
get_fns() { get_fns() {
local f="$1" local f="$1"
# Pure-awk implementation: avoids grep/sed cross-platform differences # BRE mode (no -E). Use [(][)] for literal parens — unambiguous across
# (BusyBox grep BRE quirks, sed ; separator issues on Alpine). # GNU grep and BusyBox grep (some BusyBox builds treat bare () as grouping
awk ' # even in BRE). BRE one-or-more via [X][X]* instead of +.
/^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]/ { grep '^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]' "$f" 2>/dev/null \
line = $0 | sed 's/^[[:space:]]*//; s/[[:space:]]*[(][)].*$//' \
gsub(/^[[:space:]]+/, "", line) | sort -u || true
sub(/[[:space:]]*[(].*/, "", line)
print line
}
' "$f" 2>/dev/null | sort -u || true
} }
# Extract call-position identifiers that look like custom function calls: # Extract call-position identifiers that look like custom function calls:
@ -86,7 +84,7 @@ while IFS= read -r -d '' f; do
printf 'FAIL [syntax] %s\n' "$f" printf 'FAIL [syntax] %s\n' "$f"
FAILED=1 FAILED=1
fi fi
done < <(find dev gardener review planner supervisor architect lib vault -name "*.sh" -print0 2>/dev/null) done < <(find dev gardener review planner supervisor lib vault action -name "*.sh" -print0 2>/dev/null)
echo "syntax check done" echo "syntax check done"
# ── 2. Function-resolution check ───────────────────────────────────────────── # ── 2. Function-resolution check ─────────────────────────────────────────────
@ -97,14 +95,15 @@ echo "=== 2/2 Function resolution ==="
# #
# Included — these are inline-sourced by agent scripts: # Included — these are inline-sourced by agent scripts:
# lib/env.sh — sourced by every agent (log, forge_api, etc.) # lib/env.sh — sourced by every agent (log, forge_api, etc.)
# lib/agent-session.sh — sourced by orchestrators (create_agent_session, monitor_phase_loop, etc.)
# lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session) # lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session)
# lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.) # lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.)
# lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set # lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set
# lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue) # lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue)
# lib/secret-scan.sh — sourced by file-action-issue.sh (scan_for_secrets, redact_secrets) # lib/secret-scan.sh — sourced by file-action-issue.sh, phase-handler.sh (scan_for_secrets, redact_secrets)
# lib/formula-session.sh — sourced by formula-driven agents (acquire_run_lock, check_memory, etc.) # lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, run_formula_and_monitor, etc.)
# lib/mirrors.sh — sourced by merge sites (mirror_push) # lib/mirrors.sh — sourced by merge sites (mirror_push)
# lib/guard.sh — sourced by all polling-loop entry points (check_active) # lib/guard.sh — sourced by all cron entry points (check_active)
# lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps # lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps
# lib/worktree.sh — sourced by agents for worktree create/recover/cleanup/preserve # lib/worktree.sh — sourced by agents for worktree create/recover/cleanup/preserve
# #
@ -117,7 +116,7 @@ echo "=== 2/2 Function resolution ==="
# If a new lib file is added and sourced by agents, add it to LIB_FUNS below # If a new lib file is added and sourced by agents, add it to LIB_FUNS below
# and add a check_script call for it in the lib files section further down. # and add a check_script call for it in the lib files section further down.
LIB_FUNS=$( LIB_FUNS=$(
for f in lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do for f in lib/agent-session.sh lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do
if [ -f "$f" ]; then get_fns "$f"; fi if [ -f "$f" ]; then get_fns "$f"; fi
done | sort -u done | sort -u
) )
@ -181,12 +180,13 @@ check_script() {
# These are already in LIB_FUNS (their definitions are available to agents), # These are already in LIB_FUNS (their definitions are available to agents),
# but this verifies calls *within* each lib file are also resolvable. # but this verifies calls *within* each lib file are also resolvable.
check_script lib/env.sh lib/mirrors.sh check_script lib/env.sh lib/mirrors.sh
check_script lib/agent-session.sh
check_script lib/agent-sdk.sh check_script lib/agent-sdk.sh
check_script lib/ci-helpers.sh check_script lib/ci-helpers.sh
check_script lib/secret-scan.sh check_script lib/secret-scan.sh
check_script lib/file-action-issue.sh lib/secret-scan.sh check_script lib/file-action-issue.sh lib/secret-scan.sh
check_script lib/tea-helpers.sh lib/secret-scan.sh check_script lib/tea-helpers.sh lib/secret-scan.sh
check_script lib/formula-session.sh check_script lib/formula-session.sh lib/agent-session.sh
check_script lib/load-project.sh check_script lib/load-project.sh
check_script lib/mirrors.sh lib/env.sh check_script lib/mirrors.sh lib/env.sh
check_script lib/guard.sh check_script lib/guard.sh
@ -199,19 +199,26 @@ check_script lib/ci-debug.sh
check_script lib/parse-deps.sh check_script lib/parse-deps.sh
# Agent scripts — list cross-sourced files where function scope flows across files. # Agent scripts — list cross-sourced files where function scope flows across files.
# phase-handler.sh defines default callback stubs; sourcing agents may override.
check_script dev/dev-agent.sh check_script dev/dev-agent.sh
check_script dev/phase-handler.sh lib/secret-scan.sh
check_script dev/dev-poll.sh check_script dev/dev-poll.sh
check_script dev/phase-test.sh check_script dev/phase-test.sh
check_script gardener/gardener-run.sh check_script gardener/gardener-run.sh
check_script review/review-pr.sh lib/agent-sdk.sh check_script review/review-pr.sh lib/agent-sdk.sh
check_script review/review-poll.sh check_script review/review-poll.sh
check_script planner/planner-run.sh lib/formula-session.sh check_script planner/planner-run.sh lib/agent-session.sh lib/formula-session.sh
check_script supervisor/supervisor-poll.sh check_script supervisor/supervisor-poll.sh
check_script supervisor/update-prompt.sh check_script supervisor/update-prompt.sh
check_script supervisor/supervisor-run.sh lib/formula-session.sh check_script vault/vault-agent.sh
check_script vault/vault-fire.sh
check_script vault/vault-poll.sh
check_script vault/vault-reject.sh
check_script action/action-poll.sh
check_script action/action-agent.sh
check_script supervisor/supervisor-run.sh
check_script supervisor/preflight.sh check_script supervisor/preflight.sh
check_script predictor/predictor-run.sh check_script predictor/predictor-run.sh
check_script architect/architect-run.sh
echo "function resolution check done" echo "function resolution check done"

View file

@ -8,19 +8,6 @@
when: when:
event: [push, pull_request] event: [push, pull_request]
# Override default clone to authenticate against Forgejo using FORGE_TOKEN.
# Required because Forgejo is configured with REQUIRE_SIGN_IN, so anonymous
# git clones fail with exit code 128. FORGE_TOKEN is injected globally via
# WOODPECKER_ENVIRONMENT in docker-compose.yml (generated by lib/generators.sh).
clone:
git:
image: alpine/git
commands:
- AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
- git clone --depth 1 "$AUTH_URL" .
- git fetch --depth 1 origin "$CI_COMMIT_REF"
- git checkout FETCH_HEAD
steps: steps:
- name: shellcheck - name: shellcheck
image: koalaman/shellcheck-alpine:stable image: koalaman/shellcheck-alpine:stable

View file

@ -179,17 +179,10 @@ def collect_findings(root):
Returns ``(ap_hits, dup_groups)`` with file paths relative to *root*. Returns ``(ap_hits, dup_groups)`` with file paths relative to *root*.
""" """
root = Path(root) root = Path(root)
# Skip architect scripts for duplicate detection (stub formulas, see #99) sh_files = sorted(
EXCLUDED_SUFFIXES = ("architect/architect-run.sh",) p for p in root.rglob("*.sh") if ".git" not in p.parts
def is_excluded(p):
"""Check if path should be excluded by suffix match."""
return p.suffix == ".sh" and ".git" not in p.parts and any(
str(p).endswith(suffix) for suffix in EXCLUDED_SUFFIXES
) )
sh_files = sorted(p for p in root.rglob("*.sh") if not is_excluded(p))
ap_hits = check_anti_patterns(sh_files) ap_hits = check_anti_patterns(sh_files)
dup_groups = check_duplicates(sh_files) dup_groups = check_duplicates(sh_files)
@ -245,55 +238,10 @@ def print_duplicates(groups, label=""):
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def main() -> int: def main() -> int:
# Skip architect scripts for duplicate detection (stub formulas, see #99) sh_files = sorted(
EXCLUDED_SUFFIXES = ("architect/architect-run.sh",) p for p in Path(".").rglob("*.sh") if ".git" not in p.parts
def is_excluded(p):
"""Check if path should be excluded by suffix match."""
return p.suffix == ".sh" and ".git" not in p.parts and any(
str(p).endswith(suffix) for suffix in EXCLUDED_SUFFIXES
) )
sh_files = sorted(p for p in Path(".").rglob("*.sh") if not is_excluded(p))
# Standard patterns that are intentionally repeated across formula-driven agents
# These are not copy-paste violations but the expected structure
ALLOWED_HASHES = {
# Standard agent header: shebang, set -euo pipefail, directory resolution
"c93baa0f19d6b9ba271428bf1cf20b45": "Standard agent header (set -euo pipefail, SCRIPT_DIR, FACTORY_ROOT)",
# formula_prepare_profile_context followed by scratch context reading
"eaa735b3598b7b73418845ab00d8aba5": "Standard .profile context setup (formula_prepare_profile_context + SCRATCH_CONTEXT)",
# Standard prompt template: GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION
"2653705045fdf65072cccfd16eb04900": "Standard prompt template (GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT)",
"93726a3c799b72ed2898a55552031921": "Standard prompt template continuation (SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION)",
"c11eaaacab69c9a2d3c38c75215eca84": "Standard prompt template end (FORMULA_CONTENT, SCRATCH_INSTRUCTION)",
# Appears in stack_lock_acquire (lib/stack-lock.sh) and lib/pr-lifecycle.sh
"29d4f34b703f44699237713cc8d8065b": "Structural end-of-while-loop+case (return 1, esac, done, closing brace)",
# Forgejo org-creation API call pattern shared between forge-setup.sh and ops-setup.sh
# Extracted from bin/disinto (not a .sh file, excluded from prior scans) into lib/forge-setup.sh
"059b11945140c172465f9126b829ed7f": "Forgejo org-creation curl pattern (forge-setup.sh + ops-setup.sh)",
# Docker compose environment block for agents service (generators.sh + hire-agent.sh)
# Intentional duplicate - both generate the same docker-compose.yml template
"8066210169a462fe565f18b6a26a57e0": "Docker compose environment block (generators.sh + hire-agent.sh) - old",
"fd978fcd726696e0f280eba2c5198d50": "Docker compose environment block continuation (generators.sh + hire-agent.sh) - old",
"e2760ccc2d4b993a3685bd8991594eb2": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old",
# The hash shown in output is 161a80f7 - need to match exactly what the script finds
"161a80f7296d6e9d45895607b7f5b9c9": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old",
# New hash after explicit environment fix (#381)
"83fa229b86a7fdcb1d3591ab8e718f9d": "Docker compose explicit environment block (generators.sh + hire-agent.sh) - #381",
# Verification mode helper functions - intentionally duplicated in dispatcher and entrypoint
# These functions check if bug-report parent issues have all sub-issues closed
"b783d403276f78b49ad35840845126a1": "Verification helper: sub_issues variable declaration",
"4b19b9a1bdfbc62f003fc237ed270ed9": "Verification helper: python3 -c invocation",
"cc1d0a9f85dfe0cc32e9ef6361cb8c3a": "Verification helper: Python imports and args",
"768926748b811ebd30f215f57db5de40": "Verification helper: json.load from /dev/stdin",
"4c58586a30bcf6b009c02010ed8f6256": "Verification helper: sub_issues list initialization",
"53ea3d6359f51d622467bd77b079cc88": "Verification helper: iterate issues in data",
"21aec56a99d5252b23fb9a38b895e8e8": "Verification helper: check body for Decomposed from pattern",
"60ea98b3604557d539193b2a6624e232": "Verification helper: append sub-issue number",
"9f6ae8e7811575b964279d8820494eb0": "Verification helper: for loop done pattern",
}
if not sh_files: if not sh_files:
print("No .sh files found.") print("No .sh files found.")
return 0 return 0
@ -328,13 +276,8 @@ def main() -> int:
# Duplicate diff: key by content hash # Duplicate diff: key by content hash
base_dup_hashes = {g[0] for g in base_dups} base_dup_hashes = {g[0] for g in base_dups}
# Filter out allowed standard patterns that are intentionally repeated new_dups = [g for g in cur_dups if g[0] not in base_dup_hashes]
new_dups = [ pre_dups = [g for g in cur_dups if g[0] in base_dup_hashes]
g for g in cur_dups
if g[0] not in base_dup_hashes and g[0] not in ALLOWED_HASHES
]
# Also filter allowed hashes from pre_dups for reporting
pre_dups = [g for g in cur_dups if g[0] in base_dup_hashes and g[0] not in ALLOWED_HASHES]
# Report pre-existing as info # Report pre-existing as info
if pre_ap or pre_dups: if pre_ap or pre_dups:

View file

@ -1,19 +1,45 @@
# .woodpecker/smoke-init.yml — End-to-end smoke test for disinto init
#
# Uses the Forgejo image directly (not as a service) so we have CLI
# access to set up Forgejo and create the bootstrap admin user.
# Then runs disinto init --bare --yes against the local Forgejo instance.
#
# Forgejo refuses to run as root, so all forgejo commands use su-exec
# to run as the 'git' user (pre-created in the Forgejo Docker image).
when: when:
- event: pull_request - event: pull_request
path: path:
- "bin/disinto" - "bin/disinto"
- "lib/load-project.sh" - "lib/load-project.sh"
- "lib/env.sh" - "tests/smoke-init.sh"
- "lib/generators.sh"
- "tests/**"
- ".woodpecker/smoke-init.yml" - ".woodpecker/smoke-init.yml"
- "docker/**"
- event: push
branch: main
path:
- "bin/disinto"
- "lib/load-project.sh"
- "tests/smoke-init.sh"
- ".woodpecker/smoke-init.yml"
- "docker/**"
steps: steps:
- name: smoke-init - name: smoke-init
image: python:3-alpine image: codeberg.org/forgejo/forgejo:11.0
environment:
SMOKE_FORGE_URL: http://localhost:3000
commands: commands:
- apk add --no-cache bash curl jq git coreutils # Install test dependencies (Alpine-based image)
- python3 tests/mock-forgejo.py & echo $! > /tmp/mock-forgejo.pid - apk add --no-cache bash curl jq python3 git >/dev/null 2>&1
- sleep 2 # Set up Forgejo data directories and config (owned by git user)
- mkdir -p /data/gitea/conf /data/gitea/repositories /data/gitea/lfs /data/gitea/log /data/git/.ssh /data/ssh
- printf '[database]\nDB_TYPE = sqlite3\nPATH = /data/gitea/forgejo.db\n\n[server]\nHTTP_PORT = 3000\nROOT_URL = http://localhost:3000/\nLFS_START_SERVER = false\n\n[security]\nINSTALL_LOCK = true\n\n[service]\nDISABLE_REGISTRATION = true\n' > /data/gitea/conf/app.ini
- chown -R git:git /data
# Start Forgejo as git user in background and wait for API
- su-exec git forgejo web --config /data/gitea/conf/app.ini &
- for i in $(seq 1 30); do curl -sf http://localhost:3000/api/v1/version >/dev/null 2>&1 && break; sleep 1; done
# Create bootstrap admin user via CLI
- su-exec git forgejo admin user create --admin --username setup-admin --password "SetupPass-789xyz" --email "setup-admin@smoke.test" --must-change-password=false --config /data/gitea/conf/app.ini
# Run the smoke test (as root is fine — only forgejo binary needs git user)
- bash tests/smoke-init.sh - bash tests/smoke-init.sh
- kill $(cat /tmp/mock-forgejo.pid) 2>/dev/null || true

116
AGENTS.md
View file

@ -1,19 +1,12 @@
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 --> <!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
# Disinto — Agent Instructions # Disinto — Agent Instructions
## What this repo is ## What this repo is
Disinto is an autonomous code factory. It manages ten agents (dev, review, Disinto is an autonomous code factory. It manages eight agents (dev, review,
gardener, supervisor, planner, predictor, architect, reproduce, triage, edge gardener, supervisor, planner, predictor, action, vault) that pick up issues from forge,
dispatcher) that pick up issues from forge, implement them, review PRs, plan implement them, review PRs, plan from the vision, gate dangerous actions, and
from the vision, and keep the system healthy — all via a polling loop (`docker/agents/entrypoint.sh`) and `claude -p`. keep the system healthy — all via cron and `claude -p`.
The dispatcher executes formula-based operational tasks.
Each agent has a `.profile` repository on Forgejo that stores lessons learned
from prior sessions, providing continuous improvement across runs.
> **Note:** The vault is being redesigned as a PR-based approval workflow on the
> ops repo (see issues #73-#77). See [docs/VAULT.md](docs/VAULT.md) for details. Old vault scripts are being removed.
See `README.md` for the full architecture and `disinto-factory/SKILL.md` for setup. See `README.md` for the full architecture and `disinto-factory/SKILL.md` for setup.
@ -21,45 +14,30 @@ See `README.md` for the full architecture and `disinto-factory/SKILL.md` for set
``` ```
disinto/ (code repo) disinto/ (code repo)
├── dev/ dev-poll.sh, dev-agent.sh, phase-test.sh — issue implementation ├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh — issue implementation
├── review/ review-poll.sh, review-pr.sh — PR review ├── review/ review-poll.sh, review-pr.sh — PR review
├── gardener/ gardener-run.sh — polling-loop executor for run-gardener formula ├── gardener/ gardener-run.sh — direct cron executor for run-gardener formula
│ best-practices.md — gardener best-practice reference ├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula
│ pending-actions.json — queued gardener actions ├── planner/ planner-run.sh — direct cron executor for run-planner formula
├── predictor/ predictor-run.sh — polling-loop executor for run-predictor formula ├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper)
├── planner/ planner-run.sh — polling-loop executor for run-planner formula
├── supervisor/ supervisor-run.sh — formula-driven health monitoring (polling-loop executor)
│ preflight.sh — pre-flight data collection for supervisor formula │ preflight.sh — pre-flight data collection for supervisor formula
├── architect/ architect-run.sh — strategic decomposition of vision into sprints │ supervisor-poll.sh — legacy bash orchestrator (superseded)
├── vault/ vault-env.sh — shared env setup (vault redesign in progress, see #73-#77) ├── vault/ vault-poll.sh, vault-agent.sh, vault-fire.sh — action gating + procurement
│ SCHEMA.md — vault item schema documentation ├── action/ action-poll.sh, action-agent.sh — operational task execution
│ validate.sh — vault item validator ├── lib/ env.sh, agent-session.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, build-graph.py
│ examples/ — example vault action TOMLs (promote, publish, release, webhook-call)
├── lib/ env.sh, agent-sdk.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py,
│ branch-protection.sh, secret-scan.sh, tea-helpers.sh, vault.sh, ci-log-reader.py
│ hooks/ — Claude Code session hooks (on-compact-reinject, on-idle-stop, on-phase-change, on-pretooluse-guard, on-session-end, on-stop-failure)
├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored) ├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored)
├── formulas/ Issue templates (TOML specs for multi-step agent tasks) ├── formulas/ Issue templates (TOML specs for multi-step agent tasks)
├── docker/ Dockerfiles and entrypoints for reproduce, triage, and edge dispatcher agents └── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md)
├── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md)
├── site/ disinto.ai website content
├── tests/ Test files (mock-forgejo.py, smoke-init.sh)
├── templates/ Issue templates
├── bin/ The `disinto` CLI script
├── disinto-factory/ Setup documentation and skill
├── state/ Runtime state
├── .woodpecker/ Woodpecker CI pipeline configs
├── VISION.md High-level project vision
└── CLAUDE.md Claude Code project instructions
disinto-ops/ (ops repo — {project}-ops) disinto-ops/ (ops repo — {project}-ops)
├── vault/ ├── vault/
│ ├── actions/ where vault action TOMLs land (core of vault workflow)
│ ├── pending/ vault items awaiting approval │ ├── pending/ vault items awaiting approval
│ ├── approved/ approved vault items │ ├── approved/ approved vault items
│ ├── fired/ executed vault items │ ├── fired/ executed vault items
│ └── rejected/ rejected vault items │ └── rejected/ rejected vault items
├── sprints/ sprint planning artifacts ├── journal/
│ ├── planner/ daily planning logs
│ └── supervisor/ operational health logs
├── knowledge/ shared agent knowledge + best practices ├── knowledge/ shared agent knowledge + best practices
├── evidence/ engagement data, experiment results ├── evidence/ engagement data, experiment results
├── portfolio.md addressables + observables ├── portfolio.md addressables + observables
@ -67,11 +45,10 @@ disinto-ops/ (ops repo — {project}-ops)
└── RESOURCES.md accounts, tokens (refs), infra inventory └── RESOURCES.md accounts, tokens (refs), infra inventory
``` ```
## Agent .profile Model > **Terminology note:** "Formulas" in this repo are TOML issue templates in `formulas/` that
> orchestrate multi-step agent tasks (e.g., `run-gardener.toml`, `run-planner.toml`). This is
Each agent has a `.profile` repository on Forgejo storing `knowledge/lessons-learned.md` (injected into each session prompt) and `journal/` reflection entries (digested into lessons). Pre-session: `formula_prepare_profile_context()` loads lessons. Post-session: `profile_write_journal` records reflections. See `lib/formula-session.sh`. > distinct from "processes" described in `docs/EVIDENCE-ARCHITECTURE.md`, which are measurement
> and mutation pipelines that read external platforms and write structured evidence to git.
> **Terminology note:** "Formulas" are TOML issue templates in `formulas/` that orchestrate multi-step agent tasks. Distinct from "processes" in `docs/EVIDENCE-ARCHITECTURE.md`.
## Tech stack ## Tech stack
@ -113,13 +90,8 @@ bash dev/phase-test.sh
| Supervisor | `supervisor/` | Health monitoring | [supervisor/AGENTS.md](supervisor/AGENTS.md) | | Supervisor | `supervisor/` | Health monitoring | [supervisor/AGENTS.md](supervisor/AGENTS.md) |
| Planner | `planner/` | Strategic planning | [planner/AGENTS.md](planner/AGENTS.md) | | Planner | `planner/` | Strategic planning | [planner/AGENTS.md](planner/AGENTS.md) |
| Predictor | `predictor/` | Infrastructure pattern detection | [predictor/AGENTS.md](predictor/AGENTS.md) | | Predictor | `predictor/` | Infrastructure pattern detection | [predictor/AGENTS.md](predictor/AGENTS.md) |
| Architect | `architect/` | Strategic decomposition | [architect/AGENTS.md](architect/AGENTS.md) | | Action | `action/` | Operational task execution | [action/AGENTS.md](action/AGENTS.md) |
| Reproduce | `docker/reproduce/` | Bug reproduction using Playwright MCP | `formulas/reproduce.toml` | | Vault | `vault/` | Action gating + resource procurement | [vault/AGENTS.md](vault/AGENTS.md) |
| Triage | `docker/reproduce/` | Deep root cause analysis | `formulas/triage.toml` |
| Edge dispatcher | `docker/edge/` | Polls ops repo for vault actions, executes via Claude sessions | `docker/edge/dispatcher.sh` |
> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77).
> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow details.
See [lib/AGENTS.md](lib/AGENTS.md) for the full shared helper reference. See [lib/AGENTS.md](lib/AGENTS.md) for the full shared helper reference.
@ -136,27 +108,34 @@ Issues flow: `backlog` → `in-progress` → PR → CI → review → merge →
| `backlog` | Issue is queued for implementation. Dev-poll picks the first ready one. | Planner, gardener, humans | | `backlog` | Issue is queued for implementation. Dev-poll picks the first ready one. | Planner, gardener, humans |
| `priority` | Queue tier above plain backlog. Issues with both `priority` and `backlog` are picked before plain `backlog` issues. FIFO within each tier. | Planner, humans | | `priority` | Queue tier above plain backlog. Issues with both `priority` and `backlog` are picked before plain `backlog` issues. FIFO within each tier. | Planner, humans |
| `in-progress` | Dev-agent is actively working on this issue. Only one issue per project is in-progress at a time. | dev-agent.sh (claims issue) | | `in-progress` | Dev-agent is actively working on this issue. Only one issue per project is in-progress at a time. | dev-agent.sh (claims issue) |
| `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, dev-poll.sh (on failure) | | `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, action-agent.sh, dev-poll.sh (on failure) |
| `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) | | `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) |
| `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) | | `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) |
| `bug-report` | Issue describes user-facing broken behavior with reproduction steps. Separate triage track for reproduction automation. | Gardener (bug-report detection in grooming) |
| `in-triage` | Bug reproduced but root cause not obvious — triage agent investigates. Set alongside `bug-report`. | reproduce-agent (when reproduction succeeds but cause unclear) |
| `rejected` | Issue formally rejected — cannot reproduce, out of scope, or invalid. | reproduce-agent, humans |
| `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans | | `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans |
| `prediction/unreviewed` | Unprocessed prediction filed by predictor. | predictor-run.sh | | `prediction/unreviewed` | Unprocessed prediction filed by predictor. | predictor-run.sh |
| `prediction/dismissed` | Prediction triaged as DISMISS — planner disagrees, closed with reason. | Planner (triage-predictions step) | | `prediction/dismissed` | Prediction triaged as DISMISS — planner disagrees, closed with reason. | Planner (triage-predictions step) |
| `prediction/actioned` | Prediction promoted or dismissed by planner. | Planner (triage-predictions step) | | `prediction/actioned` | Prediction promoted or dismissed by planner. | Planner (triage-predictions step) |
| `formula` | Issue is a formula-based operational task. Dev-poll skips these; dispatcher handles them. | Dispatcher (when dispatching formula tasks) | | `action` | Operational task for the action-agent to execute via formula. | Planner, humans |
### Dependency conventions ### Dependency conventions
Issues declare dependencies via `## Dependencies` / `## Depends on` sections listing `#N` refs. `lib/parse-deps.sh` extracts these; dev-poll only picks issues whose deps are all closed. See AD-002 for concurrency bounds per LLM backend. Issues declare dependencies in their body using a `## Dependencies` or
`## Depends on` section listing `#N` references. The dev-poll scheduler uses
`lib/parse-deps.sh` to extract these and only picks issues whose dependencies
are all closed.
### Single-threaded pipeline
Each project processes one issue at a time. Dev-poll will not start new work
while an open PR is waiting for CI or review. This keeps context clear and
prevents merge conflicts between concurrent changes.
--- ---
## Addressables and Observables ## Addressables
Concrete artifacts the factory has produced or is building. Observables have measurement wired — the gardener promotes addressables once an evidence process is connected. Concrete artifacts the factory has produced or is building. The gardener
maintains this table during grooming — see `formulas/run-gardener.toml`.
| Artifact | Location | Observable? | | Artifact | Location | Observable? |
|----------|----------|-------------| |----------|----------|-------------|
@ -165,6 +144,14 @@ Concrete artifacts the factory has produced or is building. Observables have mea
| Skill | ClawHub (in progress) | No | | Skill | ClawHub (in progress) | No |
| GitHub org | github.com/Disinto | No | | GitHub org | github.com/Disinto | No |
## Observables
Addressables with measurement wired — the factory can read structured
feedback from these. The gardener promotes addressables here once an
evidence process is connected.
None yet.
--- ---
## Architecture Decisions ## Architecture Decisions
@ -173,18 +160,17 @@ Humans write these. Agents read and enforce them.
| ID | Decision | Rationale | | ID | Decision | Rationale |
|---|---|---| |---|---|---|
| AD-001 | Nervous system runs from a polling loop (`docker/agents/entrypoint.sh`), not PR-based actions. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) | | AD-001 | Nervous system runs from cron, not action issues. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) |
| AD-002 | **Concurrency is bounded per LLM backend, not per project.** One concurrent Claude session per OAuth credential pool; one concurrent session per llama-server instance. Containers with disjoint backends may run in parallel. | The single-thread invariant is about *backends*, not pipelines. **(a) Anthropic OAuth credentials race on token refresh** — two sessions sharing one mounted `~/.claude` will trip over each other during rotation and 401. All agents inside an OAuth-mounted container serialize on `flock session.lock`. **(b) llama-server has finite VRAM and one KV cache** — parallel inference thrashes the cache and risks OOM. All llama-backed agents serialize on the same lock. **(c) Disjoint backends are free to parallelize.** Today `disinto-agents` (Anthropic OAuth, runs `review,gardener`) runs concurrently with `disinto-agents-llama` (llama, runs `dev`) on the same project — they share neither OAuth state nor llama VRAM. **(d) Per-project work-conflict safety** (no duplicate dev work, no merge conflicts on the same branch) is enforced by `issue_claim` (assignee + `in-progress` label) and per-issue worktrees — that's a separate guard that does NOT depend on this AD. | | AD-002 | Single-threaded pipeline per project. | One dev issue at a time. No new work while a PR awaits CI or review. Prevents merge conflicts and keeps context clear. |
| AD-003 | The runtime creates and destroys, the formula preserves. | Runtime manages worktrees/sessions/temp. Formulas commit knowledge to git before signaling done. | | AD-003 | The runtime creates and destroys, the formula preserves. | Runtime manages worktrees/sessions/temp. Formulas commit knowledge to git before signaling done. |
| AD-004 | Event-driven > polling > fixed delays. | Never `waitForTimeout` or hardcoded sleep. Use phase files, webhooks, or poll loops with backoff. | | AD-004 | Event-driven > polling > fixed delays. | Never `waitForTimeout` or hardcoded sleep. Use phase files, webhooks, or poll loops with backoff. |
| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (SOPS-encrypted when available; plaintext `.env`/`.env.vault` fallback supported). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. | | AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (both SOPS-encrypted). Referenced as `$VAR_NAME`. Vault-runner gets only vault secrets; agents get only agent secrets. |
| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. (Vault redesign in progress: PR-based approval on ops repo, see #73-#77) | | AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral vault-runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. |
**Who enforces what:** **Who enforces what:**
- **Gardener** checks open backlog issues against ADs during grooming; closes violations with a comment referencing the AD number. - **Gardener** checks open backlog issues against ADs during grooming; closes violations with a comment referencing the AD number.
- **Planner** plans within the architecture; does not create issues that violate ADs. - **Planner** plans within the architecture; does not create issues that violate ADs.
- **Dev-agent** reads AGENTS.md before implementing; refuses work that violates ADs. - **Dev-agent** reads AGENTS.md before implementing; refuses work that violates ADs.
- **AD-002 is a runtime invariant; nothing for the gardener to check at issue-groom time.** Concurrency is enforced by `flock session.lock` within each container and by `issue_claim` for per-issue work. A violation manifests as a 401 or VRAM OOM in agent logs, not as a malformed issue.
--- ---

View file

@ -21,29 +21,25 @@ Point it at a git repo with a Woodpecker CI pipeline and it will pick up issues,
## Architecture ## Architecture
``` ```
entrypoint.sh (while-true polling loop, 5 min base interval) cron (*/10) ──→ supervisor-poll.sh ← supervisor (bash checks, zero tokens)
├── every 5 min ──→ review-poll.sh ← finds unreviewed PRs, spawns review
│ └── review-pr.sh ← claude -p: review → approve/request changes
├── every 5 min ──→ dev-poll.sh ← pulls ready issues, spawns dev-agent
│ └── dev-agent.sh ← claude -p: implement → PR → CI → review → merge
├── every 6h ────→ gardener-run.sh ← backlog grooming (duplicates, stale, tech-debt)
│ └── claude -p: triage → promote/close/escalate
├── every 6h ────→ architect-run.sh ← strategic decomposition of vision into sprints
├── every 12h ───→ planner-run.sh ← gap-analyse VISION.md, create backlog issues
│ └── claude -p: update AGENTS.md → create issues
└── every 24h ───→ predictor-run.sh ← infrastructure pattern detection
entrypoint-edge.sh (edge container)
├── dispatcher.sh ← polls ops repo for vault actions
└── every 20 min → supervisor-run.sh ← health checks (bash checks, zero tokens)
├── all clear? → exit 0 ├── all clear? → exit 0
└── problem? → claude -p (diagnose, fix, or escalate) └── problem? → claude -p (diagnose, fix, or escalate)
cron (*/10) ──→ dev-poll.sh ← pulls ready issues, spawns dev-agent
└── dev-agent.sh ← claude -p: implement → PR → CI → review → merge
cron (*/10) ──→ review-poll.sh ← finds unreviewed PRs, spawns review
└── review-pr.sh ← claude -p: review → approve/request changes
cron (daily) ──→ gardener-poll.sh ← backlog grooming (duplicates, stale, tech-debt)
└── claude -p: triage → promote/close/escalate
cron (weekly) ──→ planner-poll.sh ← gap-analyse VISION.md, create backlog issues
└── claude -p: update AGENTS.md → create issues
cron (*/30) ──→ vault-poll.sh ← safety gate for dangerous/irreversible actions
└── claude -p: classify → auto-approve/reject or escalate
``` ```
## Prerequisites ## Prerequisites
@ -93,11 +89,18 @@ CLAUDE_TIMEOUT=7200 # max seconds per Claude invocation (default: 2h)
``` ```
```bash ```bash
# 3. Start the agent and edge containers # 3. Install cron (staggered to avoid overlap)
docker compose up -d crontab -e
# Add:
# 0,10,20,30,40,50 * * * * /path/to/disinto/supervisor/supervisor-poll.sh
# 3,13,23,33,43,53 * * * * /path/to/disinto/review/review-poll.sh
# 6,16,26,36,46,56 * * * * /path/to/disinto/dev/dev-poll.sh
# 15 8 * * * /path/to/disinto/gardener/gardener-poll.sh
# 0,30 * * * * /path/to/disinto/vault/vault-poll.sh
# 0 9 * * 1 /path/to/disinto/planner/planner-poll.sh
# 4. Verify the entrypoint loop is running # 4. Verify
docker exec disinto-agents-1 tail -f /home/agent/data/agent-entrypoint.log bash supervisor/supervisor-poll.sh # should log "all clear"
``` ```
## Directory Structure ## Directory Structure
@ -110,23 +113,26 @@ disinto/
│ ├── env.sh # Shared: load .env, PATH, API helpers │ ├── env.sh # Shared: load .env, PATH, API helpers
│ └── ci-debug.sh # Woodpecker CI log/failure helper │ └── ci-debug.sh # Woodpecker CI log/failure helper
├── dev/ ├── dev/
│ ├── dev-poll.sh # Poll: find ready issues │ ├── dev-poll.sh # Cron entry: find ready issues
│ └── dev-agent.sh # Implementation agent (claude -p) │ └── dev-agent.sh # Implementation agent (claude -p)
├── review/ ├── review/
│ ├── review-poll.sh # Poll: find unreviewed PRs │ ├── review-poll.sh # Cron entry: find unreviewed PRs
│ └── review-pr.sh # Review agent (claude -p) │ └── review-pr.sh # Review agent (claude -p)
├── gardener/ ├── gardener/
│ ├── gardener-run.sh # Executor: backlog grooming │ ├── gardener-poll.sh # Cron entry: backlog grooming
│ └── best-practices.md # Gardener knowledge base │ └── best-practices.md # Gardener knowledge base
├── planner/ ├── planner/
│ ├── planner-run.sh # Executor: vision gap analysis │ ├── planner-poll.sh # Cron entry: weekly vision gap analysis
│ └── (formula-driven) # run-planner.toml executed by dispatcher │ └── (formula-driven) # run-planner.toml executed by action-agent
├── vault/ ├── vault/
│ └── vault-env.sh # Shared env setup (vault redesign in progress, see #73-#77) │ ├── vault-poll.sh # Cron entry: process pending dangerous actions
├── docs/ │ ├── vault-agent.sh # Classifies and routes actions (claude -p)
│ └── VAULT.md # Vault PR workflow and branch protection documentation │ ├── vault-fire.sh # Executes an approved action
│ ├── vault-reject.sh # Marks an action as rejected
│ └── PROMPT.md # System prompt for vault agent
└── supervisor/ └── supervisor/
├── supervisor-poll.sh # Supervisor: health checks + claude -p ├── supervisor-poll.sh # Supervisor: health checks + claude -p
├── PROMPT.md # Supervisor's system prompt
├── update-prompt.sh # Self-learning: append to best-practices ├── update-prompt.sh # Self-learning: append to best-practices
└── best-practices/ # Progressive disclosure knowledge base └── best-practices/ # Progressive disclosure knowledge base
├── memory.md ├── memory.md
@ -142,14 +148,12 @@ disinto/
| Agent | Trigger | Job | | Agent | Trigger | Job |
|-------|---------|-----| |-------|---------|-----|
| **Supervisor** | Every 20 min | Health checks (RAM, disk, CI, git). Calls Claude only when something is broken. Self-improving via `best-practices/`. | | **Supervisor** | Every 10 min | Health checks (RAM, disk, CI, git). Calls Claude only when something is broken. Self-improving via `best-practices/`. |
| **Dev** | Every 5 min | Picks up `backlog`-labeled issues, creates a branch, implements, opens a PR, monitors CI, responds to review, merges. | | **Dev** | Every 10 min | Picks up `backlog`-labeled issues, creates a branch, implements, opens a PR, monitors CI, responds to review, merges. |
| **Review** | Every 5 min | Finds PRs without review, runs Claude-powered code review, approves or requests changes. | | **Review** | Every 10 min | Finds PRs without review, runs Claude-powered code review, approves or requests changes. |
| **Gardener** | Every 6h | Grooms the issue backlog: detects duplicates, promotes `tech-debt` to `backlog`, closes stale issues, escalates ambiguous items. | | **Gardener** | Daily | Grooms the issue backlog: detects duplicates, promotes `tech-debt` to `backlog`, closes stale issues, escalates ambiguous items. |
| **Planner** | Every 12h | Updates AGENTS.md documentation to reflect recent code changes, then gap-analyses VISION.md vs current state and creates up to 5 backlog issues for the highest-leverage gaps. | | **Planner** | Weekly | Updates AGENTS.md documentation to reflect recent code changes, then gap-analyses VISION.md vs current state and creates up to 5 backlog issues for the highest-leverage gaps. |
| **Vault** | Every 30 min | Safety gate for dangerous or irreversible actions. Classifies pending actions via Claude: auto-approve, auto-reject, or escalate to a human via vault/forge. |
> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77).
> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow and branch protection details.
## Design Principles ## Design Principles

34
action/AGENTS.md Normal file
View file

@ -0,0 +1,34 @@
<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
# Action Agent
**Role**: Execute operational tasks described by action formulas — run scripts,
call APIs, send messages, collect human approval. Shares the same phase handler
as the dev-agent: if an action produces code changes, the orchestrator creates a
PR and drives the CI/review loop; otherwise Claude closes the issue directly.
**Trigger**: `action-poll.sh` runs every 10 min via cron. Sources `lib/guard.sh`
and calls `check_active action` first — skips if `$FACTORY_ROOT/state/.action-active`
is absent. Then scans for open issues labeled `action` that have no active tmux
session, and spawns `action-agent.sh <issue-number>`.
**Key files**:
- `action/action-poll.sh` — Cron scheduler: finds open action issues with no active tmux session, spawns action-agent.sh
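- `action/action-agent.sh` — Orchestrator: fetches issue body + prior comments, **checks all dependencies via `lib/parse-deps.sh` before spawning** (skips silently if any dep is still open), creates tmux session (`action-{project}-{issue_num}`) with interactive `claude`, injects formula prompt with phase protocol, enters `monitor_phase_loop` (shared via `dev/phase-handler.sh`) for CI/review lifecycle or direct completion. *Note: the header of `action/action-agent.sh` now describes a synchronous one-shot `claude -p` run via `agent_run` with no tmux sessions and no phase files, followed by `pr_walk_to_merge` when a branch was pushed; the tmux/phase-handler description here predates that and should be re-reviewed.*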
**Session lifecycle**:
1. `action-poll.sh` finds open `action` issues with no active tmux session.
2. Spawns `action-agent.sh <issue_num>`.
3. Agent creates tmux session `action-{project}-{issue_num}`, injects prompt (formula + prior comments + phase protocol).
4. Agent enters `monitor_phase_loop` (shared with dev-agent via `dev/phase-handler.sh`).
5. **Path A (git output):** Claude pushes branch → `PHASE:awaiting_ci` → handler creates PR, polls CI → injects failures → Claude fixes → push → re-poll → CI passes → `PHASE:awaiting_review` → handler polls reviews → injects REQUEST_CHANGES → Claude fixes → approved → merge → cleanup.
6. **Path B (no git output):** Claude posts results as comment, closes issue → `PHASE:done` → handler cleans up (kill session, docker compose down, remove temp files).
7. For human input: Claude writes `PHASE:escalate`; human responds via vault/forge.
**Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h.
**Environment variables consumed**:
- `FORGE_TOKEN`, `FORGE_ACTION_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `FORGE_URL`, `PROJECT_NAME`, `FORGE_WEB`
- `ACTION_IDLE_TIMEOUT` — Max seconds before killing idle session (default 14400 = 4h)
- `ACTION_MAX_LIFETIME` — Max total session wall-clock seconds (default 28800 = 8h); caps session independently of idle timeout
**FORGE_REMOTE**: `action-agent.sh` auto-detects the git remote for `FORGE_URL` (same logic as dev-agent). Exported as `FORGE_REMOTE`, used for worktree creation and push instructions injected into the Claude prompt.

323
action/action-agent.sh Executable file
View file

@ -0,0 +1,323 @@
#!/usr/bin/env bash
# =============================================================================
# action-agent.sh — Synchronous action agent: SDK + shared libraries
#
# Synchronous bash loop using claude -p (one-shot invocation).
# No tmux sessions, no phase files — the bash script IS the state machine.
#
# Usage: ./action-agent.sh <issue-number> [project.toml]
#
# Flow:
# 1. Preflight: issue_check_deps(), memory guard, concurrency lock
# 2. Parse model from YAML front matter in issue body (custom model selection)
# 3. Worktree: worktree_create() for action isolation
# 4. Load formula from issue body
# 5. Build prompt: formula + prior non-bot comments (resume context)
# 6. agent_run(worktree, prompt) → Claude executes action, may push
# 7. If pushed: pr_walk_to_merge() from lib/pr-lifecycle.sh
# 8. Cleanup: worktree_cleanup(), issue_close()
#
# Action-specific (stays in runner):
# - YAML front matter parsing (model selection)
# - Bot username filtering for prior comments
# - Lifetime watchdog (MAX_LIFETIME=8h wall-clock cap)
# - Child process cleanup (docker compose, background jobs)
#
# From shared libraries:
# - Issue lifecycle: lib/issue-lifecycle.sh
# - Worktree: lib/worktree.sh
# - PR lifecycle: lib/pr-lifecycle.sh
# - Agent SDK: lib/agent-sdk.sh
#
# Log: ${DISINTO_LOG_DIR}/action/action-poll-{project}.log
# =============================================================================
# Strict mode: exit on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail
# $1 (required): issue number. The script aborts with the usage text below
# when it is missing.
ISSUE="${1:?Usage: action-agent.sh <issue-number> [project.toml]}"
# $2 (optional): project TOML. Falls back to an inherited PROJECT_TOML, or is
# exported empty when neither is given.
export PROJECT_TOML="${2:-${PROJECT_TOML:-}}"
# FACTORY_ROOT is the parent of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
# env.sh is sourced before the token override and before the other libraries.
# shellcheck source=../lib/env.sh
source "$FACTORY_ROOT/lib/env.sh"
# Use action-bot's own Forgejo identity (#747)
# FORGE_ACTION_TOKEN wins when set and non-empty; otherwise the existing
# FORGE_TOKEN is kept. Under `set -u` this line aborts if neither is set.
FORGE_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}"
# shellcheck source=../lib/ci-helpers.sh
source "$FACTORY_ROOT/lib/ci-helpers.sh"
# shellcheck source=../lib/worktree.sh
source "$FACTORY_ROOT/lib/worktree.sh"
# shellcheck source=../lib/issue-lifecycle.sh
source "$FACTORY_ROOT/lib/issue-lifecycle.sh"
# shellcheck source=../lib/agent-sdk.sh
source "$FACTORY_ROOT/lib/agent-sdk.sh"
# shellcheck source=../lib/pr-lifecycle.sh
source "$FACTORY_ROOT/lib/pr-lifecycle.sh"
# Per-issue names. The branch, lock file and session-id file are keyed by the
# issue number; the worktree path additionally carries the start timestamp,
# so every run gets a fresh worktree directory.
BRANCH="action/issue-${ISSUE}"
WORKTREE="/tmp/action-${ISSUE}-$(date +%s)"
LOCKFILE="/tmp/action-agent-${ISSUE}.lock"
# Same log file name that action-poll.sh writes to.
# DISINTO_LOG_DIR is not set in this file — presumably exported by lib/env.sh;
# verify.
LOGFILE="${DISINTO_LOG_DIR}/action/action-poll-${PROJECT_NAME:-default}.log"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
SID_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.sid"
# Wall-clock budget in seconds for the whole run; overridable through
# ACTION_MAX_LIFETIME. SESSION_START_EPOCH is the reference point the
# lifetime watchdog measures elapsed time against.
MAX_LIFETIME="${ACTION_MAX_LIFETIME:-28800}" # 8h default wall-clock cap
SESSION_START_EPOCH=$(date +%s)
# Append one timestamped line to $LOGFILE.
# Globals:   ISSUE (read), LOGFILE (read; appended to)
# Arguments: message words, joined with single spaces
# Output:    "[YYYY-MM-DD HH:MM:SS UTC] action#<issue> <message>"
log() {
  local message="$*"
  local stamp
  # `|| true`: a failing date must not abort the caller under `set -e`;
  # the line is then written with an empty timestamp.
  stamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC') || true
  printf '[%s] action#%s %s\n' "$stamp" "$ISSUE" "$message" >> "$LOGFILE"
}
# --- Concurrency lock (per issue) ---
# $LOCKFILE holds the PID of the action-agent that owns this issue
# (action-poll.sh reads the same file).
#
# The lock is created under `noclobber`, so the redirection fails instead of
# overwriting when the file already exists. This makes "does it exist?" and
# "create it" a single step, so two agents started at the same moment cannot
# both believe they own the lock. `$$` inside the subshell is still the PID of
# this script, so the file content is unchanged from the previous format.
#
# A lock whose PID is empty or no longer alive is treated as stale: it is
# removed and creation is retried once. Stale-lock takeover is best-effort.
lock_acquired=false
for _lock_attempt in 1 2; do
  if ( set -o noclobber; echo $$ > "$LOCKFILE" ) 2>/dev/null; then
    lock_acquired=true
    break
  fi
  LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "")
  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then
    log "SKIP: action-agent already running for #${ISSUE} (PID ${LOCK_PID})"
    exit 0
  fi
  # Owner is gone (or the file is empty/unreadable): drop it and retry.
  rm -f "$LOCKFILE"
done
if [ "$lock_acquired" != true ]; then
  # Neither attempt could create the file and nobody alive owns it, e.g. the
  # directory is not writable. Fail loudly instead of running unlocked.
  log "ERROR: could not create lock file ${LOCKFILE}"
  exit 1
fi
# EXIT-trap handler: tears down everything this run created.
# Globals:  LIFETIME_WATCHDOG_PID (optional), LOCKFILE, WORKTREE, SID_FILE
# Calls:    worktree_preserve / worktree_cleanup (defined outside this file)
# Order:    stop watchdog -> release lock -> kill background jobs ->
#           docker compose down in the worktree -> preserve or remove the
#           worktree -> remove the session-id file.
cleanup() {
  # Must be the first statement: every later command overwrites $?.
  local rc=$?
  # Stop the lifetime watchdog if it was started and is still alive.
  local watchdog="${LIFETIME_WATCHDOG_PID:-}"
  if [[ -n "$watchdog" ]] && kill -0 "$watchdog" 2>/dev/null; then
    kill "$watchdog" 2>/dev/null || true
    wait "$watchdog" 2>/dev/null || true
  fi
  rm -f "$LOCKFILE"
  # Terminate whatever background jobs of this shell are still around.
  local leftover
  leftover=$(jobs -p 2>/dev/null) || true
  if [[ -n "$leftover" ]]; then
    # shellcheck disable=SC2086 # intentional word splitting: one PID per word
    kill $leftover 2>/dev/null || true
    # shellcheck disable=SC2086
    wait $leftover 2>/dev/null || true
  fi
  # Best effort: only runs when the worktree directory exists; failures are
  # ignored. The subshell keeps the cd from leaking into the caller.
  (cd "${WORKTREE}" 2>/dev/null && docker compose down 2>/dev/null) || true
  # A clean exit removes the worktree; any other status keeps it for debugging.
  case "$rc" in
    0) worktree_cleanup "$WORKTREE" ;;
    *) worktree_preserve "$WORKTREE" "crashed (exit=$rc)" ;;
  esac
  rm -f "$SID_FILE"
}
# From here on, every exit path (normal, `exit N`, or an error under set -e)
# runs cleanup. The trap is installed only after the lock was taken, so the
# earlier "already running" exit does not remove another agent's lock.
trap cleanup EXIT
# --- Memory guard ---
# memory_guard is defined outside this file; presumably it stops the run when
# less than the given amount of memory is available — verify unit and
# behaviour against its definition.
memory_guard 2000
# --- Fetch issue ---
log "fetching issue #${ISSUE}"
# `curl -f` turns HTTP errors into a non-zero exit; `|| true` keeps set -e
# from aborting so the failure can be logged by the check below.
ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${ISSUE}") || true
# A usable response is non-empty JSON with a non-null `.id`.
if [ -z "$ISSUE_JSON" ] || ! printf '%s' "$ISSUE_JSON" | jq -e '.id' >/dev/null 2>&1; then
log "ERROR: failed to fetch issue #${ISSUE}"
exit 1
fi
ISSUE_TITLE=$(printf '%s' "$ISSUE_JSON" | jq -r '.title')
# A null body becomes the empty string.
ISSUE_BODY=$(printf '%s' "$ISSUE_JSON" | jq -r '.body // ""')
ISSUE_STATE=$(printf '%s' "$ISSUE_JSON" | jq -r '.state')
# Only open issues are processed; anything else is a clean no-op.
if [ "$ISSUE_STATE" != "open" ]; then
log "SKIP: issue #${ISSUE} is ${ISSUE_STATE}"
exit 0
fi
log "Issue: ${ISSUE_TITLE}"
# --- Dependency check (shared library) ---
# issue_check_deps and the _ISSUE_BLOCKED_BY array it fills come from outside
# this file (presumably lib/issue-lifecycle.sh). A non-zero return is treated
# as "blocked": the run is skipped with exit 0, not reported as an error.
if ! issue_check_deps "$ISSUE"; then
log "SKIP: issue #${ISSUE} blocked by: ${_ISSUE_BLOCKED_BY[*]}"
exit 0
fi
# --- Extract model from YAML front matter (if present) ---
# Looks for a `model: <name>` line between `---` delimiter lines in the issue
# body and, when one is found, exports it as CLAUDE_MODEL for this run.
#  - Carriage returns are stripped first, so a body stored with CRLF line
#    endings still matches the `^---$` delimiters and the model name does not
#    end in a stray \r.
#  - Only the first `model:` line is used, so the value is always one line.
#  - Surrounding double quotes are removed from the value.
# `|| true` keeps the script alive under `set -euo pipefail` when there is no
# front matter, and when awk's early exit closes the pipe on sed.
YAML_MODEL=$(printf '%s' "$ISSUE_BODY" | tr -d '\r' | \
  sed -n '/^---$/,/^---$/p' | awk '/^model:/ { print $2; exit }' | tr -d '"' || true)
if [ -n "$YAML_MODEL" ]; then
  export CLAUDE_MODEL="$YAML_MODEL"
  log "model from front matter: ${YAML_MODEL}"
fi
# --- Resolve bot username(s) for comment filtering ---
# ${FORGE_API%%/repos*} drops everything from the first "/repos" onward, so
# the request goes to <api-root>/user and returns the login that owns
# FORGE_TOKEN. Any failure leaves _bot_login empty instead of aborting.
_bot_login=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API%%/repos*}/user" | jq -r '.login // empty' 2>/dev/null || true)
# Build list: token owner + any extra names from FORGE_BOT_USERNAMES (comma-separated)
# The ${var:+...} form adds the separating comma only when a token owner was
# resolved, so the list never starts with a comma.
_bot_logins="${_bot_login}"
if [ -n "${FORGE_BOT_USERNAMES:-}" ]; then
_bot_logins="${_bot_logins:+${_bot_logins},}${FORGE_BOT_USERNAMES}"
fi
# --- Fetch existing comments (resume context, excluding bot comments) ---
# Requests a single page with limit=50; no further pages are fetched here.
COMMENTS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${ISSUE}/comments?limit=50") || true
PRIOR_COMMENTS=""
# The jq program splits the bot list on commas (dropping empty entries), keeps
# only comments whose author login is not in that list, and renders each as
#   [<login> at <first 19 chars of created_at>]
#   <body>
#   ---
# jq errors are swallowed; PRIOR_COMMENTS is then simply empty.
if [ -n "$COMMENTS_JSON" ] && [ "$COMMENTS_JSON" != "null" ] && [ "$COMMENTS_JSON" != "[]" ]; then
PRIOR_COMMENTS=$(printf '%s' "$COMMENTS_JSON" | \
jq -r --arg bots "$_bot_logins" \
'($bots | split(",") | map(select(. != ""))) as $bl |
.[] | select(.user.login as $u | $bl | index($u) | not) |
"[\(.user.login) at \(.created_at[:19])]\n\(.body)\n---"' 2>/dev/null || true)
fi
# --- Determine git remote ---
# All following git commands without -C run in the project repository.
# PROJECT_REPO_ROOT and PRIMARY_BRANCH are not set in this file — presumably
# provided by lib/env.sh / the project TOML; verify.
cd "${PROJECT_REPO_ROOT}"
# Reduce FORGE_URL to its host[:port] part: strip the http(s):// scheme, then
# everything from the first remaining "/".
_forge_host=$(echo "$FORGE_URL" | sed 's|https\?://||; s|/.*||')
# First remote whose push URL matches that host. The host is used as an awk
# regular expression, so "." matches any character.
FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}')
# No matching remote: fall back to "origin".
FORGE_REMOTE="${FORGE_REMOTE:-origin}"
export FORGE_REMOTE
# --- Create isolated worktree ---
log "creating worktree: ${WORKTREE}"
# Best-effort refresh of the base branch; a failed fetch is ignored.
git fetch "${FORGE_REMOTE}" "${PRIMARY_BRANCH}" 2>/dev/null || true
# worktree_create is defined outside this file (presumably lib/worktree.sh).
# A failure here exits 1, which makes the EXIT trap run cleanup.
if ! worktree_create "$WORKTREE" "$BRANCH"; then
log "ERROR: worktree creation failed"
exit 1
fi
log "worktree ready: ${WORKTREE}"
# --- Build prompt ---
# PRIOR_SECTION is either empty or a "## Prior comments" heading followed by
# the filtered comments; it is spliced in directly before "## Instructions".
PRIOR_SECTION=""
if [ -n "$PRIOR_COMMENTS" ]; then
PRIOR_SECTION="## Prior comments (resume context)
${PRIOR_COMMENTS}
"
fi
# build_phase_protocol_prompt is defined outside this file; its output is
# appended verbatim as the last part of the prompt.
GIT_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE")
# The prompt is a single double-quoted string, so ${ISSUE}, ${ISSUE_TITLE},
# ${ISSUE_BODY}, ${FORGE_API}, ${WORKTREE}, ${BRANCH}, ${FORGE_REMOTE} and
# ${FACTORY_ROOT} are expanded now. The backslash-escaped \${FORGE_TOKEN} in
# the curl example is kept literal, so the token value itself is never written
# into the prompt. Everything inside the quotes is runtime text sent to the
# agent — do not reformat it.
PROMPT="You are an action agent. Your job is to execute the action formula
in the issue below.
## Issue #${ISSUE}: ${ISSUE_TITLE}
${ISSUE_BODY}
${PRIOR_SECTION}## Instructions
1. Read the action formula steps in the issue body carefully.
2. Execute each step in order using your Bash tool and any other tools available.
3. Post progress as comments on issue #${ISSUE} after significant steps:
curl -sf -X POST \\
-H \"Authorization: token \${FORGE_TOKEN}\" \\
-H 'Content-Type: application/json' \\
\"${FORGE_API}/issues/${ISSUE}/comments\" \\
-d \"{\\\"body\\\": \\\"your comment here\\\"}\"
4. If a step requires human input or approval, post a comment explaining what
is needed and stop — the orchestrator will block the issue.
### Path A: If this action produces code changes (e.g. config updates, baselines):
- You are already in an isolated worktree at: ${WORKTREE}
- You are on branch: ${BRANCH}
- Make your changes, commit, and push: git push ${FORGE_REMOTE} ${BRANCH}
- **IMPORTANT:** The worktree is destroyed after completion. Push all
results before finishing — unpushed work will be lost.
### Path B: If this action produces no code changes (investigation, report):
- Post results as a comment on issue #${ISSUE}.
- **IMPORTANT:** The worktree is destroyed after completion. Copy any
files you need to persistent paths before finishing.
5. Environment variables available in your bash sessions:
FORGE_TOKEN, FORGE_API, FORGE_REPO, FORGE_WEB, PROJECT_NAME
(all sourced from ${FACTORY_ROOT}/.env)
### CRITICAL: Never embed secrets in issue bodies, comments, or PR descriptions
- NEVER put API keys, tokens, passwords, or private keys in issue text or comments.
- Always reference secrets via env var names (e.g. \\\$BASE_RPC_URL, \\\${FORGE_TOKEN}).
- If a formula step needs a secret, read it from .env or the environment at runtime.
- Before posting any comment, verify it contains no credentials, hex keys > 32 chars,
or URLs with embedded API keys.
If the prior comments above show work already completed, resume from where it
left off.
${GIT_INSTRUCTIONS}"
# --- Wall-clock lifetime watchdog (background) ---
# Caps total run time independently of claude -p timeout. When the cap is
# hit the watchdog kills the main process, which triggers cleanup via trap.
#
# Must be started in the background (`_lifetime_watchdog &`): it installs a
# TERM trap and may call `exit`, both of which are meant to affect only the
# background subshell.
#
# Globals (read): MAX_LIFETIME, SESSION_START_EPOCH, ISSUE, FORGE_TOKEN,
#                 FORGE_API
# Side effects:   after the remaining lifetime has elapsed, writes a log line,
#                 posts a comment on the issue (best effort) and sends TERM to
#                 the main script ($$ is the main script's PID even inside the
#                 background subshell).
#
# The wait is done with a background `sleep` plus `wait` rather than a
# foreground `sleep`: a TERM sent to this watchdog (cleanup does that on every
# normal exit) would otherwise end only the subshell and leave the sleep
# process running on its own for up to MAX_LIFETIME seconds. The TERM trap
# takes the sleep down together with the watchdog.
_lifetime_watchdog() {
  local remaining=$(( MAX_LIFETIME - ($(date +%s) - SESSION_START_EPOCH) ))
  # Budget already used up: still sleep one second so the caller can record
  # the watchdog PID before it fires.
  [ "$remaining" -le 0 ] && remaining=1

  local sleep_pid=""
  # Installed before the sleep starts so there is no window in which a TERM
  # could kill the watchdog but miss the sleep.
  trap '[ -n "$sleep_pid" ] && kill "$sleep_pid" 2>/dev/null; exit 0' TERM
  sleep "$remaining" &
  sleep_pid=$!
  wait "$sleep_pid" || true
  trap - TERM

  local hours=$(( MAX_LIFETIME / 3600 ))
  log "MAX_LIFETIME (${hours}h) reached — killing agent"
  # Post summary comment on issue (best effort; failures are ignored)
  local body="Action agent killed: wall-clock lifetime cap (${hours}h) reached."
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H 'Content-Type: application/json' \
    "${FORGE_API}/issues/${ISSUE}/comments" \
    -d "{\"body\": \"${body}\"}" >/dev/null 2>&1 || true
  kill $$ 2>/dev/null || true
}
# Start the watchdog in the background and remember its PID; cleanup uses
# LIFETIME_WATCHDOG_PID to stop it on exit.
_lifetime_watchdog &
LIFETIME_WATCHDOG_PID=$!
# --- Run agent ---
# agent_run is defined outside this file (presumably lib/agent-sdk.sh). The
# call is synchronous: the script continues only after it returns, and under
# `set -e` a non-zero return ends the script (and triggers cleanup) here.
log "running agent (worktree: ${WORKTREE})"
agent_run --worktree "$WORKTREE" "$PROMPT"
log "agent_run complete"
# --- Detect if branch was pushed (Path A vs Path B) ---
PUSHED=false
# Check if remote branch exists
# The fetch is best-effort; the decision is made on whether the
# remote-tracking ref <remote>/<branch> resolves afterwards.
git fetch "${FORGE_REMOTE}" "$BRANCH" 2>/dev/null || true
if git rev-parse --verify "${FORGE_REMOTE}/${BRANCH}" >/dev/null 2>&1; then
PUSHED=true
fi
# Fallback: check local commits ahead of base
# NOTE(review): this marks the run as "pushed" when the worktree branch has
# commits that the base branch lacks, even if no push reached the remote —
# confirm that pr_create handles a branch that only exists locally.
if [ "$PUSHED" = false ]; then
if git -C "$WORKTREE" log "${FORGE_REMOTE}/${PRIMARY_BRANCH}..${BRANCH}" --oneline 2>/dev/null | grep -q .; then
PUSHED=true
fi
fi
if [ "$PUSHED" = true ]; then
# --- Path A: code changes pushed — create PR and walk to merge ---
log "branch pushed — creating PR"
# pr_create / pr_walk_to_merge / issue_close / issue_block are defined
# outside this file. pr_create's stdout is taken as the PR number; an empty
# result counts as failure.
PR_NUMBER=""
PR_NUMBER=$(pr_create "$BRANCH" "action: ${ISSUE_TITLE}" \
"Closes #${ISSUE}
Automated action execution by action-agent.") || true
if [ -n "$PR_NUMBER" ]; then
log "walking PR #${PR_NUMBER} to merge"
# _AGENT_SESSION_ID and _PR_WALK_EXIT_REASON are not set in this file —
# presumably by agent_run and pr_walk_to_merge respectively; verify. With
# `set -u`, an unset _AGENT_SESSION_ID aborts the script on the next line.
# The walk's return status is ignored; only the exit reason is inspected.
pr_walk_to_merge "$PR_NUMBER" "$_AGENT_SESSION_ID" "$WORKTREE" || true
case "${_PR_WALK_EXIT_REASON:-}" in
merged)
log "PR #${PR_NUMBER} merged — closing issue"
issue_close "$ISSUE"
;;
*)
# Any reason other than "merged" (including none at all) blocks the issue.
log "PR #${PR_NUMBER} not merged (reason: ${_PR_WALK_EXIT_REASON:-unknown})"
issue_block "$ISSUE" "pr_not_merged: ${_PR_WALK_EXIT_REASON:-unknown}"
;;
esac
else
log "ERROR: failed to create PR"
issue_block "$ISSUE" "pr_creation_failed"
fi
else
# --- Path B: no code changes — close issue directly ---
# NOTE(review): a run that stopped to wait for human input (prompt step 4,
# "the orchestrator will block the issue") also ends up here and the issue is
# closed, not blocked — confirm this is intended.
log "no branch pushed — closing issue (Path B)"
issue_close "$ISSUE"
fi
log "action-agent finished for issue #${ISSUE}"

75
action/action-poll.sh Executable file
View file

@ -0,0 +1,75 @@
#!/usr/bin/env bash
# action-poll.sh — Cron scheduler: find open 'action' issues, spawn action-agent
#
# An issue is ready for action if:
# - It is open and labeled 'action'
# - No tmux session named action-{project}-{issue_num} is already active
# - No live process is recorded in /tmp/action-agent-{issue_num}.lock
#
# At most one action-agent is spawned per run; remaining issues are picked up
# by later runs.
#
# Usage:
# cron every 10min
# action-poll.sh [projects/foo.toml] # optional project config
set -euo pipefail
# Export the optional project config path (empty when no argument is given)
# before sourcing env.sh — presumably env.sh reads it; confirm.
export PROJECT_TOML="${1:-}"
# shellcheck source=../lib/env.sh
source "$(dirname "$0")/../lib/env.sh"
# Use action-bot's own Forgejo identity (#747)
# Falls back to the FORGE_TOKEN already in the environment when
# FORGE_ACTION_TOKEN is unset or empty.
FORGE_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}"
# shellcheck source=../lib/guard.sh
source "$(dirname "$0")/../lib/guard.sh"
check_active action
# Poll log file; the project name defaults to "default" when PROJECT_NAME is unset.
LOGFILE="${DISINTO_LOG_DIR}/action/action-poll-${PROJECT_NAME:-default}.log"
# Absolute directory of this script, used to locate action-agent.sh.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Append one timestamped line (UTC) tagged "poll:" to $LOGFILE.
# Arguments: message words, joined with single spaces.
log() {
  local stamp
  stamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
  printf '[%s] poll: %s\n' "$stamp" "$*" >> "$LOGFILE"
}
# --- Memory guard ---
# memory_guard comes from a sourced library; presumably it stops the run when
# available memory is below the given threshold — confirm units in lib/guard.sh.
memory_guard 2000

# --- Find open 'action' issues ---
log "scanning for open action issues"
# A failed request leaves ACTION_ISSUES empty, which is handled just below.
ACTION_ISSUES=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
  "${FORGE_API}/issues?state=open&labels=action&limit=50&type=issues") || true

case "$ACTION_ISSUES" in
  '' | null)
    log "no action issues found"
    exit 0
    ;;
esac

COUNT=$(jq 'length' <<< "$ACTION_ISSUES")
if ((COUNT == 0)); then
  log "no action issues found"
  exit 0
fi
log "found ${COUNT} open action issue(s)"

# Walk the issues in API order and spawn an action-agent for the first one
# that has neither an active tmux session nor a live lock-file PID. The loop
# stops after one spawn to limit memory pressure; later polls pick up the rest.
for ((i = 0; i < COUNT; i++)); do
  ISSUE_NUM=$(jq -r --argjson idx "$i" '.[$idx].number' <<< "$ACTION_ISSUES")
  SESSION="action-${PROJECT_NAME}-${ISSUE_NUM}"

  if tmux has-session -t "$SESSION" 2>/dev/null; then
    log "issue #${ISSUE_NUM}: session ${SESSION} already active, skipping"
    continue
  fi

  # A lock file whose PID is still alive means an agent is starting up.
  LOCKFILE="/tmp/action-agent-${ISSUE_NUM}.lock"
  if [[ -f "$LOCKFILE" ]]; then
    LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "")
    if [[ -n "$LOCK_PID" ]] && kill -0 "$LOCK_PID" 2>/dev/null; then
      log "issue #${ISSUE_NUM}: agent starting (PID ${LOCK_PID}), skipping"
      continue
    fi
  fi

  log "spawning action-agent for issue #${ISSUE_NUM}"
  nohup "${SCRIPT_DIR}/action-agent.sh" "$ISSUE_NUM" "$PROJECT_TOML" >> "$LOGFILE" 2>&1 &
  log "started action-agent PID $! for issue #${ISSUE_NUM}"
  break
done

View file

@ -1,109 +0,0 @@
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 -->
# Architect — Agent Instructions
## What this agent is
The architect is a strategic decomposition agent that breaks down vision issues
into development sprints. It proposes sprints via PRs on the ops repo and
converses with humans through PR comments.
## Role
- **Input**: Vision issues from VISION.md, prerequisite tree from ops repo
- **Output**: Sprint proposals as PRs on the ops repo, sub-issue files
- **Mechanism**: Bash-driven orchestration in `architect-run.sh`, pitching formula via `formulas/run-architect.toml`
- **Identity**: `architect-bot` on Forgejo
## Responsibilities
1. **Strategic decomposition**: Break down large vision items into coherent
sprints that can be executed by the dev agent
2. **Design fork identification**: When multiple implementation approaches exist,
identify the forks and file sub-issues for each path
3. **Sprint PR creation**: Propose sprints as PRs on the ops repo with clear
acceptance criteria and dependencies
4. **Human conversation**: Respond to PR comments, refine sprint proposals based
on human feedback
5. **Sub-issue filing**: After design forks are resolved, file concrete sub-issues
for implementation
## Formula
The architect pitching is driven by `formulas/run-architect.toml`. This formula defines
the steps for:
- Research: analyzing vision items and prerequisite tree
- Pitch: creating structured sprint PRs
- Sub-issue filing: creating concrete implementation issues
## Bash-driven orchestration
Bash in `architect-run.sh` handles state detection and orchestration:
- **Deterministic state detection**: Bash reads the Forgejo reviews API to detect
ACCEPT/REJECT decisions — no model-dependent API parsing
- **Human guidance injection**: Review body text from ACCEPT reviews is injected
directly into the research prompt as context
- **Response processing**: When ACCEPT/REJECT responses are detected, bash invokes
the agent with appropriate context (session resumed for questions phase)
### State transitions
```
New vision issue → pitch PR (model generates pitch, bash creates PR)
APPROVED review → start design questions (model posts Q1:, adds Design forks section)
Answers received → continue Q&A (model processes answers, posts follow-ups)
All forks resolved → sub-issue filing (model files implementation issues)
REJECT review → close PR + journal (model processes rejection, bash merges PR)
```
### Session management
The agent maintains a global session file at `/tmp/architect-session-{project}.sid`.
When processing responses, bash checks if the PR is in the questions phase and
resumes the session using `--resume session_id` to preserve codebase context.
## Execution
Run via `architect/architect-run.sh`, which:
- Acquires a poll-loop lock (via `acquire_lock`) and checks available memory
- Cleans up per-issue scratch files from previous runs (`/tmp/architect-{project}-scratch-*.md`)
- Sources shared libraries (env.sh, formula-session.sh)
- Uses FORGE_ARCHITECT_TOKEN for authentication
- Processes existing architect PRs via bash-driven design phase
- Loads the formula and builds context from VISION.md, AGENTS.md, and ops repo
- Bash orchestrates state management:
- Fetches open vision issues, open architect PRs, and merged sprint PRs from Forgejo API
- Filters out visions already with open PRs, in-progress label, sub-issues, or merged sprint PRs
- Selects up to `pitch_budget` (3 - open architect PRs) remaining vision issues
- For each selected issue, invokes stateless `claude -p` with issue body + context
- Creates PRs directly from pitch content (no scratch files)
- Agent is invoked only for response processing (ACCEPT/REJECT handling)
**Multi-sprint pitching**: The architect pitches up to 3 sprints per run. Bash handles all state management:
- Fetches Forgejo API data (vision issues, open PRs, merged PRs)
- Filters and deduplicates (no model-level dedup or journal-based memory)
- For each selected vision issue, bash invokes stateless `claude -p` to generate pitch markdown
- Bash creates the PR with pitch content and posts ACCEPT/REJECT footer comment
- Branch names use issue number (architect/sprint-vision-{issue_number}) to avoid collisions
## Schedule
The architect runs every 6 hours as part of the polling loop in
`docker/agents/entrypoint.sh` (iteration math at lines 196-208).
## State
Architect state is tracked in `state/.architect-active`. The agent is disabled
by default: this empty marker file is not created automatically.
## Related issues
- #96: Architect agent parent issue
- #100: Architect formula — research + design fork identification
- #101: Architect formula — sprint PR creation with questions
- #102: Architect formula — answer parsing + sub-issue filing
- #491: Refactor — bash-driven design phase with stateful session resumption

View file

@ -1,903 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# architect-run.sh — Polling-loop wrapper: architect execution via SDK + formula
#
# Synchronous bash loop using claude -p (one-shot invocation).
# No tmux sessions, no phase files — the bash script IS the state machine.
#
# Flow:
# 1. Guards: run lock, memory check
# 2. Precondition checks: skip if no work (no vision issues, no responses)
# 3. Load formula (formulas/run-architect.toml)
# 4. Context: VISION.md, AGENTS.md, ops:prerequisites.md, structural graph
# 5. Stateless pitch generation: for each selected issue:
# - Fetch issue body from Forgejo API (bash)
# - Invoke claude -p with issue body + context (stateless, no API calls)
# - Create PR with pitch content (bash)
# - Post footer comment (bash)
# 6. Response processing: handle ACCEPT/REJECT on existing PRs
#
# Precondition checks (bash before model):
# - Skip if no vision issues AND no open architect PRs
# - Skip if 3+ architect PRs open AND no ACCEPT/REJECT responses to process
# - Only invoke model when there's actual work: new pitches or response processing
#
# Usage:
# architect-run.sh [projects/disinto.toml] # project config (default: disinto)
#
# Called by: entrypoint.sh polling loop (every 6 hours)
# =============================================================================
set -euo pipefail
# Directory of this script and the repository root one level above it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
# Accept project config from argument; default to disinto
export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}"
# shellcheck source=../lib/env.sh
source "$FACTORY_ROOT/lib/env.sh"
# Override FORGE_TOKEN with architect-bot's token (#747)
# Falls back to the existing FORGE_TOKEN when FORGE_ARCHITECT_TOKEN is unset or empty.
FORGE_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}"
# shellcheck source=../lib/formula-session.sh
source "$FACTORY_ROOT/lib/formula-session.sh"
# shellcheck source=../lib/worktree.sh
source "$FACTORY_ROOT/lib/worktree.sh"
# shellcheck source=../lib/guard.sh
source "$FACTORY_ROOT/lib/guard.sh"
# shellcheck source=../lib/agent-sdk.sh
source "$FACTORY_ROOT/lib/agent-sdk.sh"
# Architect log file; log() below appends to it.
LOG_FILE="${DISINTO_LOG_DIR}/architect/architect.log"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
LOGFILE="$LOG_FILE"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
SID_FILE="/tmp/architect-session-${PROJECT_NAME}.sid"
# Per-PR session files for stateful resumption across runs
SID_DIR="/tmp/architect-sessions-${PROJECT_NAME}"
mkdir -p "$SID_DIR"
# Scratch file path, plus a prefix presumably used for per-issue scratch files
# (the prefix is not referenced in this part of the script — confirm).
SCRATCH_FILE="/tmp/architect-${PROJECT_NAME}-scratch.md"
SCRATCH_FILE_PREFIX="/tmp/architect-${PROJECT_NAME}-scratch"
# Worktree path handed to formula_worktree_setup further down.
WORKTREE="/tmp/${PROJECT_NAME}-architect-run"
# Override LOG_AGENT for consistent agent identification
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh
LOG_AGENT="architect"
# Architect-specific log(): appends one UTC-timestamped line to $LOG_FILE,
# tagged with $LOG_AGENT (or "architect" when that is unset or empty).
# Arguments: message words, joined with single spaces.
# shellcheck disable=SC2034
log() {
  local stamp who
  who=${LOG_AGENT:-architect}
  stamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
  printf '[%s] %s: %s\n' "$stamp" "$who" "$*" >> "$LOG_FILE"
}
# ── Guards ────────────────────────────────────────────────────────────────
# All three helpers come from sourced libraries; each may end the run early.
# NOTE(review): the lock path is not project-scoped, unlike SID_FILE and
# WORKTREE — confirm that a single lock across all projects is intended.
check_active architect
acquire_run_lock "/tmp/architect-run.lock"
memory_guard 2000
log "--- Architect run start ---"
# ── Resolve forge remote for git operations ─────────────────────────────
resolve_forge_remote
# ── Resolve agent identity for .profile repo ────────────────────────────
# Only looked up when AGENT_IDENTITY is not already set and an architect token
# exists. Any curl/jq failure leaves AGENT_IDENTITY empty instead of aborting
# the run (|| true inside the command substitution).
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_ARCHITECT_TOKEN:-}" ]; then
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_ARCHITECT_TOKEN}" \
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
fi
# ── Load formula + context ───────────────────────────────────────────────
# The run exits with status 1 when the formula cannot be loaded.
load_formula_or_profile "architect" "$FACTORY_ROOT/formulas/run-architect.toml" || exit 1
build_context_block VISION.md AGENTS.md ops:prerequisites.md
# ── Prepare .profile context (lessons injection) ─────────────────────────
formula_prepare_profile_context
# ── Build structural analysis graph ──────────────────────────────────────
build_graph_section
# ── Read scratch file (compaction survival) ───────────────────────────────
# Both values are interpolated into the prompts built further down.
SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE")
SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE")
# ── Build prompt ─────────────────────────────────────────────────────────
build_sdk_prompt_footer
# Architect prompt: strategic decomposition of vision into sprints
# See: architect/AGENTS.md for full role description
# Pattern: heredoc function to avoid inline prompt construction
# Note: Uses CONTEXT_BLOCK, GRAPH_SECTION, SCRATCH_CONTEXT from formula-session.sh
# Architecture Decision: AD-003 — The runtime creates and destroys, the formula preserves.
#
# Prints the default (pitch) prompt to stdout. Takes no arguments.
# Also reads FORGE_REPO, FORMULA_CONTENT, SCRATCH_INSTRUCTION and PROMPT_FOOTER
# and calls formula_lessons_block.
# The heredoc delimiter is unquoted, so ${...} and $(...) are expanded when the
# function runs.
build_architect_prompt() {
cat <<_PROMPT_EOF_
You are the architect agent for ${FORGE_REPO}. Work through the formula below.
Your role: strategic decomposition of vision issues into development sprints.
Propose sprints via PRs on the ops repo, converse with humans through PR comments,
and file sub-issues after design forks are resolved.
## Project context
${CONTEXT_BLOCK}
${GRAPH_SECTION}
${SCRATCH_CONTEXT}
$(formula_lessons_block)
## Formula
${FORMULA_CONTENT}
${SCRATCH_INSTRUCTION}
${PROMPT_FOOTER}
_PROMPT_EOF_
}
# ── Build prompt for specific session mode ───────────────────────────────
# Prints the prompt text for the given session mode to stdout.
#
# Args:    session_mode — "start_questions", "questions_phase" or "pitch";
#          any other value is treated like "pitch".
# Globals: FORGE_REPO, CONTEXT_BLOCK, GRAPH_SECTION, SCRATCH_CONTEXT,
#          FORMULA_CONTENT, SCRATCH_INSTRUCTION, PROMPT_FOOTER (read)
# Calls:   formula_lessons_block, build_architect_prompt
#
# The heredocs are deliberately unquoted so ${...} and $(...) expand. For that
# reason every literal backtick is escaped (\`): an unescaped pair such as
# `## Design forks` would be run as a command substitution whose body is a
# shell comment, silently dropping the section name from the prompt.
build_architect_prompt_for_mode() {
  local session_mode="$1"
  case "$session_mode" in
    "start_questions")
      cat <<_PROMPT_EOF_
You are the architect agent for ${FORGE_REPO}. Work through the formula below.
Your role: strategic decomposition of vision issues into development sprints.
Propose sprints via PRs on the ops repo, converse with humans through PR comments,
and file sub-issues after design forks are resolved.
## CURRENT STATE: Approved PR awaiting initial design questions
A sprint pitch PR has been approved by the human (via APPROVED review), but the
design conversation has not yet started. Your task is to:
1. Read the approved sprint pitch from the PR body
2. Identify the key design decisions that need human input
3. Post initial design questions (Q1:, Q2:, etc.) as comments on the PR
4. Add a \`## Design forks\` section to the PR body documenting the design decisions
5. File sub-issues for each design fork path if applicable
This is NOT a pitch phase — the pitch is already approved. This is the START
of the design Q&A phase.
## Project context
${CONTEXT_BLOCK}
${GRAPH_SECTION}
${SCRATCH_CONTEXT}
$(formula_lessons_block)
## Formula
${FORMULA_CONTENT}
${SCRATCH_INSTRUCTION}
${PROMPT_FOOTER}
_PROMPT_EOF_
      ;;
    "questions_phase")
      cat <<_PROMPT_EOF_
You are the architect agent for ${FORGE_REPO}. Work through the formula below.
Your role: strategic decomposition of vision issues into development sprints.
Propose sprints via PRs on the ops repo, converse with humans through PR comments,
and file sub-issues after design forks are resolved.
## CURRENT STATE: Design Q&A in progress
A sprint pitch PR is in the questions phase:
- The PR has a \`## Design forks\` section
- Initial questions (Q1:, Q2:, etc.) have been posted
- Humans may have posted answers or follow-up questions
Your task is to:
1. Read the existing questions and the PR body
2. Read human answers from PR comments
3. Parse the answers and determine next steps
4. Post follow-up questions if needed (Q3:, Q4:, etc.)
5. If all design forks are resolved, file sub-issues for each path
6. Update the \`## Design forks\` section as you progress
## Project context
${CONTEXT_BLOCK}
${GRAPH_SECTION}
${SCRATCH_CONTEXT}
$(formula_lessons_block)
## Formula
${FORMULA_CONTENT}
${SCRATCH_INSTRUCTION}
${PROMPT_FOOTER}
_PROMPT_EOF_
      ;;
    "pitch"|*)
      # Default: pitch new sprints (original behavior)
      build_architect_prompt
      ;;
  esac
}
# ── Create worktree ──────────────────────────────────────────────────────
# Runs at load time, before the helper functions below are defined.
# NOTE(review): formula_worktree_setup is not defined in this file; presumably
# it comes from one of the sourced libs and prepares $WORKTREE — confirm.
formula_worktree_setup "$WORKTREE"
# ── Detect if PR is in questions-awaiting-answers phase ──────────────────
# Reports whether the first open architect PR on the ops repo is in the design
# questions phase: its body contains a `## Design forks` section AND at least
# one of its comments contains a question marker (Q1:, Q2:, ...).
#
# Globals: OPS_REPO_ROOT, FORGE_TOKEN, FORGE_API, FORGE_OPS_REPO (read)
# Returns: 0 when the PR is in the questions phase (also logged);
#          1 otherwise, including a missing ops checkout or any failed API call.
#
# Only the first open PR whose title contains "architect:" is inspected.
#
# jq output is captured into variables and matched in-shell instead of being
# piped into `grep -q`/`head`: under `set -o pipefail` an early-exiting reader
# can kill the writer with SIGPIPE and turn a successful match into a failure.
detect_questions_phase() {
  local ops_repo="${OPS_REPO_ROOT:-/home/agent/data/ops}"
  local auth="Authorization: token ${FORGE_TOKEN}"
  local pulls_api="${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls"
  local question_re='Q[0-9]+:'
  local response pr_number pr_body comments comment_bodies

  [ -d "${ops_repo}/.git" ] || return 1

  # Find the first open architect PR.
  response=$(curl -sf -H "$auth" "${pulls_api}?state=open" 2>/dev/null) || return 1
  pr_number=$(printf '%s' "$response" \
    | jq -r '[.[] | select(.title | contains("architect:")) | .number][0] // empty' 2>/dev/null) \
    || return 1
  [ -n "$pr_number" ] || return 1

  # The `## Design forks` section is added once design questions have started.
  pr_body=$(curl -sf -H "$auth" "${pulls_api}/${pr_number}" 2>/dev/null \
    | jq -r '.body // empty') || return 1
  [[ "$pr_body" == *"## Design forks"* ]] || return 1

  # Look for question markers in the decoded comment bodies.
  comments=$(curl -sf -H "$auth" \
    "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" 2>/dev/null) || return 1
  comment_bodies=$(printf '%s' "$comments" | jq -r '.[].body // empty') || return 1
  [[ "$comment_bodies" =~ $question_re ]] || return 1

  log "Detected PR #${pr_number} in questions-awaiting-answers phase"
  return 0
}
# ── Detect if PR is approved and awaiting initial design questions ────────
# Reports whether the first open architect PR on the ops repo has been accepted
# but the design conversation has not started yet. That is the case when:
#   - the PR has at least one review in state APPROVED,
#   - its body has NO `## Design forks` section, and
#   - none of its comments contains a question marker (Q1:, Q2:, ...).
#
# Globals: OPS_REPO_ROOT, FORGE_TOKEN, FORGE_API, FORGE_OPS_REPO (read)
# Returns: 0 when the PR is approved and awaiting initial questions (logged);
#          1 otherwise, including a missing ops checkout or any failed API call.
#
# Only the first open PR whose title contains "architect:" is inspected.
#
# jq output is captured into variables and matched in-shell instead of being
# piped into `grep -q`/`head`: under `set -o pipefail` an early-exiting reader
# can kill the writer with SIGPIPE, which would make a PR that already has
# question comments look as if it had none.
detect_approved_pending_questions() {
  local ops_repo="${OPS_REPO_ROOT:-/home/agent/data/ops}"
  local auth="Authorization: token ${FORGE_TOKEN}"
  local pulls_api="${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls"
  local question_re='Q[0-9]+:'
  local response pr_number pr_body reviews comments comment_bodies

  [ -d "${ops_repo}/.git" ] || return 1

  # Find the first open architect PR.
  response=$(curl -sf -H "$auth" "${pulls_api}?state=open" 2>/dev/null) || return 1
  pr_number=$(printf '%s' "$response" \
    | jq -r '[.[] | select(.title | contains("architect:")) | .number][0] // empty' 2>/dev/null) \
    || return 1
  [ -n "$pr_number" ] || return 1

  pr_body=$(curl -sf -H "$auth" "${pulls_api}/${pr_number}" 2>/dev/null \
    | jq -r '.body // empty') || return 1

  # Require at least one APPROVED review.
  reviews=$(curl -sf -H "$auth" "${pulls_api}/${pr_number}/reviews" 2>/dev/null) || return 1
  printf '%s' "$reviews" | jq -e 'any(.[]; .state == "APPROVED")' >/dev/null 2>&1 || return 1

  # A Design forks section means the PR is already in (or past) the questions phase.
  if [[ "$pr_body" == *"## Design forks"* ]]; then
    return 1
  fi

  # Existing question comments also mean the conversation has already started.
  comments=$(curl -sf -H "$auth" \
    "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" 2>/dev/null) || return 1
  comment_bodies=$(printf '%s' "$comments" | jq -r '.[].body // empty') || return 1
  if [[ "$comment_bodies" =~ $question_re ]]; then
    return 1
  fi

  log "Detected PR #${pr_number} approved and awaiting initial design questions"
  return 0
}
# ── Sub-issue existence check ────────────────────────────────────────────
# Checks whether any OPEN issue (other than the vision issue itself) carries
# the marker "Decomposed from #<N>" in its body.
#
# Args:    vision_issue_number
# Globals: FORGE_TOKEN, FORGE_API (read)
# Returns: 0 if at least one open sub-issue exists, 1 otherwise. A failed API
#          call also returns 1, i.e. is treated as "no sub-issues".
#
# The marker must be followed by a non-digit or the end of the body, so that
# issue #4 does not match sub-issues of #41, #400, ...
# Only the first 100 open issues are examined (single API page).
has_open_subissues() {
  local vision_issue="$1"
  local issues_json
  local subissue_count=0

  # One request for all open issues with bodies (avoids N+1 calls).
  issues_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API}/issues?state=open&limit=100" 2>/dev/null) || return 1

  # Any jq failure (malformed JSON, non-numeric issue number) counts as zero.
  subissue_count=$(printf '%s' "$issues_json" | jq -r --arg vid "$vision_issue" '
    [ .[]
      | select(.number != ($vid | tonumber))
      | select((.body // "") | test("Decomposed from #" + $vid + "([^0-9]|$)"))
    ] | length
  ' 2>/dev/null) || subissue_count=0

  if [ "${subissue_count:-0}" -gt 0 ]; then
    log "Vision issue #${vision_issue} has ${subissue_count} open sub-issue(s) — skipping"
    return 0 # Has open sub-issues
  fi

  log "Vision issue #${vision_issue} has no open sub-issues"
  return 1 # No open sub-issues
}
# ── Merged sprint PR check ───────────────────────────────────────────────
# Checks whether the ops repo has a MERGED architect PR whose body references
# the given vision issue as "#<N>".
#
# Args:    vision_issue_number
# Globals: FORGE_TOKEN, FORGE_API, FORGE_OPS_REPO (read)
# Returns: 0 if such a merged PR exists, 1 otherwise. A failed listing call
#          also returns 1; PRs whose details cannot be fetched are skipped.
#
# The reference must be followed by a non-digit or end of line, so issue #4 is
# not matched by "refs #41". Forms such as "refs #N" or "references #N" are
# covered because they contain "#N".
# Only the first 100 closed PRs are examined (single API page).
has_merged_sprint_pr() {
  local vision_issue="$1"
  local pulls_api="${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls"
  local ref_re="#${vision_issue}([^0-9]|\$)"
  local prs_json pr_numbers pr_num pr_details is_merged pr_body

  # Closed PRs include both merged and merely closed ones.
  prs_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${pulls_api}?state=closed&limit=100" 2>/dev/null) || return 1

  pr_numbers=$(printf '%s' "$prs_json" \
    | jq -r '.[] | select(.title | contains("architect:")) | .number' 2>/dev/null) || return 1

  while IFS= read -r pr_num; do
    [ -z "$pr_num" ] && continue

    pr_details=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
      "${pulls_api}/${pr_num}" 2>/dev/null) || continue

    # Skip PRs that were closed without being merged.
    is_merged=$(printf '%s' "$pr_details" | jq -r '.merged // false') || continue
    [ "$is_merged" = "true" ] || continue

    pr_body=$(printf '%s' "$pr_details" | jq -r '.body // ""') || continue

    # A here-string (not a pipe) feeds grep, so an early grep -q exit cannot
    # trip pipefail.
    if grep -qE -- "$ref_re" <<< "$pr_body"; then
      log "Found merged sprint PR #${pr_num} referencing vision issue #${vision_issue} — skipping"
      return 0 # Has merged sprint PR
    fi
  done <<< "$pr_numbers"

  log "Vision issue #${vision_issue} has no merged sprint PR"
  return 1 # No merged sprint PR
}
# ── Helper: Fetch all open vision issues from Forgejo API ─────────────────
# Prints the API response for open issues labelled "vision" (limit 100).
# When the request fails, prints an empty JSON array instead.
# Globals: FORGE_TOKEN, FORGE_API (read)
# Returns: always 0
fetch_vision_issues() {
  local query="labels=vision&state=open&limit=100"
  if ! curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
      "${FORGE_API}/issues?${query}" 2>/dev/null; then
    printf '%s\n' '[]'
  fi
}
# ── Helper: Fetch open PRs from ops repo Forgejo API ─────────────────────
# Prints the API response for ALL open PRs on the ops repo (limit 100); the
# list is not filtered by title here, so callers select the "architect:" PRs.
# When the request fails, prints an empty JSON array instead.
# Globals: FORGE_TOKEN, FORGE_API, FORGE_OPS_REPO (read)
# Returns: always 0
fetch_open_architect_prs() {
  local pulls_url="${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100"
  if ! curl -sf -H "Authorization: token ${FORGE_TOKEN}" "$pulls_url" 2>/dev/null; then
    printf '%s\n' '[]'
  fi
}
# ── Helper: Get vision issue body by number ──────────────────────────────
# Args:    issue_number
# Globals: FORGE_TOKEN, FORGE_API (read)
# Outputs: the issue's body text (an empty line when the body is null or missing)
get_vision_issue_body() {
  local number="$1"
  local issue_url="${FORGE_API}/issues/${number}"
  curl -sf -H "Authorization: token ${FORGE_TOKEN}" "$issue_url" 2>/dev/null \
    | jq -r '.body // ""'
}
# ── Helper: Get vision issue title by number ─────────────────────────────
# Args:    issue_number
# Globals: FORGE_TOKEN, FORGE_API (read)
# Outputs: the issue's title (an empty line when the title is null or missing)
get_vision_issue_title() {
  local number="$1"
  local issue_url="${FORGE_API}/issues/${number}"
  curl -sf -H "Authorization: token ${FORGE_TOKEN}" "$issue_url" 2>/dev/null \
    | jq -r '.title // ""'
}
# ── Helper: Create a sprint pitch via stateless claude -p call ───────────
# The model NEVER calls Forgejo API. It only reads context and generates pitch.
# Args: vision_issue_number vision_issue_title vision_issue_body
# Returns: pitch markdown to stdout; returns 1 (after logging a warning) when
# no pitch text could be extracted from the agent output.
#
# This is a stateless invocation: the model has no memory between calls.
# All state management (which issues to pitch, dedup logic, etc.) happens in bash.
#
# Reads globals: CONTEXT_BLOCK, GRAPH_SECTION, FORMULA_CONTENT,
# SCRATCH_INSTRUCTION, PROMPT_FOOTER, FORGE_REPO, LOGFILE, CLAUDE_MODEL (optional).
generate_pitch() {
local issue_num="$1"
local issue_title="$2"
local issue_body="$3"
# Build context block with vision issue details
local pitch_context
pitch_context="
## Vision Issue #${issue_num}
### Title
${issue_title}
### Description
${issue_body}
## Project Context
${CONTEXT_BLOCK}
${GRAPH_SECTION}
$(formula_lessons_block)
## Formula
${FORMULA_CONTENT}
${SCRATCH_INSTRUCTION}
${PROMPT_FOOTER}
"
# Prompt: model generates pitch markdown only, no API calls
local pitch_prompt="You are the architect agent for ${FORGE_REPO}. Write a sprint pitch for the vision issue above.
Instructions:
1. Output ONLY the pitch markdown (no explanations, no preamble, no postscript)
2. Use this exact format:
# Sprint: <sprint-name>
## Vision issues
- #${issue_num} — ${issue_title}
## What this enables
<what the project can do after this sprint that it can't do now>
## What exists today
<current state — infrastructure, interfaces, code that can be reused>
## Complexity
<number of files/subsystems, estimated sub-issues>
<gluecode vs greenfield ratio>
## Risks
<what could go wrong, what breaks if this is done badly>
## Cost — new infra to maintain
<what ongoing maintenance burden does this sprint add>
<new services, scheduled tasks, formulas, agent roles>
## Recommendation
<architect's assessment: worth it / defer / alternative approach>
IMPORTANT: Do NOT include design forks or questions. This is a go/no-go pitch.
---
${pitch_context}
"
# Execute stateless claude -p call
# A non-zero exit from agent_run is tolerated (|| true); failure is detected
# below through the empty-pitch check. stderr is appended to $LOGFILE.
# ${CLAUDE_MODEL:+...} is left unquoted on purpose so it expands to either
# nothing or the two words `--model <name>`.
local pitch_output
pitch_output=$(agent_run -p "$pitch_prompt" --output-format json --dangerously-skip-permissions --max-turns 200 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") || true
# Extract pitch content from JSON response
# NOTE(review): assumes the agent output is a JSON object with a top-level
# `content` field — confirm against agent_run in lib/agent-sdk.sh.
local pitch
pitch=$(printf '%s' "$pitch_output" | jq -r '.content // empty' 2>/dev/null) || pitch=""
if [ -z "$pitch" ]; then
log "WARNING: empty pitch generated for vision issue #${issue_num}"
return 1
fi
# Output pitch to stdout for caller to use
printf '%s' "$pitch"
}
# ── Helper: Create PR on ops repo via Forgejo API ────────────────────────
# Creates a branch on the ops repo, commits sprints/<slug>.md to it and opens
# a PR against the primary branch.
#
# Args:    sprint_title sprint_body branch_name
# Globals: FORGE_TOKEN, FORGE_API, FORGE_OPS_REPO, PRIMARY_BRANCH (read;
#          PRIMARY_BRANCH defaults to "main")
# Outputs: the new PR number on stdout (success only)
# Returns: 0 on success; 1 if any step fails, including a PR-creation response
#          that carries no PR number.
#
# Every JSON payload is built with jq so that quotes, backslashes or newlines
# in titles, branch names and markdown cannot break the request body.
create_sprint_pr() {
  local sprint_title="$1"
  local sprint_body="$2"
  local branch_name="$3"
  local base_branch="${PRIMARY_BRANCH:-main}"
  local repo_api="${FORGE_API}/repos/${FORGE_OPS_REPO}"
  local auth="Authorization: token ${FORGE_TOKEN}"
  local branch_payload sprint_name sprint_slug sprint_spec sprint_spec_b64
  local file_payload pr_payload pr_response pr_number

  # Create branch on ops repo
  branch_payload=$(jq -n --arg new "$branch_name" --arg old "$base_branch" \
    '{new_branch_name: $new, old_branch_name: $old}') || return 1
  if ! curl -sf -X POST \
      -H "$auth" \
      -H "Content-Type: application/json" \
      "${repo_api}/branches" \
      -d "$branch_payload" >/dev/null 2>&1; then
    log "WARNING: failed to create branch ${branch_name}"
    return 1
  fi

  # Derive the spec filename from the title: drop the "architect:" prefix,
  # lowercase, spaces → dashes, collapse dash runs.
  # NOTE(review): other characters (e.g. "/") pass through into the URL path
  # unencoded — confirm titles are constrained upstream.
  sprint_name=$(printf '%s' "$sprint_title" | sed 's/^architect: *//; s/ *$//')
  sprint_slug=$(printf '%s' "$sprint_name" | tr '[:upper:]' '[:lower:]' | tr ' ' '-' | sed 's/--*/-/g')

  # Prepare sprint spec content
  sprint_spec="# Sprint: ${sprint_name}
${sprint_body}
"
  # Single-line base64; stripping newlines avoids the GNU-only `-w 0` flag.
  sprint_spec_b64=$(printf '%s' "$sprint_spec" | base64 | tr -d '\n')

  # Write sprint spec file to branch
  file_payload=$(jq -n \
    --arg message "sprint: add ${sprint_slug}.md" \
    --arg content "$sprint_spec_b64" \
    --arg branch "$branch_name" \
    '{message: $message, content: $content, branch: $branch}') || return 1
  if ! curl -sf -X PUT \
      -H "$auth" \
      -H "Content-Type: application/json" \
      "${repo_api}/contents/sprints/${sprint_slug}.md" \
      -d "$file_payload" >/dev/null 2>&1; then
    log "WARNING: failed to write sprint spec file"
    return 1
  fi

  # Create PR
  pr_payload=$(jq -n \
    --arg title "$sprint_title" \
    --arg body "$sprint_body" \
    --arg head "$branch_name" \
    --arg base "$base_branch" \
    '{title: $title, body: $body, head: $head, base: $base}') || return 1
  pr_response=$(curl -sf -X POST \
    -H "$auth" \
    -H "Content-Type: application/json" \
    "${repo_api}/pulls" \
    -d "$pr_payload" 2>/dev/null) || return 1

  # A response without a PR number is a failure, not a created PR.
  pr_number=$(printf '%s' "$pr_response" | jq -r '.number // empty' 2>/dev/null) || pr_number=""
  if [ -z "$pr_number" ]; then
    log "WARNING: PR creation for '${sprint_title}' returned no PR number"
    return 1
  fi

  log "Created sprint PR #${pr_number}: ${sprint_title}"
  printf '%s' "$pr_number"
}
# ── Helper: Post footer comment on PR ────────────────────────────────────
# Posts the fixed ACCEPT/REJECT instruction comment on an ops-repo PR.
# Args:    pr_number
# Globals: FORGE_TOKEN, FORGE_API, FORGE_OPS_REPO (read)
# Returns: 0 when the comment was posted, 1 otherwise (both outcomes are logged)
post_pr_footer() {
  local pr="$1"
  local message="Reply \`ACCEPT\` to proceed with design questions, or \`REJECT: <reason>\` to decline."
  local endpoint="${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr}/comments"
  # The message is a constant without quotes or backslashes, so it can be
  # embedded in the JSON body as-is.
  local payload="{\"body\": \"${message}\"}"

  if ! curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "$endpoint" \
      -d "$payload" >/dev/null 2>&1; then
    log "WARNING: failed to post footer comment on PR #${pr}"
    return 1
  fi

  log "Posted footer comment on PR #${pr}"
  return 0
}
# ── Helper: Add in-progress label to vision issue ────────────────────────
# Args: vision_issue_number
add_inprogress_label() {
local issue_num="$1"
# Get label ID for 'in-progress'
local labels_json
labels_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/labels" 2>/dev/null) || return 1
local inprogress_label_id
inprogress_label_id=$(printf '%s' "$labels_json" | jq -r --arg label "in-progress" '.[] | select(.name == $label) | .id' 2>/dev/null) || true
if [ -z "$inprogress_label_id" ]; then
log "WARNING: in-progress label not found"
return 1
fi
# Add label to issue
if curl -sf -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_API}/repos/${FORGE_REPO}/issues/${issue_num}/labels" \
-d "{\"labels\": [${inprogress_label_id}]}" >/dev/null 2>&1; then
log "Added in-progress label to vision issue #${issue_num}"
return 0
else
log "WARNING: failed to add in-progress label to vision issue #${issue_num}"
return 1
fi
}
# ── Precondition checks in bash before invoking the model ─────────────────
# Check 1: Skip if no vision issues exist and no open architect PRs to handle
# vision_count is the length of a response capped at one item (limit=1), so it
# is effectively 0 or 1; any curl/jq failure counts as 0.
vision_count=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues?labels=vision&state=open&limit=1" 2>/dev/null | jq length) || vision_count=0
if [ "${vision_count:-0}" -eq 0 ]; then
# Check for open architect PRs that need handling (ACCEPT/REJECT responses)
# NOTE(review): this listing uses limit=10 while the listings below use
# limit=100 — confirm the smaller page is intended.
open_arch_prs=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=10" 2>/dev/null | jq '[.[] | select(.title | startswith("architect:"))] | length') || open_arch_prs=0
if [ "${open_arch_prs:-0}" -eq 0 ]; then
log "no vision issues and no open architect PRs — skipping"
exit 0
fi
fi
# Check 2: Scan for ACCEPT/REJECT responses on open architect PRs (unconditional)
# This ensures responses are processed regardless of open_arch_prs count
has_responses_to_process=false
pr_numbers=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq -r '.[] | select(.title | startswith("architect:")) | .number') || pr_numbers=""
# pr_numbers is intentionally unquoted: it is a whitespace-separated list of
# numeric PR ids.
for pr_num in $pr_numbers; do
# PRs whose comments cannot be fetched are skipped.
comments=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
"${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || continue
# NOTE(review): the pattern requires a trailing colon, so a bare `ACCEPT`
# comment does not match, while the footer comment posted by post_pr_footer
# (which contains "REJECT: <reason>") does — confirm this is intended.
if printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qE '(ACCEPT|REJECT):'; then
has_responses_to_process=true
break
fi
done
# Check 2 (continued): Skip if already at max open pitches (3), unless there are responses to process
# open_arch_prs is recomputed here with limit=100 and is reused later to derive
# the pitch budget.
open_arch_prs=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
"${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq '[.[] | select(.title | startswith("architect:"))] | length') || open_arch_prs=0
if [ "${open_arch_prs:-0}" -ge 3 ]; then
if [ "$has_responses_to_process" = false ]; then
log "already 3 open architect PRs with no responses to process — skipping"
exit 0
fi
log "3 open architect PRs found but responses detected — processing"
fi
# ── Bash-driven state management: Select vision issues for pitching ───────
# This logic is also documented in formulas/run-architect.toml preflight step
# All Forgejo data is fetched here by bash; the model does not query the API.
vision_issues_json=$(fetch_vision_issues)
open_arch_prs_json=$(fetch_open_architect_prs)
# Lookup table: key = issue number referenced by an open architect PR, value = 1
declare -A _arch_vision_issues_with_open_prs
while IFS= read -r pr_num; do
  [ -n "$pr_num" ] || continue
  # A PR whose body cannot be fetched is skipped.
  if ! pr_body=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}" 2>/dev/null | jq -r '.body // ""'); then
    continue
  fi
  # Every "#<digits>" token in the PR body counts as a reference
  # (e.g., "refs #419" or "#419").
  while IFS= read -r ref_issue; do
    if [ -n "$ref_issue" ]; then
      _arch_vision_issues_with_open_prs["$ref_issue"]=1
    fi
  done < <(printf '%s' "$pr_body" | grep -oE '#[0-9]+' | tr -d '#' | sort -u)
done < <(printf '%s' "$open_arch_prs_json" | jq -r '.[] | select(.title | startswith("architect:")) | .number')
# Get all open vision issues (falls back to an empty JSON list on fetch failure)
if ! vision_issues_json=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
  "${FORGE_API}/issues?labels=vision&state=open&limit=100" 2>/dev/null); then
  vision_issues_json='[]'
fi
# Numbers of all open issues carrying the in-progress label, one per line
if ! in_progress_issues=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
  "${FORGE_API}/issues?labels=in-progress&state=open&limit=100" 2>/dev/null \
  | jq -r '.[].number' 2>/dev/null); then
  in_progress_issues=""
fi
# Select vision issues for pitching
ARCHITECT_TARGET_ISSUES=()
vision_issue_count=0
# Remaining pitch slots: the cap of 3 minus the architect PRs already open
pitch_budget=$((3 - open_arch_prs))
if ! vision_issue_nums=$(printf '%s' "$vision_issues_json" | jq -r '.[].number' 2>/dev/null); then
  vision_issue_nums=""
fi
while IFS= read -r vision_issue; do
  [ -n "$vision_issue" ] || continue
  vision_issue_count=$((vision_issue_count + 1))
  # Stop scanning as soon as the budget is used up
  if [ "${pitch_budget}" -le 0 ] || [ ${#ARCHITECT_TARGET_ISSUES[@]} -ge "$pitch_budget" ]; then
    log "Pitch budget exhausted (${#ARCHITECT_TARGET_ISSUES[@]}/${pitch_budget})"
    break
  fi
  # Exclusion rules, evaluated in order; the first match wins and the
  # remaining checks are not run for this issue.
  _arch_skip_msg=""
  if [ "${_arch_vision_issues_with_open_prs[$vision_issue]:-}" = "1" ]; then
    _arch_skip_msg="Vision issue #${vision_issue} already has open architect PR — skipping"
  elif printf '%s\n' "$in_progress_issues" | grep -q "^${vision_issue}$"; then
    _arch_skip_msg="Vision issue #${vision_issue} has in-progress label — skipping"
  elif has_open_subissues "$vision_issue"; then
    # Open sub-issues: already being worked on
    _arch_skip_msg="Vision issue #${vision_issue} has open sub-issues — skipping"
  elif has_merged_sprint_pr "$vision_issue"; then
    # Merged sprint PR: decomposition already done
    _arch_skip_msg="Vision issue #${vision_issue} has merged sprint PR — skipping"
  fi
  if [ -n "$_arch_skip_msg" ]; then
    log "$_arch_skip_msg"
    continue
  fi
  # No rule matched: queue the issue for pitching
  ARCHITECT_TARGET_ISSUES+=("$vision_issue")
  log "Selected vision issue #${vision_issue} for pitching"
done <<< "$vision_issue_nums"
# Nothing selected: either end the run, or carry on so that pending
# ACCEPT/REJECT responses still get processed further down.
if [ ${#ARCHITECT_TARGET_ISSUES[@]} -eq 0 ]; then
  if [ "${has_responses_to_process:-false}" != "true" ]; then
    log "No vision issues available for pitching (all have open PRs, sub-issues, or merged sprint PRs) — signaling PHASE:done"
    # The phase file is only updated when it already exists; it is never created here.
    if [ -f "/tmp/architect-${PROJECT_NAME}.phase" ]; then
      echo "PHASE:done" > "/tmp/architect-${PROJECT_NAME}.phase"
    fi
    exit 0
  fi
  log "No new pitches needed — responses to process"
  # Execution continues to the response processing block below
fi
log "Selected ${#ARCHITECT_TARGET_ISSUES[@]} vision issue(s) for pitching: ${ARCHITECT_TARGET_ISSUES[*]}"
# ── Stateless pitch generation and PR creation (bash-driven, no model API calls) ──
# Per target issue, in this order:
#   1. bash fetches the issue title and body
#   2. generate_pitch produces the pitch text from them
#   3. bash creates the sprint PR carrying the pitch
#   4. bash posts the footer comment and labels the issue in-progress
# A failure at steps 1-3 logs a warning and moves on to the next issue.
pitch_count=0
for vision_issue in "${ARCHITECT_TARGET_ISSUES[@]}"; do
  log "Processing vision issue #${vision_issue}"
  # Issue details come from bash helpers, not from the model
  issue_title=$(get_vision_issue_title "$vision_issue")
  issue_body=$(get_vision_issue_body "$vision_issue")
  if ! { [ -n "$issue_title" ] && [ -n "$issue_body" ]; }; then
    log "WARNING: failed to fetch vision issue #${vision_issue} details"
    continue
  fi
  log "Generating pitch for vision issue #${vision_issue}"
  # A non-zero exit from generate_pitch is tolerated; only empty output counts as failure
  pitch=$(generate_pitch "$vision_issue" "$issue_title" "$issue_body") || true
  [ -n "$pitch" ] || {
    log "WARNING: failed to generate pitch for vision issue #${vision_issue}"
    continue
  }
  # The branch name embeds the issue number to avoid collisions across runs
  branch_name="architect/sprint-vision-${vision_issue}"
  pr_number=$(create_sprint_pr "architect: ${issue_title}" "$pitch" "$branch_name")
  [ -n "$pr_number" ] || {
    log "WARNING: failed to create PR for vision issue #${vision_issue}"
    continue
  }
  post_pr_footer "$pr_number"
  add_inprogress_label "$vision_issue"
  pitch_count=$((pitch_count + 1))
  log "Completed pitch for vision issue #${vision_issue} — PR #${pr_number}"
done
log "Generated ${pitch_count} sprint pitch(es)"
# ── Run agent for response processing if needed ───────────────────────────
# Always process ACCEPT/REJECT responses when present, regardless of new pitches
if [ "${has_responses_to_process:-false}" = "true" ]; then
  log "Processing ACCEPT/REJECT responses on existing PRs"
  # Re-scan every open architect PR. needs_agent becomes true as soon as one
  # PR has an APPROVED review, an ACCEPT comment, or a REJECT comment. The loop
  # deliberately keeps going so every such PR is logged.
  needs_agent=false
  pr_numbers=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null \
    | jq -r '.[] | select(.title | startswith("architect:")) | .number') || pr_numbers=""
  for pr_num in $pr_numbers; do
    # Comments are mandatory for the check: skip the PR if they cannot be fetched
    comments=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
      "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || continue
    # Reviews are optional: a failed fetch just means "no review decision"
    reviews=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
      "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}/reviews" 2>/dev/null) || reviews=""
    # Review decisions take precedence over comment keywords
    if printf '%s' "$reviews" | jq -e '.[] | select(.state == "APPROVED")' >/dev/null 2>&1; then
      log "PR #${pr_num} has APPROVED review — needs agent handling"
      needs_agent=true
    elif printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qiE '^[^:]+: *ACCEPT'; then
      log "PR #${pr_num} has ACCEPT comment — needs agent handling"
      needs_agent=true
    elif printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qiE '^[^:]+: *REJECT:'; then
      log "PR #${pr_num} has REJECT comment — needs agent handling"
      needs_agent=true
    fi
  done
  # Run agent only if there are responses to process
  if [ "$needs_agent" = "true" ]; then
    # Session handling depends on the PR state:
    #   questions phase + usable session id -> resume that session
    #   approved, questions not yet asked   -> fresh session, "start_questions" prompt
    #   anything else                       -> fresh session, "fresh" prompt
    RESUME_ARGS=()
    SESSION_MODE="fresh"
    if detect_questions_phase; then
      # Read the stored session id. An existing but empty or unreadable file is
      # treated like a missing one, so that "--resume" is never passed an
      # empty session id.
      RESUME_SESSION=""
      if [ -f "$SID_FILE" ]; then
        RESUME_SESSION=$(cat "$SID_FILE" 2>/dev/null) || RESUME_SESSION=""
      fi
      if [ -n "$RESUME_SESSION" ]; then
        RESUME_ARGS=(--resume "$RESUME_SESSION")
        SESSION_MODE="questions_phase"
        log "PR in questions-awaiting-answers phase — resuming session: ${RESUME_SESSION:0:12}..."
      elif [ -f "$SID_FILE" ]; then
        log "PR in questions phase but session file is empty — starting fresh session"
      else
        log "PR in questions phase but no session file — starting fresh session"
      fi
    elif detect_approved_pending_questions; then
      # PR is approved but awaiting initial design questions — start fresh with special prompt
      SESSION_MODE="start_questions"
      log "PR approved and awaiting initial design questions — starting fresh session"
    else
      log "PR not in questions phase — starting fresh session"
    fi
    # Build prompt with appropriate mode
    PROMPT_FOR_MODE=$(build_architect_prompt_for_mode "$SESSION_MODE")
    # The ${arr[@]+...} form expands to nothing for an empty array without
    # tripping "unbound variable" under `set -u` on bash older than 4.4.
    agent_run ${RESUME_ARGS[@]+"${RESUME_ARGS[@]}"} --worktree "$WORKTREE" "$PROMPT_FOR_MODE"
    log "agent_run complete"
  fi
fi
# ── Clean up scratch files (legacy single file + per-issue files) ──────────
# Both removals are guarded: an empty path is skipped, and "--" stops rm from
# parsing a path that starts with "-" as an option.
if [ -n "${SCRATCH_FILE:-}" ]; then
  rm -f -- "$SCRATCH_FILE"
fi
# With an empty prefix the glob would become "-*.md", which either matches
# unrelated files in the current directory or is passed to rm as an option.
if [ -n "${SCRATCH_FILE_PREFIX:-}" ]; then
  rm -f -- "${SCRATCH_FILE_PREFIX}"-*.md
fi
# Write journal entry post-session (best-effort: a failure here is ignored)
profile_write_journal "architect-run" "Architect run $(date -u +%Y-%m-%d)" "complete" "" || true
log "--- Architect run done ---"

File diff suppressed because it is too large Load diff

View file

@ -1,40 +1,22 @@
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 --> <!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
# Dev Agent # Dev Agent
**Role**: Implement issues autonomously — write code, push branches, address **Role**: Implement issues autonomously — write code, push branches, address
CI failures and review feedback. CI failures and review feedback.
**Trigger**: `dev-poll.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh` **Trigger**: `dev-poll.sh` runs every 10 min via cron. Sources `lib/guard.sh` and
every 5 minutes (iteration math at line 171-175). Sources `lib/guard.sh` and calls calls `check_active dev` first — skips if `$FACTORY_ROOT/state/.dev-active` is
`check_active dev` first — skips if `$FACTORY_ROOT/state/.dev-active` is absent. Then absent. Then performs a direct-merge scan (approved + CI green PRs — including
performs a direct-merge scan (approved + CI green PRs — including chore/gardener PRs chore/gardener PRs without issue numbers), then checks the agent lock and scans
without issue numbers), then checks the agent lock and scans for ready issues using a for ready issues using a two-tier priority queue: (1) `priority`+`backlog` issues
two-tier priority queue: (1) `priority`+`backlog` issues first (FIFO within tier), then first (FIFO within tier), then (2) plain `backlog` issues (FIFO). Orphaned
(2) plain `backlog` issues (FIFO). Orphaned in-progress issues are also picked up. The in-progress issues are also picked up. The direct-merge scan runs before the lock
direct-merge scan runs before the lock check so approved PRs get merged even while a check so approved PRs get merged even while a dev-agent session is active.
dev-agent session is active.
**Key files**: **Key files**:
- `dev/dev-poll.sh` — Polling loop participant: finds next ready issue, handles merge/rebase - `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. Formula guard skips issues labeled `formula`, `action`, `prediction/dismissed`, or `prediction/unreviewed` (replaced `prediction/backlog` — that label no longer exists)
of approved PRs, tracks CI fix attempts. Invoked by `docker/agents/entrypoint.sh` every 5
minutes. `BOT_USER` is resolved once at startup via the Forge `/user` API and cached for
all assignee checks. Formula guard skips issues labeled `formula`, `prediction/dismissed`,
or `prediction/unreviewed`. **Race prevention**: checks issue assignee before claiming —
skips if assigned to a different bot user. **Stale branch abandonment**: closes PRs and
deletes branches that are behind `$PRIMARY_BRANCH` (restarts poll cycle for a fresh start).
**Stale in-progress recovery**: on each poll cycle, scans for issues labeled `in-progress`.
If the issue has a `vision` label, sets `BLOCKED_BY_INPROGRESS=true` and skips further
stale checks (vision issues are managed by the architect). If the issue is assigned to
`$BOT_USER` (this agent), checks for pending review feedback first — if an open PR has
`REQUEST_CHANGES`, spawns the dev-agent to address it before setting `BLOCKED_BY_INPROGRESS=true`;
otherwise just sets blocked. If assigned to another agent, logs and falls through (does not
block). If no assignee, no open PR, and no agent lock file — removes `in-progress`, adds
`blocked` with a human-triage comment. **Per-agent open-PR gate**: before starting new work,
filters open waiting PRs to only those assigned to this agent (`$BOT_USER`). Other agents'
PRs do not block this agent's pipeline (#358, #369). **Pre-lock merge scan own-PRs only**:
the direct-merge scan only merges PRs whose linked issue is assigned to this agent — skips
PRs owned by other bot users (#374).
- `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval - `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval
- `dev/phase-handler.sh` — Phase callback functions: `post_refusal_comment()`, `_on_phase_change()`, `build_phase_protocol_prompt()`. `do_merge()` detects already-merged PRs on HTTP 405 (race with dev-poll's pre-lock scan) and returns success instead of escalating. Sources `lib/mirrors.sh` and calls `mirror_push()` after every successful merge.
- `dev/phase-test.sh` — Integration test for the phase protocol - `dev/phase-test.sh` — Integration test for the phase protocol
**Environment variables consumed** (via `lib/env.sh` + project TOML): **Environment variables consumed** (via `lib/env.sh` + project TOML):
@ -51,9 +33,9 @@ PRs owned by other bot users (#374).
**Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h. **Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h.
**Lifecycle**: dev-poll.sh (invoked by polling loop, `check_active dev`) → dev-agent.sh → **Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux `dev-{project}-{issue}` → phase file
tmux session → phase file drives CI/review loop → merge + `mirror_push()` → close issue. drives CI/review loop → merge + `mirror_push()` → close issue. On respawn after
On respawn after `PHASE:escalate`, the stale phase file is cleared first so the session `PHASE:escalate`, the stale phase file is cleared first so the session starts
starts clean; the reinject prompt tells Claude not to re-escalate for the same reason. clean; the reinject prompt tells Claude not to re-escalate for the same reason.
On respawn for any active PR, the prompt explicitly tells Claude the PR already exists On respawn for any active PR, the prompt explicitly tells Claude the PR already
and not to create a new one via API. exists and not to create a new one via API.

View file

@ -30,7 +30,6 @@ source "$(dirname "$0")/../lib/worktree.sh"
source "$(dirname "$0")/../lib/pr-lifecycle.sh" source "$(dirname "$0")/../lib/pr-lifecycle.sh"
source "$(dirname "$0")/../lib/mirrors.sh" source "$(dirname "$0")/../lib/mirrors.sh"
source "$(dirname "$0")/../lib/agent-sdk.sh" source "$(dirname "$0")/../lib/agent-sdk.sh"
source "$(dirname "$0")/../lib/formula-session.sh"
# Auto-pull factory code to pick up merged fixes before any logic runs # Auto-pull factory code to pick up merged fixes before any logic runs
git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true
@ -41,7 +40,7 @@ REPO_ROOT="${PROJECT_REPO_ROOT}"
LOCKFILE="/tmp/dev-agent-${PROJECT_NAME:-default}.lock" LOCKFILE="/tmp/dev-agent-${PROJECT_NAME:-default}.lock"
STATUSFILE="/tmp/dev-agent-status-${PROJECT_NAME:-default}" STATUSFILE="/tmp/dev-agent-status-${PROJECT_NAME:-default}"
BRANCH="fix/issue-${ISSUE}" # Default; will be updated after FORGE_REMOTE is known BRANCH="fix/issue-${ISSUE}"
WORKTREE="/tmp/${PROJECT_NAME}-worktree-${ISSUE}" WORKTREE="/tmp/${PROJECT_NAME}-worktree-${ISSUE}"
SID_FILE="/tmp/dev-session-${PROJECT_NAME}-${ISSUE}.sid" SID_FILE="/tmp/dev-session-${PROJECT_NAME}-${ISSUE}.sid"
PREFLIGHT_RESULT="/tmp/dev-agent-preflight.json" PREFLIGHT_RESULT="/tmp/dev-agent-preflight.json"
@ -186,11 +185,7 @@ log "preflight passed"
# ============================================================================= # =============================================================================
# CLAIM ISSUE # CLAIM ISSUE
# ============================================================================= # =============================================================================
if ! issue_claim "$ISSUE"; then issue_claim "$ISSUE"
log "SKIP: failed to claim issue #${ISSUE} (already assigned to another agent)"
echo '{"status":"already_done","reason":"issue was claimed by another agent"}' > "$PREFLIGHT_RESULT"
exit 0
fi
CLAIMED=true CLAIMED=true
# ============================================================================= # =============================================================================
@ -263,19 +258,6 @@ FORGE_REMOTE="${FORGE_REMOTE:-origin}"
export FORGE_REMOTE export FORGE_REMOTE
log "forge remote: ${FORGE_REMOTE}" log "forge remote: ${FORGE_REMOTE}"
# Generate unique branch name per attempt to avoid collision with failed attempts
# Only apply when not in recovery mode (RECOVERY_MODE branch is already set from existing PR)
# First attempt: fix/issue-N, subsequent: fix/issue-N-1, fix/issue-N-2, etc.
if [ "$RECOVERY_MODE" = false ]; then
  # Count the exact branch fix/issue-N (0 or 1).
  # grep -c always prints a count but exits 1 when that count is 0, so the
  # fallback must not print a second "0": that would make ATTEMPT "0\n0" and
  # break the arithmetic below. "|| true" only neutralises the exit status.
  ATTEMPT=$(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}" 2>/dev/null \
    | grep -c "refs/heads/fix/issue-${ISSUE}$" || true)
  # Add the number of suffixed branches fix/issue-N-*
  ATTEMPT=$(( ${ATTEMPT:-0} + $(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}-*" 2>/dev/null | wc -l) ))
  if [ "$ATTEMPT" -gt 0 ]; then
    BRANCH="fix/issue-${ISSUE}-${ATTEMPT}"
  fi
fi
log "using branch: ${BRANCH}"
if [ "$RECOVERY_MODE" = true ]; then if [ "$RECOVERY_MODE" = true ]; then
if ! worktree_recover "$WORKTREE" "$BRANCH" "$FORGE_REMOTE"; then if ! worktree_recover "$WORKTREE" "$BRANCH" "$FORGE_REMOTE"; then
log "ERROR: worktree recovery failed" log "ERROR: worktree recovery failed"
@ -320,10 +302,6 @@ OPEN_ISSUES_SUMMARY=$(forge_api GET "/issues?state=open&labels=backlog&limit=20&
PUSH_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE") PUSH_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE")
# Load lessons from .profile repo if available (pre-session)
profile_load_lessons || true
LESSONS_INJECTION="${LESSONS_CONTEXT:-}"
if [ "$RECOVERY_MODE" = true ]; then if [ "$RECOVERY_MODE" = true ]; then
GIT_DIFF_STAT=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --stat 2>/dev/null \ GIT_DIFF_STAT=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --stat 2>/dev/null \
| head -20 || echo "(no diff)") | head -20 || echo "(no diff)")
@ -354,10 +332,6 @@ ${GIT_DIFF_STAT}
3. Address any pending review comments or CI failures. 3. Address any pending review comments or CI failures.
4. Commit and push to \`${BRANCH}\`. 4. Commit and push to \`${BRANCH}\`.
${LESSONS_INJECTION:+## Lessons learned
${LESSONS_INJECTION}
}
${PUSH_INSTRUCTIONS}" ${PUSH_INSTRUCTIONS}"
else else
INITIAL_PROMPT="You are working in a git worktree at ${WORKTREE} on branch ${BRANCH}. INITIAL_PROMPT="You are working in a git worktree at ${WORKTREE} on branch ${BRANCH}.
@ -373,10 +347,6 @@ ${OPEN_ISSUES_SUMMARY}
$(if [ -n "$PRIOR_ART_DIFF" ]; then $(if [ -n "$PRIOR_ART_DIFF" ]; then
printf '## Prior Art (closed PR — DO NOT start from scratch)\n\nA previous PR attempted this issue but was closed without merging. Reuse as much as possible.\n\n```diff\n%s\n```\n' "$PRIOR_ART_DIFF" printf '## Prior Art (closed PR — DO NOT start from scratch)\n\nA previous PR attempted this issue but was closed without merging. Reuse as much as possible.\n\n```diff\n%s\n```\n' "$PRIOR_ART_DIFF"
fi) fi)
${LESSONS_INJECTION:+## Lessons learned
${LESSONS_INJECTION}
}
## Instructions ## Instructions
1. Read AGENTS.md in this repo for project context and coding conventions. 1. Read AGENTS.md in this repo for project context and coding conventions.
@ -480,40 +450,6 @@ Closing as already implemented."
fi fi
log "ERROR: no branch pushed after agent_run" log "ERROR: no branch pushed after agent_run"
# Dump diagnostics for a run that ended without a pushed branch.
# Everything here is best-effort: copy failures are ignored, and missing files
# just mean less information in the log.
_diag_dir="${DISINTO_LOG_DIR:-/tmp}/dev"
diag_file="${_diag_dir}/agent-run-last.json"
if [ -f "$diag_file" ]; then
  result_text=""; cost_usd=""; num_turns=""
  result_text=$(jq -r '.result // "no result field"' "$diag_file" 2>/dev/null | head -50) || result_text="(parse error)"
  cost_usd=$(jq -r '.cost_usd // "?"' "$diag_file" 2>/dev/null) || cost_usd="?"
  num_turns=$(jq -r '.num_turns // "?"' "$diag_file" 2>/dev/null) || num_turns="?"
  log "no_push diagnostics: turns=${num_turns} cost=${cost_usd}"
  log "no_push result: ${result_text}"
  # Save full output for later analysis
  cp "$diag_file" "${_diag_dir}/no-push-${ISSUE}-$(date +%s).json" 2>/dev/null || true
fi

# Save full session log for debugging
# Session logs are stored in CLAUDE_CONFIG_DIR/projects/{worktree-hash}/{session-id}.jsonl
_wt_hash=$(printf '%s' "$WORKTREE" | md5sum | cut -c1-12)
_cl_config="${CLAUDE_CONFIG_DIR:-$HOME/.claude}"
# An unset session id yields a path that does not exist, so the block below is skipped
_session_log="${_cl_config}/projects/${_wt_hash}/${_AGENT_SESSION_ID:-}.jsonl"
if [ -f "$_session_log" ]; then
  _session_copy="${_diag_dir}/no-push-session-${ISSUE}-$(date +%s).jsonl"
  # Only claim the log was saved when the copy actually succeeded
  if cp "$_session_log" "$_session_copy" 2>/dev/null; then
    log "no_push session log saved to ${_session_copy}"
  fi

  # Log session summary for debugging.
  # grep -c prints the count itself and exits 1 when the count is 0, so the
  # fallback must not echo a second "0"; "|| true" only neutralises the exit
  # status and ${var:-0} covers the case where grep printed nothing at all.
  _read_calls=$(grep -c '"type":"read"' "$_session_log" 2>/dev/null || true)
  _edit_calls=$(grep -c '"type":"edit"' "$_session_log" 2>/dev/null || true)
  _bash_calls=$(grep -c '"type":"bash"' "$_session_log" 2>/dev/null || true)
  _text_calls=$(grep -c '"type":"text"' "$_session_log" 2>/dev/null || true)
  _failed_calls=$(grep -c '"exit_code":null' "$_session_log" 2>/dev/null || true)
  _total_turns=$(grep -c '"type":"turn"' "$_session_log" 2>/dev/null || true)
  log "no_push session summary: turns=${_total_turns:-0} reads=${_read_calls:-0} edits=${_edit_calls:-0} bash=${_bash_calls:-0} text=${_text_calls:-0} failed=${_failed_calls:-0}"
fi
issue_block "$ISSUE" "no_push" "Claude did not push branch ${BRANCH}" issue_block "$ISSUE" "no_push" "Claude did not push branch ${BRANCH}"
CLAIMED=false CLAIMED=false
worktree_cleanup "$WORKTREE" worktree_cleanup "$WORKTREE"
@ -561,12 +497,6 @@ if [ "$rc" -eq 0 ]; then
log "PR #${PR_NUMBER} merged" log "PR #${PR_NUMBER} merged"
issue_close "$ISSUE" issue_close "$ISSUE"
# Capture files changed for journal entry (after agent work)
FILES_CHANGED=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --name-only 2>/dev/null | tr '\n' ',' | sed 's/,$//') || FILES_CHANGED=""
# Write journal entry post-session (before cleanup)
profile_write_journal "$ISSUE" "$ISSUE_TITLE" "merged" "$FILES_CHANGED" || true
# Pull primary branch and push to mirrors # Pull primary branch and push to mirrors
git -C "$REPO_ROOT" fetch "$FORGE_REMOTE" "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$REPO_ROOT" fetch "$FORGE_REMOTE" "$PRIMARY_BRANCH" 2>/dev/null || true
git -C "$REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
@ -580,18 +510,6 @@ else
# Exhausted or unrecoverable failure # Exhausted or unrecoverable failure
log "PR walk failed: ${_PR_WALK_EXIT_REASON:-unknown}" log "PR walk failed: ${_PR_WALK_EXIT_REASON:-unknown}"
issue_block "$ISSUE" "${_PR_WALK_EXIT_REASON:-agent_failed}" issue_block "$ISSUE" "${_PR_WALK_EXIT_REASON:-agent_failed}"
# Capture files changed for journal entry (after agent work)
FILES_CHANGED=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --name-only 2>/dev/null | tr '\n' ',' | sed 's/,$//') || FILES_CHANGED=""
# Write journal entry post-session (before cleanup)
outcome="blocked_${_PR_WALK_EXIT_REASON:-agent_failed}"
profile_write_journal "$ISSUE" "$ISSUE_TITLE" "$outcome" "$FILES_CHANGED" || true
# Cleanup on failure: preserve remote branch and PR for debugging, clean up local worktree
# Remote state (PR and branch) stays open for inspection of CI logs and review comments
worktree_cleanup "$WORKTREE"
rm -f "$SID_FILE" "$IMPL_SUMMARY_FILE"
CLAIMED=false CLAIMED=false
fi fi

View file

@ -14,7 +14,7 @@
# 3. Ready "backlog" issues without "priority" (FIFO within tier) # 3. Ready "backlog" issues without "priority" (FIFO within tier)
# #
# Usage: # Usage:
# Called by: entrypoint.sh polling loop (every 10 min) # cron every 10min
# dev-poll.sh [projects/harb.toml] # optional project config # dev-poll.sh [projects/harb.toml] # optional project config
set -euo pipefail set -euo pipefail
@ -42,11 +42,6 @@ log() {
printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
} }
# Resolve current agent identity once at startup — cache for all assignee checks
BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API%%/repos*}/user" | jq -r '.login') || BOT_USER=""
log "running as agent: ${BOT_USER}"
# ============================================================================= # =============================================================================
# CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3) # CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3)
# ============================================================================= # =============================================================================
@ -99,100 +94,6 @@ is_blocked() {
| jq -e '.[] | select(.name == "blocked")' >/dev/null 2>&1 | jq -e '.[] | select(.name == "blocked")' >/dev/null 2>&1
} }
# =============================================================================
# STALENESS DETECTION FOR IN-PROGRESS ISSUES
# =============================================================================
# Tell whether the most recent "in-progress" label event on an issue is
# younger than a grace period.
# Prevents the race where a poller marks an issue as stale before the claiming
# agent's assign + label sequence has fully propagated. See issue #471.
# Globals:   FORGE_TOKEN, API (read)
# Arguments: $1 - issue number
#            $2 - grace period in seconds (optional, default 60)
# Returns:   0 if the label was added within the grace period, 1 otherwise
#            (including when no label event is found)
in_progress_recently_added() {
  local issue="$1" grace="${2:-60}"
  local now label_ts label_epoch delta
  now=$(date +%s)

  # Newest in-progress label event from the issue timeline.
  # Forgejo 11.x API returns type as string "label", not integer 7.
  label_ts=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/issues/${issue}/timeline" | \
    jq -r '[.[] | select(.type == "label") | select(.label.name == "in-progress")] | last | .created_at // empty') || true

  # No label event found — not recently added
  [ -n "$label_ts" ] || return 1

  # ISO timestamp -> epoch; an unparsable timestamp becomes epoch 0, which
  # makes the age larger than the grace period
  label_epoch=$(date -d "$label_ts" +%s 2>/dev/null || echo 0)
  delta=$(( now - label_epoch ))

  if [ "$delta" -lt "$grace" ]; then
    return 0
  else
    return 1
  fi
}
# Check whether an open PR exists whose head branch is fix/issue-<N>.
# Only the first 20 open PRs returned by the API are inspected.
# Globals:   FORGE_TOKEN, API (read)
# Arguments: $1 - issue number
# Returns:   0 if such an open PR exists, 1 if not
open_pr_exists() {
  local issue="$1"
  local pr_num
  pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/pulls?state=open&limit=20" | \
    jq -r --arg branch "fix/issue-${issue}" \
    '.[] | select(.head.ref == $branch) | .number' | head -1) || true
  if [ -z "$pr_num" ]; then
    return 1
  fi
  return 0
}
# Relabel a stale in-progress issue to blocked and post a diagnostic comment.
# Uses shared helpers from lib/issue-lifecycle.sh
# (_ilc_in_progress_id, _ilc_blocked_id, _ilc_post_comment, _ilc_log).
# Globals:   FORGE_TOKEN, API (read)
# Arguments: $1 - issue number
#            $2 - detection reason, shown verbatim in the comment
# Label changes are best-effort: a failed API call is ignored and the
# comment is still posted.
relabel_stale_issue() {
  local issue="$1" reason="$2"

  log "relabeling stale in-progress issue #${issue} to blocked: ${reason}"

  # Remove in-progress label (skipped when its id cannot be resolved)
  local ip_id
  ip_id=$(_ilc_in_progress_id)
  if [ -n "$ip_id" ]; then
    curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
      "${API}/issues/${issue}/labels/${ip_id}" >/dev/null 2>&1 || true
  fi

  # Add blocked label (skipped when its id cannot be resolved)
  local bk_id
  bk_id=$(_ilc_blocked_id)
  if [ -n "$bk_id" ]; then
    curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${API}/issues/${issue}/labels" \
      -d "{\"labels\":[${bk_id}]}" >/dev/null 2>&1 || true
  fi

  # Post diagnostic comment using shared helper.
  # A Markdown table only ends at a blank line, so one is emitted after the
  # last row; otherwise the Status line would render as another table row.
  # Status and Action are separated by a blank line so they render as two
  # paragraphs instead of being merged into one.
  local comment_body
  comment_body=$(
    printf '%s\n\n' '### Stale in-progress issue detected'
    printf '%s\n' '| Field | Value |'
    printf '%s\n' '|---|---|'
    printf '| Detection reason | `%s` |\n' "$reason"
    printf '| Timestamp | `%s` |\n\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    printf '%s\n\n' '**Status:** This issue was labeled `in-progress` but has no assignee, no open PR, and no agent lock file.'
    printf '%s\n' '**Action required:** A maintainer should triage this issue.'
  )
  _ilc_post_comment "$issue" "$comment_body"

  _ilc_log "stale issue #${issue} relabeled to blocked: ${reason}"
}
# ============================================================================= # =============================================================================
# HELPER: handle CI-exhaustion check/block (DRY for 3 call sites) # HELPER: handle CI-exhaustion check/block (DRY for 3 call sites)
# Sets CI_FIX_ATTEMPTS for caller use. Returns 0 if exhausted, 1 if not. # Sets CI_FIX_ATTEMPTS for caller use. Returns 0 if exhausted, 1 if not.
@ -254,10 +155,9 @@ try_direct_merge() {
if [ "$issue_num" -gt 0 ]; then if [ "$issue_num" -gt 0 ]; then
issue_close "$issue_num" issue_close "$issue_num"
# Remove in-progress label (don't re-add backlog — issue is closed) # Remove in-progress label (don't re-add backlog — issue is closed)
IP_ID=$(_ilc_in_progress_id)
curl -sf -X DELETE \ curl -sf -X DELETE \
-H "Authorization: token ${FORGE_TOKEN}" \ -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${issue_num}/labels/${IP_ID}" >/dev/null 2>&1 || true "${API}/issues/${issue_num}/labels/in-progress" >/dev/null 2>&1 || true
rm -f "/tmp/dev-session-${PROJECT_NAME}-${issue_num}.sid" \ rm -f "/tmp/dev-session-${PROJECT_NAME}-${issue_num}.sid" \
"/tmp/dev-impl-summary-${PROJECT_NAME}-${issue_num}.txt" "/tmp/dev-impl-summary-${PROJECT_NAME}-${issue_num}.txt"
fi fi
@ -377,16 +277,6 @@ for i in $(seq 0 $(($(echo "$PL_PRS" | jq 'length') - 1))); do
jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true
if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then
# Check if issue is assigned to this agent — only merge own PRs
if [ "$PL_ISSUE" -gt 0 ]; then
PR_ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${PL_ISSUE}") || true
PR_ISSUE_ASSIGNEE=$(echo "$PR_ISSUE_JSON" | jq -r '.assignee.login // ""') || true
if [ -n "$PR_ISSUE_ASSIGNEE" ] && [ "$PR_ISSUE_ASSIGNEE" != "$BOT_USER" ]; then
log "PR #${PL_PR_NUM} (issue #${PL_ISSUE}) assigned to ${PR_ISSUE_ASSIGNEE} — skipping merge (not mine)"
continue
fi
fi
if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then
PL_MERGED_ANY=true PL_MERGED_ANY=true
fi fi
@ -410,9 +300,6 @@ if [ -f "$LOCKFILE" ]; then
rm -f "$LOCKFILE" rm -f "$LOCKFILE"
fi fi
# --- Fetch origin refs before any stale branch checks ---
git fetch origin --prune 2>/dev/null || true
# --- Memory guard --- # --- Memory guard ---
memory_guard 2000 memory_guard 2000
@ -420,133 +307,31 @@ memory_guard 2000
# PRIORITY 1: orphaned in-progress issues # PRIORITY 1: orphaned in-progress issues
# ============================================================================= # =============================================================================
log "checking for in-progress issues" log "checking for in-progress issues"
ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues?state=open&labels=in-progress&limit=10&type=issues") "${API}/issues?state=open&labels=in-progress&limit=10&type=issues")
ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length') ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length')
BLOCKED_BY_INPROGRESS=false
if [ "$ORPHAN_COUNT" -gt 0 ]; then if [ "$ORPHAN_COUNT" -gt 0 ]; then
ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number') ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number')
# Staleness check: if no assignee, no open PR, and no agent lock, the issue is stale
OPEN_PR=false
if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls?state=open&limit=20" | \
jq -e --arg branch "fix/issue-${ISSUE_NUM}" \
'.[] | select(.head.ref == $branch)' >/dev/null 2>&1; then
OPEN_PR=true
fi
# Skip vision-labeled issues — they are managed by architect agent, not dev-poll
issue_labels=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${ISSUE_NUM}" | jq -r '[.labels[].name] | join(",")')
if echo "$issue_labels" | grep -q "vision"; then
log "issue #${ISSUE_NUM} has 'vision' label — skipping stale detection (managed by architect)"
BLOCKED_BY_INPROGRESS=true
fi
# Check if issue has an assignee — only block on issues assigned to this agent
assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" "${API}/issues/${ISSUE_NUM}" | jq -r '.assignee.login // ""')
if [ -n "$assignee" ]; then
if [ "$assignee" = "$BOT_USER" ]; then
# Check if my PR has review feedback to address before exiting
HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls?state=open&limit=20" | \
jq -r --arg branch "fix/issue-${ISSUE_NUM}" \
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
if [ -n "$HAS_PR" ]; then
# Check for REQUEST_CHANGES review feedback
REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls/${HAS_PR}/reviews") || true
HAS_CHANGES=$(echo "$REVIEWS_JSON" | \
jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true
if [ "${HAS_CHANGES:-0}" -gt 0 ]; then
log "issue #${ISSUE_NUM} has review feedback — spawning agent"
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
BLOCKED_BY_INPROGRESS=true
else
log "issue #${ISSUE_NUM} assigned to me — my thread is busy"
BLOCKED_BY_INPROGRESS=true
fi
else
log "issue #${ISSUE_NUM} assigned to me — my thread is busy"
BLOCKED_BY_INPROGRESS=true
fi
else
log "issue #${ISSUE_NUM} assigned to ${assignee} — their thread, not blocking"
BLOCKED_BY_INPROGRESS=true
# Issue assigned to another agent — don't block, fall through to backlog
fi
fi
# Only proceed with in-progress checks if not blocked by another agent
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
# Check for dev-agent lock file (agent may be running in another container)
LOCK_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE_NUM}.txt"
if [ -f "$LOCK_FILE" ]; then
log "issue #${ISSUE_NUM} has agent lock file — trusting active work"
BLOCKED_BY_INPROGRESS=true
fi
if [ "$OPEN_PR" = false ] && [ "$BLOCKED_BY_INPROGRESS" = false ]; then
# Grace period: skip if in-progress label was added <60s ago (issue #471)
if in_progress_recently_added "$ISSUE_NUM" 60; then
log "issue #${ISSUE_NUM} in-progress label added <60s ago — skipping stale detection (grace period)"
BLOCKED_BY_INPROGRESS=true
else
log "issue #${ISSUE_NUM} is stale (no assignee, no open PR, no agent lock) — relabeling to blocked"
relabel_stale_issue "$ISSUE_NUM" "no_assignee_no_open_pr_no_lock"
BLOCKED_BY_INPROGRESS=true
fi
fi
# Formula guard: formula-labeled issues should not be worked on by dev-agent. # Formula guard: formula-labeled issues should not be worked on by dev-agent.
# Remove in-progress label and skip to prevent infinite respawn cycle (#115). # Remove in-progress label and skip to prevent infinite respawn cycle (#115).
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true
SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|action|prediction/dismissed|prediction/unreviewed)$' | head -1) || true
if [ -n "$SKIP_LABEL" ]; then if [ -n "$SKIP_LABEL" ]; then
log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping" log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping"
IP_ID=$(_ilc_in_progress_id)
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true "${API}/issues/${ISSUE_NUM}/labels/in-progress" >/dev/null 2>&1 || true
BLOCKED_BY_INPROGRESS=true exit 0
fi
fi fi
# Check if there's already an open PR for this issue # Check if there's already an open PR for this issue
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls?state=open&limit=20" | \ "${API}/pulls?state=open&limit=20" | \
jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ jq -r --arg branch "fix/issue-${ISSUE_NUM}" \
'.[] | select(.head.ref == $branch) | .number' | head -1) || true '.[] | select(.head.ref == $branch) | .number' | head -1) || true
if [ -n "$HAS_PR" ]; then if [ -n "$HAS_PR" ]; then
# Check if branch is stale (behind primary branch)
BRANCH="fix/issue-${ISSUE_NUM}"
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0")
if [ "$AHEAD" -gt 0 ]; then
log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR"
# Close the PR via API
curl -sf -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/pulls/${HAS_PR}" \
-d '{"state":"closed"}' >/dev/null 2>&1 || true
# Delete the branch via git push
git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true
# Reset to fresh start on primary branch
git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true
git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true
BLOCKED_BY_INPROGRESS=true
fi
# Only process PR if not abandoned (stale branch check above)
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true
CI_STATE=$(ci_commit_status "$PR_SHA") || true CI_STATE=$(ci_commit_status "$PR_SHA") || true
@ -567,32 +352,21 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then
if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then
BLOCKED_BY_INPROGRESS=true exit 0
else fi
# Direct merge failed (conflicts?) — fall back to dev-agent # Direct merge failed (conflicts?) — fall back to dev-agent
log "falling back to dev-agent for PR #${HAS_PR} merge" log "falling back to dev-agent for PR #${HAS_PR} merge"
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)" log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)"
BLOCKED_BY_INPROGRESS=true exit 0
fi
# Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is
# pending/unknown. Definitive CI failure is handled by the elif below. # pending/unknown. Definitive CI failure is handled by the elif below.
elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then
# Check if issue is assigned to this agent — skip if assigned to another bot
ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${ISSUE_NUM}") || true
assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true
if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then
log "issue #${ISSUE_NUM} PR #${HAS_PR} REQUEST_CHANGES but assigned to ${assignee} — skipping"
# Don't block — fall through to backlog
BLOCKED_BY_INPROGRESS=false
else
log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent"
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)" log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
BLOCKED_BY_INPROGRESS=true exit 0
fi
elif ci_failed "$CI_STATE"; then elif ci_failed "$CI_STATE"; then
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then
@ -601,45 +375,21 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
else else
# Increment at actual launch time (not on guard-hit paths) # Increment at actual launch time (not on guard-hit paths)
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then
BLOCKED_BY_INPROGRESS=true # exhausted between check and launch exit 0 # exhausted between check and launch
else fi
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)" log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)"
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)" log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)"
BLOCKED_BY_INPROGRESS=true exit 0
fi
fi fi
else else
log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)"
BLOCKED_BY_INPROGRESS=true
fi
fi fi
else else
# Check assignee before adopting orphaned issue log "recovering orphaned issue #${ISSUE_NUM} (no PR found)"
ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${ISSUE_NUM}") || true
ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true
if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then
log "issue #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping (not orphaned)"
# Remove in-progress label since this agent isn't working on it
IP_ID=$(_ilc_in_progress_id)
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
# Don't block — fall through to backlog
else
log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})"
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)" log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)"
BLOCKED_BY_INPROGRESS=true
fi
fi
fi
fi
# If blocked by in-progress work, exit now
if [ "$BLOCKED_BY_INPROGRESS" = true ]; then
exit 0 exit 0
fi fi
fi fi
@ -709,14 +459,6 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do
# Stuck: REQUEST_CHANGES or CI failure -> spawn agent # Stuck: REQUEST_CHANGES or CI failure -> spawn agent
if [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then if [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then
# Check if issue is assigned to this agent — skip if assigned to another bot
ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${STUCK_ISSUE}") || true
assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true
if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) REQUEST_CHANGES but assigned to ${assignee} — skipping"
continue # skip this PR, check next stuck PR or fall through to backlog
fi
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) has REQUEST_CHANGES — fixing first" log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) has REQUEST_CHANGES — fixing first"
nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 & nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 &
log "started dev-agent PID $! for stuck PR #${PR_NUM}" log "started dev-agent PID $! for stuck PR #${PR_NUM}"
@ -779,18 +521,9 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
ISSUE_NUM=$(echo "$BACKLOG_JSON" | jq -r ".[$i].number") ISSUE_NUM=$(echo "$BACKLOG_JSON" | jq -r ".[$i].number")
ISSUE_BODY=$(echo "$BACKLOG_JSON" | jq -r ".[$i].body // \"\"") ISSUE_BODY=$(echo "$BACKLOG_JSON" | jq -r ".[$i].body // \"\"")
# Check assignee before claiming — skip if assigned to another bot
ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${ISSUE_NUM}") || true
ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true
if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then
log " #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping"
continue
fi
# Formula guard: formula-labeled issues must not be picked up by dev-agent. # Formula guard: formula-labeled issues must not be picked up by dev-agent.
ISSUE_LABELS=$(echo "$BACKLOG_JSON" | jq -r ".[$i].labels[].name" 2>/dev/null) || true ISSUE_LABELS=$(echo "$BACKLOG_JSON" | jq -r ".[$i].labels[].name" 2>/dev/null) || true
SKIP_LABEL=$(echo "$ISSUE_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true SKIP_LABEL=$(echo "$ISSUE_LABELS" | grep -oE '^(formula|action|prediction/dismissed|prediction/unreviewed)$' | head -1) || true
if [ -n "$SKIP_LABEL" ]; then if [ -n "$SKIP_LABEL" ]; then
log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — skipping in backlog scan" log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — skipping in backlog scan"
continue continue
@ -807,26 +540,6 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
'.[] | select((.head.ref == $branch) or (.title | contains($num))) | .number' | head -1) || true '.[] | select((.head.ref == $branch) or (.title | contains($num))) | .number' | head -1) || true
if [ -n "$EXISTING_PR" ]; then if [ -n "$EXISTING_PR" ]; then
# Check if branch is stale (behind primary branch)
BRANCH="fix/issue-${ISSUE_NUM}"
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0")
if [ "$AHEAD" -gt 0 ]; then
log "issue #${ISSUE_NUM} PR #${EXISTING_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR"
# Close the PR via API
curl -sf -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/pulls/${EXISTING_PR}" \
-d '{"state":"closed"}' >/dev/null 2>&1 || true
# Delete the branch via git push
git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true
# Reset to fresh start on primary branch
git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true
git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true
# Continue to find another ready issue
continue
fi
PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls/${EXISTING_PR}" | jq -r '.head.sha') || true "${API}/pulls/${EXISTING_PR}" | jq -r '.head.sha') || true
CI_STATE=$(ci_commit_status "$PR_SHA") || true CI_STATE=$(ci_commit_status "$PR_SHA") || true
@ -884,33 +597,10 @@ done
# Single-threaded per project: if any issue has an open PR waiting for review/CI, # Single-threaded per project: if any issue has an open PR waiting for review/CI,
# don't start new work — let the pipeline drain first # don't start new work — let the pipeline drain first
# But only block on PRs assigned to this agent (per-agent logic from #358)
if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then
# Filter to only this agent's waiting PRs log "holding #${READY_ISSUE} — waiting for open PR(s) to land first: ${WAITING_PRS}"
MY_WAITING_PRS=""
for pr_num in $(echo "$WAITING_PRS" | tr ',' ' '); do
pr_num="${pr_num#\#}" # Remove leading #
# Check if this PR's issue is assigned to this agent
pr_info=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls/${pr_num}" 2>/dev/null) || true
pr_branch=$(echo "$pr_info" | jq -r '.head.ref') || true
issue_num=$(echo "$pr_branch" | grep -oP '(?<=fix/issue-)\d+' || true)
if [ -z "$issue_num" ]; then
continue
fi
issue_assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${issue_num}" 2>/dev/null | jq -r '.assignee.login // ""') || true
if [ -n "$issue_assignee" ] && [ "$issue_assignee" = "$BOT_USER" ]; then
MY_WAITING_PRS="${MY_WAITING_PRS:-}${MY_WAITING_PRS:+, }#${pr_num}"
fi
done
if [ -n "$MY_WAITING_PRS" ]; then
log "holding #${READY_ISSUE} — waiting for my open PR(s) to land first: ${MY_WAITING_PRS}"
exit 0 exit 0
fi fi
log "other agents' PRs waiting: ${WAITING_PRS} — proceeding with #${READY_ISSUE}"
fi
if [ -z "$READY_ISSUE" ]; then if [ -z "$READY_ISSUE" ]; then
log "no ready issues (all blocked by unmerged deps)" log "no ready issues (all blocked by unmerged deps)"

820
dev/phase-handler.sh Normal file
View file

@ -0,0 +1,820 @@
#!/usr/bin/env bash
# dev/phase-handler.sh — Phase callback functions for dev-agent.sh
#
# Source this file from agent orchestrators after lib/agent-session.sh is loaded.
# Defines: post_blocked_diagnostic(), build_phase_protocol_prompt(), do_merge(),
# post_refusal_comment(), _on_phase_change()
#
# Required globals (set by calling agent before or after sourcing):
# ISSUE, FORGE_TOKEN, API, FORGE_WEB, PROJECT_NAME, FACTORY_ROOT
# BRANCH, PHASE_FILE, WORKTREE, IMPL_SUMMARY_FILE
# PRIMARY_BRANCH, SESSION_NAME, LOGFILE, ISSUE_TITLE
# WOODPECKER_REPO_ID, WOODPECKER_TOKEN, WOODPECKER_SERVER
#
# Globals with defaults (agents can override after sourcing):
# PR_NUMBER, CI_POLL_TIMEOUT, MAX_CI_FIXES, MAX_REVIEW_ROUNDS,
# REVIEW_POLL_TIMEOUT, CI_RETRY_COUNT, CI_FIX_COUNT, REVIEW_ROUND,
# CLAIMED, PHASE_POLL_INTERVAL
#
# Calls back to agent-defined helpers:
# cleanup_worktree(), cleanup_labels(), status(), log()
#
# shellcheck shell=bash
# shellcheck disable=SC2154 # globals are set in dev-agent.sh before calling
# shellcheck disable=SC2034 # CLAIMED is read by cleanup() in dev-agent.sh
# Load secret scanner for redacting tmux output before posting to issues
# shellcheck source=../lib/secret-scan.sh
source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh"
# Load shared CI helpers (is_infra_step, classify_pipeline_failure, etc.)
# shellcheck source=../lib/ci-helpers.sh
source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh"
# Load mirror push helper
# shellcheck source=../lib/mirrors.sh
source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh"
# --- Fallback callbacks (sourcing agents may replace them afterwards) ---
# Phase transitions invoke cleanup_worktree and cleanup_labels. When the
# sourcing agent has not defined them yet, install do-nothing versions so
# this file can be sourced on its own.
declare -f cleanup_worktree >/dev/null 2>&1 ||
  cleanup_worktree() { :; }
declare -f cleanup_labels >/dev/null 2>&1 ||
  cleanup_labels() { :; }
# --- Tunable globals: a default is applied only when the variable is unset
# --- or empty, so agents may set them before or after sourcing this file.
# Poll budgets (CI_POLL_TIMEOUT is compared against elapsed seconds).
: "${CI_POLL_TIMEOUT:=1800}" "${REVIEW_POLL_TIMEOUT:=10800}"
# Caps on automated fix / review cycles.
: "${MAX_CI_FIXES:=3}" "${MAX_REVIEW_ROUNDS:=5}"
# Running counters, starting at zero.
: "${CI_RETRY_COUNT:=0}" "${CI_FIX_COUNT:=0}" "${REVIEW_ROUND:=0}"
# Session state: no PR yet, issue not claimed.
: "${PR_NUMBER:=}" "${CLAIMED:=false}"
# Phase-file polling interval.
: "${PHASE_POLL_INTERVAL:=30}"
# --- Post diagnostic comment + label issue as blocked ---
# Captures the last 50 lines of the session's tmux pane (if the session still
# exists), redacts secrets, posts a structured Markdown comment on the issue,
# then calls cleanup_labels and adds the "blocked" label.
#
# Args: reason [session_name]   (session_name defaults to $SESSION_NAME)
# Globals read:    ISSUE, SESSION_NAME, PR_NUMBER, FORGE_TOKEN, API
# Globals written: CLAIMED (false), _BLOCKED_POSTED (true)
# Helpers used:    redact_secrets, cleanup_labels, ensure_blocked_label_id
# All forge calls are best-effort: a failed request never aborts the caller.
post_blocked_diagnostic() {
  local reason="$1"
  local session="${2:-${SESSION_NAME:-}}"

  # Capture last 50 lines from tmux pane (before kill)
  local tmux_output=""
  if [ -n "$session" ] && tmux has-session -t "$session" 2>/dev/null; then
    tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true)
  fi

  # Redact any secrets from tmux output before posting to issue
  if [ -n "$tmux_output" ]; then
    tmux_output=$(redact_secrets "$tmux_output")
  fi

  # Build diagnostic comment body. Blank lines separate the Markdown blocks:
  # without one after </summary> the fence is swallowed by the HTML block and
  # the pane output is not rendered as code.
  local comment
  comment="### Session failure diagnostic

| Field | Value |
|---|---|
| Exit reason | \`${reason}\` |
| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` |"

  # Only mention the PR when one has actually been created.
  if [ -n "${PR_NUMBER:-}" ] && [ "${PR_NUMBER:-0}" != "0" ]; then
    comment="${comment}
| PR | #${PR_NUMBER} |"
  fi

  if [ -n "$tmux_output" ]; then
    comment="${comment}

<details><summary>Last 50 lines from tmux pane</summary>

\`\`\`
${tmux_output}
\`\`\`

</details>"
  fi

  # Post comment to issue (jq does the JSON escaping of the body)
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${API}/issues/${ISSUE}/comments" \
    -d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true

  # Remove in-progress, add blocked
  cleanup_labels
  local blocked_id
  blocked_id=$(ensure_blocked_label_id)
  if [ -n "$blocked_id" ]; then
    curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${API}/issues/${ISSUE}/labels" \
      -d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true
  fi

  CLAIMED=false
  _BLOCKED_POSTED=true
}
# --- Build phase protocol prompt (shared across agents) ---
# Prints the phase-signaling instructions that are embedded in Claude prompts.
#
# Args: phase_file summary_file branch [remote]
#   remote defaults to $FORGE_REMOTE, then to "origin".
# Globals read: PRIMARY_BRANCH, FORGE_REMOTE
# Output: the protocol text on stdout
#
# Every command in the generated text embeds the concrete phase/summary path,
# so each snippet works even if the agent never defines PHASE_FILE or
# SUMMARY_FILE in its own shell.
build_phase_protocol_prompt() {
  local _pf="$1" _sf="$2" _br="$3" _remote="${4:-${FORGE_REMOTE:-origin}}"
  # Unquoted heredoc: ${...} expands now; \` yields a literal backtick.
  cat <<_PHASE_PROTOCOL_EOF_
## Phase-Signaling Protocol (REQUIRED)
You are running in a persistent tmux session managed by an orchestrator.
Communicate progress by writing to the phase file. The orchestrator watches
this file and injects events (CI results, review feedback) back into this session.
### Key files
\`\`\`
PHASE_FILE="${_pf}"
SUMMARY_FILE="${_sf}"
\`\`\`
### Phase transitions — write these exactly:
**After committing and pushing your branch:**
\`\`\`bash
# Rebase on target branch before push to avoid merge conflicts
git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH}
git push ${_remote} ${_br}
# Write a short summary of what you implemented:
printf '%s' "<your summary>" > "${_sf}"
# Signal the orchestrator to create the PR and watch for CI:
echo "PHASE:awaiting_ci" > "${_pf}"
\`\`\`
Then STOP and wait. The orchestrator will inject CI results.
**When you receive a "CI passed" injection:**
\`\`\`bash
echo "PHASE:awaiting_review" > "${_pf}"
\`\`\`
Then STOP and wait. The orchestrator will inject review feedback.
**When you receive a "CI failed:" injection:**
Fix the CI issue, then rebase on target branch and push:
\`\`\`bash
git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH}
git push --force-with-lease ${_remote} ${_br}
echo "PHASE:awaiting_ci" > "${_pf}"
\`\`\`
Then STOP and wait.
**When you receive a "Review: REQUEST_CHANGES" injection:**
Address ALL review feedback, then rebase on target branch and push:
\`\`\`bash
git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH}
git push --force-with-lease ${_remote} ${_br}
echo "PHASE:awaiting_ci" > "${_pf}"
\`\`\`
(CI runs again after each push — always write awaiting_ci, not awaiting_review)
**When you need human help (CI exhausted, merge blocked, stuck on a decision):**
\`\`\`bash
printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}"
\`\`\`
Then STOP and wait. A human will review and respond via the forge.
**On unrecoverable failure:**
\`\`\`bash
printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}"
\`\`\`
_PHASE_PROTOCOL_EOF_
}
# --- Merge helper ---
# do_merge — ask the forge API to merge a PR and classify the outcome.
# Args: pr_num
# Globals read: FORGE_TOKEN, API, PHASE_FILE
# Returns:
#   0 = merged (HTTP 200/204), or HTTP 405 where the PR is already merged
#   1 = any other failure (conflict, network error, etc.)
#   2 = HTTP 405 with the PR still unmerged — PHASE:escalate already written
do_merge() {
  local pr_num="$1"
  local reply http_status detail already_merged

  # -w appends the HTTP status on its own final line, split off below.
  reply=$(curl -s -w "\n%{http_code}" -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H 'Content-Type: application/json' \
    "${API}/pulls/${pr_num}/merge" \
    -d '{"Do":"merge","delete_branch_after_merge":true}') || true
  http_status=$(printf '%s\n' "$reply" | tail -n 1)
  detail=$(printf '%s\n' "$reply" | sed '$d')

  case "$http_status" in
    200 | 204)
      log "do_merge: PR #${pr_num} merged (HTTP ${http_status})"
      return 0
      ;;
    405)
      # 405 is ambiguous: either the merge requirements are unmet, or another
      # agent (race with dev-poll) merged the PR first. Ask the forge which.
      already_merged=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
        "${API}/pulls/${pr_num}" | jq -r '.merged // false') || already_merged="false"
      if [ "$already_merged" = "true" ]; then
        log "do_merge: PR #${pr_num} already merged (detected after HTTP 405) — treating as success"
        return 0
      fi
      log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${detail:0:200}"
      printf 'PHASE:escalate\nReason: %s\n' \
        "PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${detail:0:200}" \
        > "$PHASE_FILE"
      return 2
      ;;
  esac

  log "do_merge: PR #${pr_num} merge failed (HTTP ${http_status}): ${detail:0:200}"
  return 1
}
# --- Refusal comment helper ---
# Posts a "Dev-agent: <title>" comment on the current issue, unless one of the
# comments returned by the forge (limit=5) already contains that title.
#
# Args: emoji title body
# Globals read: ISSUE, FORGE_TOKEN, API
# Returns: 0 in all visible paths; a failed POST is logged as a WARNING only.
#
# The JSON payload is streamed to curl through a pipe instead of fixed
# /tmp/refusal-comment.* files, so agents running concurrently cannot clobber
# each other's payloads.
post_refusal_comment() {
  local emoji="$1" title="$2" body="$3"

  # Duplicate guard: skip when a fetched comment already carries this title.
  local last_has_title
  last_has_title=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/issues/${ISSUE}/comments?limit=5" | \
    jq -r --arg t "Dev-agent: ${title}" '[.[] | .body // ""] | any(contains($t)) | tostring') || true
  if [ "$last_has_title" = "true" ]; then
    log "skipping duplicate refusal comment: ${title}"
    return 0
  fi

  # Blank lines keep the Markdown blocks apart: a "---" directly beneath the
  # body text would turn the body's last line into a setext heading instead of
  # drawing a horizontal rule.
  local comment
  comment="${emoji} **Dev-agent: ${title}**

${body}

---
*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*"

  # jq -Rs reads the whole comment as one raw string and JSON-escapes it.
  printf '%s' "$comment" | jq -Rs '{body: .}' | \
    curl -sf -o /dev/null -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${API}/issues/${ISSUE}/comments" \
      --data-binary @- 2>/dev/null || \
    log "WARNING: failed to post refusal comment"
}
# =============================================================================
# PHASE DISPATCH CALLBACK
# =============================================================================
# _on_phase_change — Phase dispatch callback for monitor_phase_loop
# Receives the current phase as $1.
# Returns 0 to continue the loop, 1 to break (terminal phase reached).
_on_phase_change() {
local phase="$1"
# ── PHASE: awaiting_ci ──────────────────────────────────────────────────────
if [ "$phase" = "PHASE:awaiting_ci" ]; then
# Release session lock — Claude is idle during CI polling (#724)
session_lock_release
# Create PR if not yet created
if [ -z "${PR_NUMBER:-}" ]; then
status "creating PR for issue #${ISSUE}"
IMPL_SUMMARY=""
if [ -f "$IMPL_SUMMARY_FILE" ]; then
# Don't treat refusal JSON as a PR summary
if ! jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then
IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE")
fi
fi
printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt"
jq -n \
--arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \
--rawfile body "/tmp/pr-body-${ISSUE}.txt" \
--arg head "$BRANCH" \
--arg base "${PRIMARY_BRANCH}" \
'{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json"
PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/pulls" \
--data-binary @"/tmp/pr-request-${ISSUE}.json")
PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1)
PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d')
rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json"
if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then
PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number')
log "created PR #${PR_NUMBER}"
elif [ "$PR_HTTP_CODE" = "409" ]; then
# PR already exists (race condition) — find it
FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls?state=open&limit=20" | \
jq -r --arg branch "$BRANCH" \
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
if [ -n "$FOUND_PR" ]; then
PR_NUMBER="$FOUND_PR"
log "PR already exists: #${PR_NUMBER}"
else
log "ERROR: PR creation got 409 but no existing PR found"
agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the forge API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed."
return 0
fi
else
log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})"
agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push ${FORGE_REMOTE:-origin} ${BRANCH}. Then write PHASE:awaiting_ci again."
return 0
fi
fi
# No CI configured? Treat as success immediately
if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then
log "no CI configured — treating as passed"
agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER} (no CI configured for this project).
Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback."
return 0
fi
# Poll CI until done or timeout
status "waiting for CI on PR #${PR_NUMBER}"
CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha')
CI_DONE=false
CI_STATE="unknown"
CI_POLL_ELAPSED=0
while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do
sleep 30
CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 ))
# Check session still alive during CI wait (exit_marker + tmux fallback)
if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then
log "session died during CI wait"
break
fi
# Re-fetch HEAD — Claude may have pushed new commits since loop started
CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || echo "$CI_CURRENT_SHA")
CI_STATE=$(ci_commit_status "$CI_CURRENT_SHA")
if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then
CI_DONE=true
[ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0
break
fi
done
if ! $CI_DONE; then
log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s"
agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed."
return 0
fi
log "CI: ${CI_STATE}"
if [ "$CI_STATE" = "success" ]; then
agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER}.
Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback:
echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\""
else
# Fetch CI error details
PIPELINE_NUM=$(ci_pipeline_number "$CI_CURRENT_SHA")
FAILED_STEP=""
FAILED_EXIT=""
IS_INFRA=false
if [ -n "$PIPELINE_NUM" ]; then
FAILED_INFO=$(curl -sf \
-H "Authorization: Bearer ${WOODPECKER_TOKEN}" \
"${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \
jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true)
FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1)
FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2)
fi
log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}"
if [ -n "$FAILED_STEP" ] && is_infra_step "$FAILED_STEP" "${FAILED_EXIT:-0}" >/dev/null 2>&1; then
IS_INFRA=true
fi
if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then
CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 ))
log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})"
(cd "$WORKTREE" && git commit --allow-empty \
-m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1)
# Rebase on target branch before push to avoid merge conflicts
if ! (cd "$WORKTREE" && \
git fetch "${FORGE_REMOTE:-origin}" "${PRIMARY_BRANCH}" 2>/dev/null && \
git rebase "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}" 2>&1 | tail -5); then
log "rebase conflict detected — aborting, agent must resolve"
(cd "$WORKTREE" && git rebase --abort 2>/dev/null || git reset --hard HEAD 2>/dev/null) || true
agent_inject_into_session "$SESSION_NAME" "REBASE CONFLICT: Cannot rebase onto ${PRIMARY_BRANCH} automatically.
Please resolve merge conflicts manually:
1. Check conflict status: git status
2. Resolve conflicts in the conflicted files
3. Stage resolved files: git add <files>
4. Continue rebase: git rebase --continue
If you cannot resolve conflicts, abort: git rebase --abort
Then write PHASE:escalate with a reason."
return 0
fi
# Rebase succeeded — push the result
(cd "$WORKTREE" && git push --force-with-lease "${FORGE_REMOTE:-origin}" "$BRANCH" 2>&1 | tail -3)
# Touch phase file so we recheck CI on the new SHA
# Do NOT update LAST_PHASE_MTIME here — let the main loop detect the fresh mtime
touch "$PHASE_FILE"
CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true)
return 0
fi
CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 ))
_ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}"
if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then
log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating"
printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE"
# Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate
return 0
fi
CI_ERROR_LOG=""
if [ -n "$PIPELINE_NUM" ]; then
CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "")
fi
# Save CI result for crash recovery
printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \
"$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \
> "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true
agent_inject_into_session "$SESSION_NAME" "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}).
Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?})
CI debug tool:
bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0}
bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} <step-name>
Error snippet:
${CI_ERROR_LOG:-No logs available. Use ci-debug.sh to query the pipeline.}
Instructions:
1. Run ci-debug.sh failures to get the full error output.
2. Read the failing test file(s) — understand what the tests EXPECT.
3. Fix the root cause — do NOT weaken tests.
4. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}
git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH}
5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
6. Stop and wait."
fi
# ── PHASE: awaiting_review ──────────────────────────────────────────────────
elif [ "$phase" = "PHASE:awaiting_review" ]; then
# Release session lock — Claude is idle during review wait (#724)
session_lock_release
status "waiting for review on PR #${PR_NUMBER:-?}"
CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle
if [ -z "${PR_NUMBER:-}" ]; then
log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR"
FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls?state=open&limit=20" | \
jq -r --arg branch "$BRANCH" \
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
if [ -n "$FOUND_PR" ]; then
PR_NUMBER="$FOUND_PR"
log "found PR #${PR_NUMBER}"
else
agent_inject_into_session "$SESSION_NAME" "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push ${FORGE_REMOTE:-origin} ${BRANCH}, then write PHASE:awaiting_ci."
return 0
fi
fi
REVIEW_POLL_ELAPSED=0
REVIEW_FOUND=false
while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do
sleep 300 # 5 min between review checks
REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 ))
# Check session still alive (exit_marker + tmux fallback)
if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then
log "session died during review wait"
REVIEW_FOUND=false
break
fi
# Check if phase was updated while we wait (e.g., Claude reacted to something)
NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0)
if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then
log "phase file updated during review wait — re-entering main loop"
# Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer
# loop detects the change on its next tick and dispatches the new phase.
REVIEW_FOUND=true # Prevent timeout injection
# Clean up review-poll sentinel if it exists (session already advanced)
rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
break
fi
REVIEW_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true
REVIEW_COMMENT=$(forge_api_all "/issues/${PR_NUMBER}/comments" | \
jq -r --arg sha "$REVIEW_SHA" \
'[.[] | select(.body | contains("<!-- reviewed: " + $sha))] | last // empty') || true
if [ -n "$REVIEW_COMMENT" ] && [ "$REVIEW_COMMENT" != "null" ]; then
REVIEW_TEXT=$(echo "$REVIEW_COMMENT" | jq -r '.body')
# Skip error reviews — they have no verdict
if echo "$REVIEW_TEXT" | grep -q "review-error\|Review — Error"; then
log "review was an error, waiting for re-review"
continue
fi
VERDICT=$(echo "$REVIEW_TEXT" | grep -oP '\*\*(APPROVE|REQUEST_CHANGES|DISCUSS)\*\*' | head -1 | tr -d '*' || true)
log "review verdict: ${VERDICT:-unknown}"
# Also check formal forge reviews
if [ -z "$VERDICT" ]; then
VERDICT=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls/${PR_NUMBER}/reviews" | \
jq -r '[.[] | select(.stale == false)] | last | .state // empty' || true)
if [ "$VERDICT" = "APPROVED" ]; then
VERDICT="APPROVE"
elif [ "$VERDICT" != "REQUEST_CHANGES" ]; then
VERDICT=""
fi
[ -n "$VERDICT" ] && log "verdict from formal review: $VERDICT"
fi
# Skip injection if review-poll.sh already injected (sentinel present).
# Exception: APPROVE always falls through so do_merge() runs even when
# review-poll injected first — prevents Claude writing PHASE:done on a
# failed merge without the orchestrator detecting the error.
REVIEW_SENTINEL="/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
if [ -n "$VERDICT" ] && [ -f "$REVIEW_SENTINEL" ] && [ "$VERDICT" != "APPROVE" ]; then
log "review already injected by review-poll (sentinel exists) — skipping"
rm -f "$REVIEW_SENTINEL"
REVIEW_FOUND=true
break
fi
rm -f "$REVIEW_SENTINEL" # consume sentinel before APPROVE handling below
if [ "$VERDICT" = "APPROVE" ]; then
REVIEW_FOUND=true
_merge_rc=0; do_merge "$PR_NUMBER" || _merge_rc=$?
if [ "$_merge_rc" -eq 0 ]; then
# Merge succeeded — close issue and signal done
curl -sf -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \
"${API}/issues/${ISSUE}" \
-d '{"state":"closed"}' >/dev/null 2>&1 || true
# Pull merged primary branch and push to mirrors
git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE:-origin}" "$PRIMARY_BRANCH" 2>/dev/null || true
git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE:-origin}" "$PRIMARY_BRANCH" 2>/dev/null || true
mirror_push
printf 'PHASE:done\n' > "$PHASE_FILE"
elif [ "$_merge_rc" -ne 2 ]; then
# Other merge failure (conflict, etc.) — delegate to Claude for rebase + retry
agent_inject_into_session "$SESSION_NAME" "Approved! PR #${PR_NUMBER} has been approved, but the merge failed (likely conflicts).
Rebase onto ${PRIMARY_BRANCH} and push:
git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}
git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH}
echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
Do NOT merge or close the issue — the orchestrator handles that after CI passes.
If rebase repeatedly fails, write PHASE:escalate with a reason."
fi
# _merge_rc=2: PHASE:escalate already written by do_merge()
break
elif [ "$VERDICT" = "REQUEST_CHANGES" ] || [ "$VERDICT" = "DISCUSS" ]; then
REVIEW_ROUND=$(( REVIEW_ROUND + 1 ))
if [ "$REVIEW_ROUND" -ge "$MAX_REVIEW_ROUNDS" ]; then
log "hit max review rounds (${MAX_REVIEW_ROUNDS})"
log "PR #${PR_NUMBER}: hit ${MAX_REVIEW_ROUNDS} review rounds, needs human attention"
fi
REVIEW_FOUND=true
agent_inject_into_session "$SESSION_NAME" "Review feedback (round ${REVIEW_ROUND}) on PR #${PR_NUMBER}:
${REVIEW_TEXT}
Instructions:
1. Address each piece of feedback carefully.
2. Run lint and tests when done.
3. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}
git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH}
4. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
5. Stop and wait for the next CI result."
log "review REQUEST_CHANGES received (round ${REVIEW_ROUND})"
break
else
# No verdict found in comment or formal review — keep waiting
log "review comment found but no verdict, continuing to wait"
continue
fi
fi
# Check if PR was merged or closed externally
PR_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API}/pulls/${PR_NUMBER}") || true
PR_STATE=$(echo "$PR_JSON" | jq -r '.state // "unknown"')
PR_MERGED=$(echo "$PR_JSON" | jq -r '.merged // false')
if [ "$PR_STATE" != "open" ]; then
if [ "$PR_MERGED" = "true" ]; then
log "PR #${PR_NUMBER} was merged externally"
curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/issues/${ISSUE}" -d '{"state":"closed"}' >/dev/null 2>&1 || true
cleanup_labels
agent_kill_session "$SESSION_NAME"
cleanup_worktree
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}"
exit 0
else
log "PR #${PR_NUMBER} was closed WITHOUT merge — NOT closing issue"
cleanup_labels
agent_kill_session "$SESSION_NAME"
cleanup_worktree
exit 0
fi
fi
log "waiting for review on PR #${PR_NUMBER} (${REVIEW_POLL_ELAPSED}s elapsed)"
done
if ! $REVIEW_FOUND && [ "$REVIEW_POLL_ELAPSED" -ge "$REVIEW_POLL_TIMEOUT" ]; then
log "TIMEOUT: no review after 3h"
agent_inject_into_session "$SESSION_NAME" "TIMEOUT: No review received after 3 hours for PR #${PR_NUMBER}. Write PHASE:escalate to escalate to a human reviewer."
fi
# ── PHASE: escalate ──────────────────────────────────────────────────────
elif [ "$phase" = "PHASE:escalate" ]; then
status "escalated — waiting for human input on issue #${ISSUE}"
ESCALATE_REASON=$(sed -n '2p' "$PHASE_FILE" 2>/dev/null | sed 's/^Reason: //' || echo "")
log "phase: escalate — reason: ${ESCALATE_REASON:-none}"
# Session stays alive — human input arrives via vault/forge
# ── PHASE: done ─────────────────────────────────────────────────────────────
# PR merged and issue closed (by orchestrator or Claude). Just clean up local state.
elif [ "$phase" = "PHASE:done" ]; then
if [ -n "${PR_NUMBER:-}" ]; then
status "phase done — PR #${PR_NUMBER} merged, cleaning up"
else
status "phase done — issue #${ISSUE} complete, cleaning up"
fi
# Belt-and-suspenders: ensure in-progress label removed (idempotent)
cleanup_labels
# Local cleanup
agent_kill_session "$SESSION_NAME"
cleanup_worktree
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
CLAIMED=false # Don't unclaim again in cleanup()
# ── PHASE: failed ───────────────────────────────────────────────────────────
elif [ "$phase" = "PHASE:failed" ]; then
if [[ -f "$PHASE_FILE" ]]; then
FAILURE_REASON=$(sed -n '2p' "$PHASE_FILE" | sed 's/^Reason: //')
fi
FAILURE_REASON="${FAILURE_REASON:-unspecified}"
log "phase: failed — reason: ${FAILURE_REASON}"
# Gitea labels API requires []int64 — look up the "backlog" label ID once
BACKLOG_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
| jq -r '.[] | select(.name == "backlog") | .id' 2>/dev/null || true)
BACKLOG_LABEL_ID="${BACKLOG_LABEL_ID:-1300815}"
UNDERSPECIFIED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
| jq -r '.[] | select(.name == "underspecified") | .id' 2>/dev/null || true)
UNDERSPECIFIED_LABEL_ID="${UNDERSPECIFIED_LABEL_ID:-1300816}"
# Check if this is a refusal (Claude wrote refusal JSON to IMPL_SUMMARY_FILE)
REFUSAL_JSON=""
if [ -f "$IMPL_SUMMARY_FILE" ] && jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then
REFUSAL_JSON=$(cat "$IMPL_SUMMARY_FILE")
fi
if [ -n "$REFUSAL_JSON" ] && [ "$FAILURE_REASON" = "refused" ]; then
REFUSAL_STATUS=$(printf '%s' "$REFUSAL_JSON" | jq -r '.status')
log "claude refused: ${REFUSAL_STATUS}"
# Write preflight result for dev-poll.sh
printf '%s' "$REFUSAL_JSON" > "$PREFLIGHT_RESULT"
# Unclaim issue (restore backlog label, remove in-progress)
cleanup_labels
curl -sf -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/issues/${ISSUE}/labels" \
-d "{\"labels\":[${BACKLOG_LABEL_ID}]}" >/dev/null 2>&1 || true
case "$REFUSAL_STATUS" in
unmet_dependency)
BLOCKED_BY_MSG=$(printf '%s' "$REFUSAL_JSON" | jq -r '.blocked_by // "unknown"')
SUGGESTION=$(printf '%s' "$REFUSAL_JSON" | jq -r '.suggestion // empty')
COMMENT_BODY="### Blocked by unmet dependency
${BLOCKED_BY_MSG}"
if [ -n "$SUGGESTION" ] && [ "$SUGGESTION" != "null" ]; then
COMMENT_BODY="${COMMENT_BODY}
**Suggestion:** Work on #${SUGGESTION} first."
fi
post_refusal_comment "🚧" "Unmet dependency" "$COMMENT_BODY"
;;
too_large)
REASON=$(printf '%s' "$REFUSAL_JSON" | jq -r '.reason // "unspecified"')
post_refusal_comment "📏" "Too large for single session" "### Why this can't be implemented as-is
${REASON}
### Next steps
A maintainer should split this issue or add more detail to the spec."
curl -sf -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/issues/${ISSUE}/labels" \
-d "{\"labels\":[${UNDERSPECIFIED_LABEL_ID}]}" >/dev/null 2>&1 || true
curl -sf -X DELETE \
-H "Authorization: token ${FORGE_TOKEN}" \
"${API}/issues/${ISSUE}/labels/${BACKLOG_LABEL_ID}" >/dev/null 2>&1 || true
;;
already_done)
REASON=$(printf '%s' "$REFUSAL_JSON" | jq -r '.reason // "unspecified"')
post_refusal_comment "✅" "Already implemented" "### Existing implementation
${REASON}
Closing as already implemented."
curl -sf -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/issues/${ISSUE}" \
-d '{"state":"closed"}' >/dev/null 2>&1 || true
;;
*)
post_refusal_comment "❓" "Unable to proceed" "The dev-agent could not process this issue.
Raw response:
\`\`\`json
$(printf '%s' "$REFUSAL_JSON" | head -c 2000)
\`\`\`"
;;
esac
CLAIMED=false # Don't unclaim again in cleanup()
agent_kill_session "$SESSION_NAME"
cleanup_worktree
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
return 1
else
# Genuine unrecoverable failure — label blocked with diagnostic
log "session failed: ${FAILURE_REASON}"
post_blocked_diagnostic "$FAILURE_REASON"
agent_kill_session "$SESSION_NAME"
if [ -n "${PR_NUMBER:-}" ]; then
log "keeping worktree (PR #${PR_NUMBER} still open)"
else
cleanup_worktree
fi
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
return 1
fi
# ── PHASE: crashed ──────────────────────────────────────────────────────────
# Session died unexpectedly (OOM kill, tmux crash, etc.). Label blocked with
# diagnostic comment so humans can triage directly on the issue.
elif [ "$phase" = "PHASE:crashed" ]; then
log "session crashed for issue #${ISSUE}"
post_blocked_diagnostic "crashed"
log "PRESERVED crashed worktree for debugging: $WORKTREE"
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
else
log "WARNING: unknown phase value: ${phase}"
fi
}

View file

@ -8,13 +8,8 @@
set -euo pipefail set -euo pipefail
# Inline read_phase() function (previously from lib/agent-session.sh) # Source canonical read_phase() from shared library
# Read the current phase from a phase file, stripped of whitespace. source "$(dirname "$0")/../lib/agent-session.sh"
# Usage: read_phase [file] — defaults to $PHASE_FILE
read_phase() {
local file="${1:-${PHASE_FILE:-}}"
{ cat "$file" 2>/dev/null || true; } | head -1 | tr -d '[:space:]'
}
PROJECT="testproject" PROJECT="testproject"
ISSUE="999" ISSUE="999"
@ -89,7 +84,7 @@ else
fail "PHASE:failed format: first='$first_line' second='$second_line'" fail "PHASE:failed format: first='$first_line' second='$second_line'"
fi fi
# ── Test 5: orchestrator read function (inline read_phase) # ── Test 5: orchestrator read function (canonical read_phase from lib/agent-session.sh)
echo "PHASE:awaiting_ci" > "$PHASE_FILE" echo "PHASE:awaiting_ci" > "$PHASE_FILE"
phase=$(read_phase "$PHASE_FILE") phase=$(read_phase "$PHASE_FILE")
if [ "$phase" = "PHASE:awaiting_ci" ]; then if [ "$phase" = "PHASE:awaiting_ci" ]; then

View file

@ -1,28 +1,209 @@
--- ---
name: disinto-factory name: disinto-factory
description: Set up and operate a disinto autonomous code factory. description: Set up and operate a disinto autonomous code factory. Use when bootstrapping a new factory instance, checking on agents and CI, managing the backlog, or troubleshooting the stack.
--- ---
# Disinto Factory # Disinto Factory
You are helping the user set up and operate a **disinto autonomous code factory**. You are helping the user set up and operate a **disinto autonomous code factory** — a system
of bash scripts and Claude CLI that automates the full development lifecycle: picking up
issues, implementing via Claude, creating PRs, running CI, reviewing, merging, and mirroring.
## Guides ## First-time setup
- **[Setup guide](setup.md)** — First-time factory setup: environment, init, verification, backlog seeding Walk the user through these steps interactively. Ask questions where marked with [ASK].
- **[Operations guide](operations.md)** — Day-to-day: status checks, CI debugging, unsticking issues, Forgejo access
- **[Lessons learned](lessons-learned.md)** — Patterns for writing issues, debugging CI, retrying failures, vault operations, breaking down features ### 1. Environment
[ASK] Where will the factory run? Options:
- **LXD container** (recommended for isolation) — need Debian 12, Docker, nesting enabled
- **Bare VM or server** — need Debian/Ubuntu with Docker
- **Existing container** — check prerequisites
Verify prerequisites:
```bash
docker --version && git --version && jq --version && curl --version && tmux -V && python3 --version && claude --version
```
If any tool is missing, help the user install it before continuing.
### 2. Clone and init
```bash
git clone https://codeberg.org/johba/disinto.git && cd disinto
```
[ASK] What repo should the factory develop? Options:
- **Itself** (self-development): `bin/disinto init https://codeberg.org/johba/disinto --yes --repo-root $(pwd)`
- **Another project**: `bin/disinto init <repo-url> --yes`
Run the init and watch for:
- All bot users created (dev-bot, review-bot, etc.)
- `WOODPECKER_TOKEN` generated and saved
- Stack containers all started
### 3. Post-init verification
Run this checklist — fix any failures before proceeding:
```bash
# Stack healthy?
docker ps --format "table {{.Names}}\t{{.Status}}"
# Expected: forgejo, woodpecker (healthy), woodpecker-agent (healthy), agents, edge, staging
# Token generated? Succeeds only when WOODPECKER_TOKEN has a non-empty value
# (an empty "WOODPECKER_TOKEN=" or a value that is just a trailing comment counts
# as missing). -q keeps the secret itself off the terminal.
grep -Eq '^WOODPECKER_TOKEN=[^[:space:]#]' .env && echo "OK" || echo "MISSING — see references/troubleshooting.md"
# Agent cron active?
docker exec -u agent disinto-agents-1 crontab -l -u agent
# Agent can reach Forgejo?
docker exec disinto-agents-1 bash -c "source /home/agent/disinto/.env && curl -sf http://forgejo:3000/api/v1/version | jq .version"
# Agent repo cloned?
docker exec -u agent disinto-agents-1 ls /home/agent/repos/
```
If the agent repo is missing, clone it:
```bash
docker exec disinto-agents-1 chown -R agent:agent /home/agent/repos
docker exec -u agent disinto-agents-1 bash -c "source /home/agent/disinto/.env && git clone http://dev-bot:\${FORGE_TOKEN}@forgejo:3000/<org>/<repo>.git /home/agent/repos/<name>"
```
### 4. Mirrors (optional)
[ASK] Should the factory mirror to external forges? If yes, which?
- GitHub: need repo URL and SSH key added to GitHub account
- Codeberg: need repo URL and SSH key added to Codeberg account
Show the user their public key:
```bash
cat ~/.ssh/id_ed25519.pub
```
Test SSH access:
```bash
ssh -T git@github.com 2>&1; ssh -T git@codeberg.org 2>&1
```
If SSH host keys are missing: `ssh-keyscan github.com codeberg.org >> ~/.ssh/known_hosts 2>/dev/null`
Edit `projects/<name>.toml` to add mirrors:
```toml
[mirrors]
github = "git@github.com:Org/repo.git"
codeberg = "git@codeberg.org:user/repo.git"
```
Test with a manual push:
```bash
source .env && source lib/env.sh && export PROJECT_TOML=projects/<name>.toml && source lib/load-project.sh && source lib/mirrors.sh && mirror_push
```
### 5. Seed the backlog
[ASK] What should the factory work on first? Brainstorm with the user.
Help them create issues on the local Forgejo. Each issue needs:
- A clear title prefixed with `fix:`, `feat:`, or `chore:`
- A body describing what to change, which files, and any constraints
- The `backlog` label (so the dev-agent picks it up)
```bash
source .env
BACKLOG_ID=$(curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/labels" \
-H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | select(.name=="backlog") | .id')
curl -sf -X POST "http://localhost:3000/api/v1/repos/<org>/<repo>/issues" \
-H "Authorization: token $FORGE_TOKEN" \
-H "Content-Type: application/json" \
-d "{\"title\": \"<title>\", \"body\": \"<body>\", \"labels\": [$BACKLOG_ID]}"
```
For issues with dependencies, add `Depends-on: #N` in the body — the dev-agent checks
these before starting.
Use labels:
- `backlog` — ready for the dev-agent
- `blocked` — parked, not for the factory
- No label — tracked but not for autonomous work
### 6. Watch it work
The dev-agent polls every 5 minutes. Trigger manually to see it immediately:
```bash
docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/disinto && bash dev/dev-poll.sh projects/<name>.toml"
```
Then monitor:
```bash
# Watch the agent work
docker exec disinto-agents-1 tail -f /home/agent/data/logs/dev/dev-agent.log
# Check for Claude running
docker exec disinto-agents-1 bash -c "for f in /proc/[0-9]*/cmdline; do cmd=\$(tr '\0' ' ' < \$f 2>/dev/null); echo \$cmd | grep -q 'claude.*-p' && echo 'Claude is running'; done"
```
## Ongoing operations
### Check factory status
```bash
source .env
# Issues
curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/issues?state=open" \
-H "Authorization: token $FORGE_TOKEN" \
| jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"'
# PRs
curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/pulls?state=open" \
-H "Authorization: token $FORGE_TOKEN" \
| jq -r '.[] | "PR #\(.number) [\(.head.ref)] \(.title)"'
# Agent logs
docker exec disinto-agents-1 tail -20 /home/agent/data/logs/dev/dev-agent.log
```
### Check CI
```bash
source .env
WP_CSRF=$(curl -sf -b "user_sess=$WOODPECKER_TOKEN" http://localhost:8000/web-config.js \
| sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p')
curl -sf -b "user_sess=$WOODPECKER_TOKEN" -H "X-CSRF-Token: $WP_CSRF" \
"http://localhost:8000/api/repos/1/pipelines?page=1&per_page=5" \
| jq '.[] | {number, status, event}'
```
### Unstick a blocked issue
When a dev-agent run fails (CI timeout, implementation error), the issue gets labeled `blocked`. To unstick it:
1. Close stale PR and delete the branch
2. `docker exec disinto-agents-1 rm -f /tmp/dev-agent-*.json /tmp/dev-agent-*.lock`
3. Relabel the issue to `backlog`
4. Update agent repo: `docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/repos/<name> && git fetch origin && git reset --hard origin/main"`
### Access Forgejo UI
If running in an LXD container with a reverse tunnel:
```bash
# From your machine:
ssh -L 3000:localhost:13000 user@jump-host
# Open http://localhost:3000
```
Reset admin password if needed:
```bash
docker exec disinto-forgejo-1 su -c "forgejo admin user change-password --username disinto-admin --password <new-pw> --must-change-password=false" git
```
## Important context ## Important context
- Read `AGENTS.md` for per-agent architecture and file-level docs - Read `AGENTS.md` for per-agent architecture and file-level docs
- Read `VISION.md` for project philosophy - Read `VISION.md` for project philosophy
- The factory uses a single internal Forgejo as its forge, regardless of where mirrors go - The factory uses a single internal Forgejo as its forge, regardless of where mirrors go
- Dev-agent uses `claude -p` for one-shot implementation sessions - Dev-agent uses `claude -p --resume` for session continuity across CI/review cycles
- Mirror pushes happen automatically after every merge - Mirror pushes happen automatically after every merge (fire-and-forget)
- Polling loop in `docker/agents/entrypoint.sh`: dev-poll/review-poll every 5m, gardener/architect every 6h, planner every 12h, predictor every 24h - Cron schedule: dev-poll every 5min, review-poll every 5min, gardener 4x/day
## References
- [Troubleshooting](references/troubleshooting.md)
- [Factory status script](scripts/factory-status.sh)

View file

@ -1,35 +0,0 @@
# Lessons learned
## Remediation & deployment
**Escalate gradually.** Cheapest fix first, re-measure, escalate only if it persists. Single-shot fixes are either too weak or cause collateral damage.
**Parameterize deployment boundaries.** Entrypoint references to a specific project name are config values waiting to escape. `${VAR:-default}` preserves compat and unlocks reuse.
**Fail loudly over silent defaults.** A fatal error with a clear message beats a wrong default that appears to work.
**Audit the whole file when fixing one value.** Hardcoded assumptions cluster. Fixing one while leaving siblings produces multi-commit churn.
## Documentation
**Per-context rewrites, not batch replacement.** Each doc mention sits in a different narrative. Blanket substitution produces awkward text.
**Search for implicit references too.** After keyword matches, check for instructions that assume the old mechanism without naming it.
## Code review
**Approval means "safe to ship," not "how I'd write it."** Distinguish "wrong" from "different" — only the former blocks.
**Scale scrutiny to blast radius.** A targeted fix warrants less ceremony than a cross-cutting refactor.
**Be specific; separate blockers from preferences.** Concrete observations invite fixes; vague concerns invite debate.
**Read diffs top-down: intent, behavior, edge cases.** Verify the change matches its stated goal before examining lines.
## Issue authoring & retry
**Self-contained issue bodies.** The agent reads the body, not comments. On retry, update the body with exact error and fix guidance.
**Clean stale branches before retry.** Old branches trigger recovery on stale code. Close PR, delete branch, relabel.
**Diagnose CI failures externally.** The agent sees pass/fail, not logs. After repeated failures, read logs yourself and put findings in the issue.

View file

@ -1,54 +0,0 @@
# Ongoing operations
### Check factory status
```bash
source .env
# Issues
curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/issues?state=open" \
-H "Authorization: token $FORGE_TOKEN" \
| jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"'
# PRs
curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/pulls?state=open" \
-H "Authorization: token $FORGE_TOKEN" \
| jq -r '.[] | "PR #\(.number) [\(.head.ref)] \(.title)"'
# Agent logs
docker exec disinto-agents-1 tail -20 /home/agent/data/logs/dev/dev-agent.log
```
### Check CI
```bash
source .env
WP_CSRF=$(curl -sf -b "user_sess=$WOODPECKER_TOKEN" http://localhost:8000/web-config.js \
| sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p')
curl -sf -b "user_sess=$WOODPECKER_TOKEN" -H "X-CSRF-Token: $WP_CSRF" \
"http://localhost:8000/api/repos/1/pipelines?page=1&per_page=5" \
| jq '.[] | {number, status, event}'
```
### Unstick a blocked issue
When a dev-agent run fails (CI timeout, implementation error), the issue gets labeled `blocked`. To unstick it:
1. Close stale PR and delete the branch
2. `docker exec disinto-agents-1 rm -f /tmp/dev-agent-*.json /tmp/dev-agent-*.lock`
3. Relabel the issue to `backlog`
4. Update agent repo: `docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/repos/<name> && git fetch origin && git reset --hard origin/main"`
### Access Forgejo UI
If running in an LXD container with a reverse tunnel:
```bash
# From your machine:
ssh -L 3000:localhost:13000 user@jump-host
# Open http://localhost:3000
```
Reset admin password if needed:
```bash
docker exec disinto-forgejo-1 su -c "forgejo admin user change-password --username disinto-admin --password <new-pw> --must-change-password=false" git
```

View file

@ -1,191 +0,0 @@
# First-time setup
Walk the user through these steps interactively. Ask questions where marked with [ASK].
### 1. Environment
[ASK] Where will the factory run? Options:
- **LXD container** (recommended for isolation) — need Debian 12, Docker, nesting enabled
- **Bare VM or server** — need Debian/Ubuntu with Docker
- **Existing container** — check prerequisites
Verify prerequisites:
```bash
docker --version && git --version && jq --version && curl --version && tmux -V && python3 --version && claude --version
```
If any tool is missing, help the user install it before continuing.
### 2. Clone disinto and choose a target project
Clone the disinto factory itself:
```bash
git clone https://codeberg.org/johba/disinto.git && cd disinto
```
[ASK] What repository should the factory develop? Provide the **remote repository URL** in one of these formats:
- Full URL: `https://github.com/johba/harb.git` or `https://codeberg.org/johba/harb.git`
- Short slug: `johba/harb` (uses local Forgejo as the primary remote)
The factory will clone from the remote URL (if provided) or from your local Forgejo, then mirror to the remote.
Then initialize the factory for that project:
```bash
bin/disinto init johba/harb --yes
# or with full URL:
bin/disinto init https://github.com/johba/harb.git --yes
```
The `init` command will:
- Create all bot users (dev-bot, review-bot, etc.) on the local Forgejo
- Generate and save `WOODPECKER_TOKEN`
- Start the stack containers
- Clone the target repo into the agent workspace
> **Note:** The `--repo-root` flag is optional and only needed if you want to customize
> where the cloned repo lives. By default, it goes under `/home/agent/repos/<name>`.
### 3. Post-init verification
Run this checklist — fix any failures before proceeding:
```bash
# Stack healthy?
docker ps --format "table {{.Names}}\t{{.Status}}"
# Expected: forgejo, woodpecker (healthy), woodpecker-agent (healthy), agents, edge, staging
# Token generated?
grep WOODPECKER_TOKEN .env | grep -v "^$" && echo "OK" || echo "MISSING — see references/troubleshooting.md"
# Agent entrypoint loop running?
docker exec disinto-agents-1 tail -5 /home/agent/data/agent-entrypoint.log
# Agent can reach Forgejo?
docker exec disinto-agents-1 bash -c "source /home/agent/disinto/.env && curl -sf http://forgejo:3000/api/v1/version | jq .version"
# Agent repo cloned?
docker exec -u agent disinto-agents-1 ls /home/agent/repos/
```
If the agent repo is missing, clone it:
```bash
docker exec disinto-agents-1 chown -R agent:agent /home/agent/repos
docker exec -u agent disinto-agents-1 bash -c "source /home/agent/disinto/.env && git clone http://dev-bot:\${FORGE_TOKEN}@forgejo:3000/<org>/<repo>.git /home/agent/repos/<name>"
```
### 4. Create the project configuration file
The factory uses a TOML file to configure how it manages your project. Create
`projects/<name>.toml` based on the template format:
```toml
# projects/harb.toml
name = "harb"
repo = "johba/harb"
forge_url = "http://localhost:3000"
repo_root = "/home/agent/repos/harb"
primary_branch = "master"
[ci]
woodpecker_repo_id = 0
stale_minutes = 60
[services]
containers = ["ponder"]
[monitoring]
check_prs = true
check_dev_agent = true
check_pipeline_stall = true
# [mirrors]
# github = "git@github.com:johba/harb.git"
# codeberg = "git@codeberg.org:johba/harb.git"
```
**Key fields:**
- `name`: Project identifier (used for file names, logs, etc.)
- `repo`: The source repo in `owner/name` format
- `forge_url`: URL of your local Forgejo instance
- `repo_root`: Where the agent clones the repo
- `primary_branch`: Default branch name (e.g., `main` or `master`)
- `woodpecker_repo_id`: Set to `0` initially; auto-populated on first CI run
- `containers`: List of Docker containers the factory should manage
- `mirrors`: Optional external forge URLs for backup/sync
### 5. Mirrors (optional)
[ASK] Should the factory mirror to external forges? If yes, which?
- GitHub: need repo URL and SSH key added to GitHub account
- Codeberg: need repo URL and SSH key added to Codeberg account
Show the user their public key:
```bash
cat ~/.ssh/id_ed25519.pub
```
Test SSH access:
```bash
ssh -T git@github.com 2>&1; ssh -T git@codeberg.org 2>&1
```
If SSH host keys are missing: `ssh-keyscan github.com codeberg.org >> ~/.ssh/known_hosts 2>/dev/null`
Edit `projects/<name>.toml` to uncomment and configure mirrors:
```toml
[mirrors]
github = "git@github.com:Org/repo.git"
codeberg = "git@codeberg.org:user/repo.git"
```
Test with a manual push:
```bash
source .env && source lib/env.sh && export PROJECT_TOML=projects/<name>.toml && source lib/load-project.sh && source lib/mirrors.sh && mirror_push
```
### 6. Seed the backlog
[ASK] What should the factory work on first? Brainstorm with the user.
Help them create issues on the local Forgejo. Each issue needs:
- A clear title prefixed with `fix:`, `feat:`, or `chore:`
- A body describing what to change, which files, and any constraints
- The `backlog` label (so the dev-agent picks it up)
```bash
source .env
BACKLOG_ID=$(curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/labels" \
-H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | select(.name=="backlog") | .id')
curl -sf -X POST "http://localhost:3000/api/v1/repos/<org>/<repo>/issues" \
-H "Authorization: token $FORGE_TOKEN" \
-H "Content-Type: application/json" \
-d "{\"title\": \"<title>\", \"body\": \"<body>\", \"labels\": [$BACKLOG_ID]}"
```
For issues with dependencies, add `Depends-on: #N` in the body — the dev-agent checks
these before starting.
Use labels:
- `backlog` — ready for the dev-agent
- `blocked` — parked, not for the factory
- No label — tracked but not for autonomous work
### 7. Watch it work
The dev-agent runs every 5 minutes via the entrypoint polling loop. Trigger manually to see it immediately:
```bash
source .env
export PROJECT_TOML=projects/<name>.toml
docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/disinto && bash dev/dev-poll.sh projects/<name>.toml"
```
Then monitor:
```bash
# Watch the agent work
docker exec disinto-agents-1 tail -f /home/agent/data/logs/dev/dev-agent.log
# Check for Claude running
docker exec disinto-agents-1 bash -c "for f in /proc/[0-9]*/cmdline; do cmd=\$(tr '\0' ' ' < \$f 2>/dev/null); echo \$cmd | grep -q 'claude.*-p' && echo 'Claude is running'; done"
```

View file

@ -1,152 +0,0 @@
version: "3.8"
services:
agents:
build:
context: .
dockerfile: docker/agents/Dockerfile
image: disinto/agents:latest
container_name: disinto-agents
volumes:
- ./data/agents:/home/agent/data
- ./disinto:/home/agent/disinto:ro
- /usr/local/bin/claude:/usr/local/bin/claude:ro
environment:
- FORGE_URL=http://forgejo:3000
- FORGE_TOKEN=${FORGE_TOKEN:-}
- FORGE_REVIEW_TOKEN=${FORGE_REVIEW_TOKEN:-}
- FORGE_GARDENER_TOKEN=${FORGE_GARDENER_TOKEN:-}
- FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-}
- FORGE_PREDICTOR_TOKEN=${FORGE_PREDICTOR_TOKEN:-}
- FORGE_ARCHITECT_TOKEN=${FORGE_ARCHITECT_TOKEN:-}
- FORGE_VAULT_TOKEN=${FORGE_VAULT_TOKEN:-}
- FORGE_PLANNER_TOKEN=${FORGE_PLANNER_TOKEN:-}
- FORGE_BOT_USERNAMES=${FORGE_BOT_USERNAMES:-}
- WOODPECKER_TOKEN=${WOODPECKER_TOKEN:-}
- CLAUDE_TIMEOUT=${CLAUDE_TIMEOUT:-7200}
- CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- FORGE_ADMIN_PASS=${FORGE_ADMIN_PASS:-}
- DISINTO_AGENTS=review,gardener
depends_on:
- forgejo
agents-llama:
build:
context: .
dockerfile: docker/agents/Dockerfile
image: disinto/agents-llama:latest
container_name: disinto-agents-llama
volumes:
- ./data/llama:/home/agent/data
- ./disinto:/home/agent/disinto:ro
- /usr/local/bin/claude:/usr/local/bin/claude:ro
environment:
- FORGE_URL=http://forgejo:3000
- FORGE_TOKEN=${FORGE_TOKEN_LLAMA:-}
- FORGE_PASS=${FORGE_PASS_LLAMA:-}
- FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-}
- FORGE_PREDICTOR_TOKEN=${FORGE_PREDICTOR_TOKEN:-}
- FORGE_ARCHITECT_TOKEN=${FORGE_ARCHITECT_TOKEN:-}
- FORGE_VAULT_TOKEN=${FORGE_VAULT_TOKEN:-}
- FORGE_PLANNER_TOKEN=${FORGE_PLANNER_TOKEN:-}
- FORGE_BOT_USERNAMES=${FORGE_BOT_USERNAMES:-}
- WOODPECKER_TOKEN=${WOODPECKER_TOKEN:-}
- CLAUDE_TIMEOUT=${CLAUDE_TIMEOUT:-7200}
- CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
- CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=60
- CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-}
- FORGE_ADMIN_PASS=${FORGE_ADMIN_PASS:-}
- DISINTO_AGENTS=dev
- PROJECT_TOML=projects/disinto.toml
- FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
- POLL_INTERVAL=${POLL_INTERVAL:-300}
- AGENT_ROLES=dev
depends_on:
- forgejo
runner:
image: disinto/agents:latest
profiles: ["runner"]
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /usr/local/bin/claude:/usr/local/bin/claude:ro
- ${HOME}/.claude:/home/agent/.claude
- ${HOME}/.claude.json:/home/agent/.claude.json:ro
entrypoint: ["bash", "/home/agent/disinto/docker/runner/entrypoint-runner.sh"]
environment:
- DISINTO_CONTAINER=1
- FORGE_URL=${FORGE_URL:-}
- FORGE_TOKEN=${FORGE_TOKEN:-}
- FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
- FORGE_OPS_REPO=${FORGE_OPS_REPO:-}
- PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- CLAUDE_MODEL=${CLAUDE_MODEL:-}
networks:
- default
reproduce:
build:
context: .
dockerfile: docker/reproduce/Dockerfile
image: disinto-reproduce:latest
network_mode: host
profiles: ["reproduce"]
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- agent-data:/home/agent/data
- project-repos:/home/agent/repos
- ${HOME}/.claude:/home/agent/.claude
- /usr/local/bin/claude:/usr/local/bin/claude:ro
- ${HOME}/.ssh:/home/agent/.ssh:ro
env_file:
- .env
edge:
build:
context: docker/edge
dockerfile: Dockerfile
image: disinto/edge:latest
container_name: disinto-edge
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /usr/local/bin/claude:/usr/local/bin/claude:ro
- ${HOME}/.claude:/home/agent/.claude
- ${HOME}/.claude.json:/home/agent/.claude.json:ro
- disinto-logs:/opt/disinto-logs
- ./docker-compose.yml:/opt/docker-compose.yml:ro
- ./projects:/opt/disinto-projects:ro
environment:
- FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- CLAUDE_MODEL=claude-sonnet-4-6
- FORGE_TOKEN=${FORGE_TOKEN:-}
- FORGE_URL=http://forgejo:3000
- DISINTO_CONTAINER=1
- HOST_PROJECT_DIR=${HOST_PROJECT_DIR:-.}
- PROJECTS_DIR=/opt/disinto-projects
ports:
- "80:80"
- "443:443"
depends_on:
- forgejo
forgejo:
image: codeberg.org/forgejo/forgejo:1
container_name: disinto-forgejo
volumes:
- ./data/forgejo:/data
environment:
- FORGEJO__database__DB_TYPE=sqlite3
- FORGEJO__service__REGISTER_EMAIL_CONFIRMATION=false
- FORGEJO__service__ENABLE_NOTIFY_MAIL=false
- FORGEJO__service__DISABLE_REGISTRATION=true
- FORGEJO__service__REQUIRE_SIGNIN_VIEW=true
ports:
- "3000:3000"
# Named volumes used by the services in this file. Every named volume that a
# service mounts must be declared here:
#   disinto-logs  — mounted by edge
#   agent-data    — mounted by reproduce
#   project-repos — mounted by reproduce
volumes:
  disinto-logs:
  agent-data:
  project-repos:

View file

@ -1,18 +1,14 @@
FROM debian:bookworm-slim FROM debian:bookworm-slim
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
bash curl git jq tmux python3 python3-pip openssh-client ca-certificates age shellcheck procps gosu \ bash curl git jq tmux cron python3 openssh-client ca-certificates \
&& pip3 install --break-system-packages networkx \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Pre-built binaries (copied from docker/agents/bin/)
# SOPS — encrypted data decryption tool
COPY docker/agents/bin/sops /usr/local/bin/sops
RUN chmod +x /usr/local/bin/sops
# tea CLI — official Gitea/Forgejo CLI for issue/label/comment operations # tea CLI — official Gitea/Forgejo CLI for issue/label/comment operations
COPY docker/agents/bin/tea /usr/local/bin/tea # Checksum from https://dl.gitea.com/tea/0.9.2/tea-0.9.2-linux-amd64.sha256
RUN chmod +x /usr/local/bin/tea RUN curl -sL https://dl.gitea.com/tea/0.9.2/tea-0.9.2-linux-amd64 -o /usr/local/bin/tea \
&& echo "be10cdf9a619e3c0f121df874960ed19b53e62d1c7036cf60313a28b5227d54d /usr/local/bin/tea" | sha256sum -c - \
&& chmod +x /usr/local/bin/tea
# Claude CLI is mounted from the host via docker-compose volume. # Claude CLI is mounted from the host via docker-compose volume.
# No internet access to cli.anthropic.com required at build time. # No internet access to cli.anthropic.com required at build time.
@ -20,14 +16,11 @@ RUN chmod +x /usr/local/bin/tea
# Non-root user # Non-root user
RUN useradd -m -u 1000 -s /bin/bash agent RUN useradd -m -u 1000 -s /bin/bash agent
# Copy disinto code into the image COPY entrypoint.sh /entrypoint.sh
COPY . /home/agent/disinto
COPY docker/agents/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh RUN chmod +x /entrypoint.sh
# Entrypoint runs polling loop directly, dropping to agent user via gosu. # Entrypoint runs as root to start the cron daemon;
# All scripts execute as the agent user (UID 1000) while preserving env vars. # cron jobs execute as the agent user (crontab -u agent).
WORKDIR /home/agent/disinto WORKDIR /home/agent
ENTRYPOINT ["/entrypoint.sh"] ENTRYPOINT ["/entrypoint.sh"]

View file

@ -1,105 +1,51 @@
#!/usr/bin/env bash #!/usr/bin/env bash
set -euo pipefail set -euo pipefail
# entrypoint.sh — Start agent container with polling loop # entrypoint.sh — Start agent container with cron in foreground
# #
# Runs as root inside the container. Drops to agent user via gosu for all # Runs as root inside the container. Installs crontab entries for the
# poll scripts. All Docker Compose env vars are inherited (PATH, FORGE_TOKEN, # agent user from project TOMLs, then starts cron in the foreground.
# ANTHROPIC_API_KEY, etc.). # All cron jobs execute as the agent user (UID 1000).
#
# AGENT_ROLES env var controls which scripts run: "review,dev,gardener,architect,planner,predictor"
# (default: all six). Uses while-true loop with staggered intervals:
# - review-poll: every 5 minutes (offset by 0s)
# - dev-poll: every 5 minutes (offset by 2 minutes)
# - gardener: every 6 hours (72 iterations * 5 min)
# - architect: every 6 hours (same as gardener)
# - planner: every 12 hours (144 iterations * 5 min)
# - predictor: every 24 hours (288 iterations * 5 min)
DISINTO_DIR="/home/agent/disinto" DISINTO_DIR="/home/agent/disinto"
LOGFILE="/home/agent/data/agent-entrypoint.log" LOGFILE="/home/agent/data/agent-entrypoint.log"
mkdir -p /home/agent/data/logs mkdir -p /home/agent/data
chown -R agent:agent /home/agent/data chown agent:agent /home/agent/data
log() { log() {
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" | tee -a "$LOGFILE" printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" | tee -a "$LOGFILE"
} }
# Initialize state directory and files if they don't exist # Build crontab from project TOMLs and install for the agent user.
init_state_dir() { install_project_crons() {
local state_dir="${DISINTO_DIR}/state" local cron_lines="DISINTO_CONTAINER=1
mkdir -p "$state_dir" USER=agent"
# Create empty state files so check_active guards work for toml in "${DISINTO_DIR}"/projects/*.toml; do
for agent in dev reviewer gardener architect planner predictor; do [ -f "$toml" ] || continue
touch "$state_dir/.${agent}-active" 2>/dev/null || true local pname
pname=$(python3 -c "
import sys, tomllib
with open(sys.argv[1], 'rb') as f:
print(tomllib.load(f)['name'])
" "$toml" 2>/dev/null) || continue
cron_lines="${cron_lines}
# disinto: ${pname}
2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${DISINTO_DIR}/review/review-poll.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1
4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${DISINTO_DIR}/dev/dev-poll.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1
0 0,6,12,18 * * * cd ${DISINTO_DIR} && bash gardener/gardener-run.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
done done
chown -R agent:agent "$state_dir"
log "Initialized state directory"
}
# Configure git credential helper for password-based HTTP auth. if [ -n "$cron_lines" ]; then
# Forgejo 11.x rejects API tokens for git push (#361); password auth works. printf '%s\n' "$cron_lines" | crontab -u agent -
# This ensures all git operations (clone, fetch, push) from worktrees use log "Installed crontab for agent user"
# password auth without needing tokens embedded in remote URLs.
configure_git_creds() {
if [ -n "${FORGE_PASS:-}" ] && [ -n "${FORGE_URL:-}" ]; then
_forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||')
_forge_proto=$(printf '%s' "$FORGE_URL" | sed 's|://.*||')
# Determine the bot username from FORGE_TOKEN identity (or default to dev-bot)
_bot_user=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_URL}/api/v1/user" 2>/dev/null | jq -r '.login // empty') || _bot_user=""
_bot_user="${_bot_user:-dev-bot}"
# Write a static credential helper script (git credential protocol)
cat > /home/agent/.git-credentials-helper <<CREDEOF
#!/bin/sh
# Auto-generated git credential helper for Forgejo password auth (#361)
# Only respond to "get" action; ignore "store" and "erase".
[ "\$1" = "get" ] || exit 0
# Read and discard stdin (git sends protocol/host info)
cat >/dev/null
echo "protocol=${_forge_proto}"
echo "host=${_forge_host}"
echo "username=${_bot_user}"
echo "password=${FORGE_PASS}"
CREDEOF
chmod 755 /home/agent/.git-credentials-helper
chown agent:agent /home/agent/.git-credentials-helper
gosu agent bash -c "git config --global credential.helper '/home/agent/.git-credentials-helper'"
log "Git credential helper configured for ${_bot_user}@${_forge_host} (password auth)"
fi
# Set safe.directory to work around dubious ownership after container restart
# (https://github.com/disinto-admin/disinto/issues/517)
gosu agent bash -c "git config --global --add safe.directory '*'"
}
# Configure tea CLI login for forge operations (runs as agent user).
# tea stores config in ~/.config/tea/ — persistent across container restarts
# only if that directory is on a mounted volume.
configure_tea_login() {
if command -v tea &>/dev/null && [ -n "${FORGE_TOKEN:-}" ] && [ -n "${FORGE_URL:-}" ]; then
local_tea_login="forgejo"
case "$FORGE_URL" in
*codeberg.org*) local_tea_login="codeberg" ;;
esac
gosu agent bash -c "tea login add \
--name '${local_tea_login}' \
--url '${FORGE_URL}' \
--token '${FORGE_TOKEN}' \
--no-version-check 2>/dev/null || true"
log "tea login configured: ${local_tea_login}${FORGE_URL}"
else else
log "tea login: skipped (tea not found or FORGE_TOKEN/FORGE_URL not set)" log "No project TOMLs found — crontab empty"
fi fi
} }
log "Agent container starting" log "Agent container starting"
# Set USER for scripts that source lib/env.sh (e.g., OPS_REPO_ROOT default)
export USER=agent
# Verify Claude CLI is available (expected via volume mount from host). # Verify Claude CLI is available (expected via volume mount from host).
if ! command -v claude &>/dev/null; then if ! command -v claude &>/dev/null; then
log "FATAL: claude CLI not found in PATH." log "FATAL: claude CLI not found in PATH."
@ -122,119 +68,26 @@ else
log "Run 'claude auth login' on the host, or set ANTHROPIC_API_KEY in .env" log "Run 'claude auth login' on the host, or set ANTHROPIC_API_KEY in .env"
fi fi
# Configure git and tea once at startup (as root, then drop to agent) install_project_crons
configure_git_creds
configure_tea_login
# Initialize state directory for check_active guards # Configure tea CLI login for forge operations (runs as agent user).
init_state_dir # tea stores config in ~/.config/tea/ — persistent across container restarts
# only if that directory is on a mounted volume.
# Parse AGENT_ROLES env var (default: all agents) if command -v tea &>/dev/null && [ -n "${FORGE_TOKEN:-}" ] && [ -n "${FORGE_URL:-}" ]; then
# Expected format: comma-separated list like "review,dev,gardener" local_tea_login="forgejo"
AGENT_ROLES="${AGENT_ROLES:-review,dev,gardener,architect,planner,predictor}" case "$FORGE_URL" in
log "Agent roles configured: ${AGENT_ROLES}" *codeberg.org*) local_tea_login="codeberg" ;;
esac
# Poll interval in seconds (5 minutes default) su -s /bin/bash agent -c "tea login add \
POLL_INTERVAL="${POLL_INTERVAL:-300}" --name '${local_tea_login}' \
--url '${FORGE_URL}' \
log "Entering polling loop (interval: ${POLL_INTERVAL}s, roles: ${AGENT_ROLES})" --token '${FORGE_TOKEN}' \
--no-version-check 2>/dev/null || true"
# Main polling loop using iteration counter for gardener scheduling log "tea login configured: ${local_tea_login}${FORGE_URL}"
iteration=0
while true; do
iteration=$((iteration + 1))
now=$(date +%s)
# Stale .sid cleanup — needed for agents that don't support --resume
# Run this as the agent user
gosu agent bash -c "rm -f /tmp/dev-session-*.sid /tmp/review-session-*.sid 2>/dev/null || true"
# Poll each project TOML
# Fast agents (review-poll, dev-poll) run in background so they don't block
# each other. Slow agents (gardener, architect, planner, predictor) also run
# in background but are guarded by pgrep so only one instance runs at a time.
# The flock on session.lock already serializes claude -p calls.
for toml in "${DISINTO_DIR}"/projects/*.toml; do
[ -f "$toml" ] || continue
log "Processing project TOML: ${toml}"
# --- Fast agents: run in background, wait before slow agents ---
# Review poll (every iteration)
if [[ ",${AGENT_ROLES}," == *",review,"* ]]; then
log "Running review-poll (iteration ${iteration}) for ${toml}"
gosu agent bash -c "cd ${DISINTO_DIR} && bash review/review-poll.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/review-poll.log" 2>&1 &
fi
sleep 2 # stagger fast polls
# Dev poll (every iteration)
if [[ ",${AGENT_ROLES}," == *",dev,"* ]]; then
log "Running dev-poll (iteration ${iteration}) for ${toml}"
gosu agent bash -c "cd ${DISINTO_DIR} && bash dev/dev-poll.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/dev-poll.log" 2>&1 &
fi
# Wait for fast polls to finish before launching slow agents
wait
# --- Slow agents: run in background with pgrep guard ---
# Gardener (every 6 hours = 72 iterations * 5 min = 21600 seconds)
if [[ ",${AGENT_ROLES}," == *",gardener,"* ]]; then
gardener_iteration=$((iteration * POLL_INTERVAL))
gardener_interval=$((6 * 60 * 60)) # 6 hours in seconds
if [ $((gardener_iteration % gardener_interval)) -eq 0 ] && [ "$now" -ge "$gardener_iteration" ]; then
if ! pgrep -f "gardener-run.sh" >/dev/null; then
log "Running gardener (iteration ${iteration}, 6-hour interval) for ${toml}"
gosu agent bash -c "cd ${DISINTO_DIR} && bash gardener/gardener-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/gardener.log" 2>&1 &
else else
log "Skipping gardener — already running" log "tea login: skipped (tea not found or FORGE_TOKEN/FORGE_URL not set)"
fi
fi
fi fi
# Architect (every 6 hours, same schedule as gardener) # Run cron in the foreground. Cron jobs execute as the agent user.
if [[ ",${AGENT_ROLES}," == *",architect,"* ]]; then log "Starting cron daemon"
architect_iteration=$((iteration * POLL_INTERVAL)) exec cron -f
architect_interval=$((6 * 60 * 60)) # 6 hours in seconds
if [ $((architect_iteration % architect_interval)) -eq 0 ] && [ "$now" -ge "$architect_iteration" ]; then
if ! pgrep -f "architect-run.sh" >/dev/null; then
log "Running architect (iteration ${iteration}, 6-hour interval) for ${toml}"
gosu agent bash -c "cd ${DISINTO_DIR} && bash architect/architect-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/architect.log" 2>&1 &
else
log "Skipping architect — already running"
fi
fi
fi
# Planner (every 12 hours = 144 iterations * 5 min = 43200 seconds)
if [[ ",${AGENT_ROLES}," == *",planner,"* ]]; then
planner_iteration=$((iteration * POLL_INTERVAL))
planner_interval=$((12 * 60 * 60)) # 12 hours in seconds
if [ $((planner_iteration % planner_interval)) -eq 0 ] && [ "$now" -ge "$planner_iteration" ]; then
if ! pgrep -f "planner-run.sh" >/dev/null; then
log "Running planner (iteration ${iteration}, 12-hour interval) for ${toml}"
gosu agent bash -c "cd ${DISINTO_DIR} && bash planner/planner-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/planner.log" 2>&1 &
else
log "Skipping planner — already running"
fi
fi
fi
# Predictor (every 24 hours = 288 iterations * 5 min = 86400 seconds)
if [[ ",${AGENT_ROLES}," == *",predictor,"* ]]; then
predictor_iteration=$((iteration * POLL_INTERVAL))
predictor_interval=$((24 * 60 * 60)) # 24 hours in seconds
if [ $((predictor_iteration % predictor_interval)) -eq 0 ] && [ "$now" -ge "$predictor_iteration" ]; then
if ! pgrep -f "predictor-run.sh" >/dev/null; then
log "Running predictor (iteration ${iteration}, 24-hour interval) for ${toml}"
gosu agent bash -c "cd ${DISINTO_DIR} && bash predictor/predictor-run.sh \"${toml}\"" >> "${DISINTO_DIR}/../data/logs/predictor.log" 2>&1 &
else
log "Skipping predictor — already running"
fi
fi
fi
done
sleep "${POLL_INTERVAL}"
done

View file

@ -1,4 +0,0 @@
# Edge image: Caddy base plus the command-line tools installed below.
FROM caddy:latest
# bash is required because the entrypoint is launched with bash; the remaining
# tools (jq, curl, git, docker-cli, python3) are installed alongside it.
RUN apk add --no-cache bash jq curl git docker-cli python3
# Source path is relative to the build context.
COPY entrypoint-edge.sh /usr/local/bin/entrypoint-edge.sh
# Run through bash explicitly, so the script needs no executable bit.
ENTRYPOINT ["bash", "/usr/local/bin/entrypoint-edge.sh"]

File diff suppressed because it is too large Load diff

View file

@ -1,70 +0,0 @@
#!/usr/bin/env bash
# entrypoint-edge.sh — edge container entrypoint.
#
# Resolves FORGE_REPO, shallow-clones it into /opt/disinto when no checkout
# exists there yet, then starts three background children: the dispatcher,
# the supervisor loop, and Caddy. The script exits non-zero as soon as any
# one of them exits.
#
# Environment:
#   FORGE_URL        forge base URL (default: http://forgejo:3000)
#   FORGE_REPO       <owner>/<repo> to clone; when unset, derivation from a
#                    project TOML via lib/load-project.sh is attempted
#   FORGE_TOKEN      optional token injected into the clone URL
#   PROJECT_TOML     project TOML; the supervisor receives it prefixed with
#                    /opt/disinto/ (default: projects/disinto.toml)
#   FACTORY_ROOT     root used to locate lib/ and projects/ (default: /opt/disinto)
#   DISINTO_VERSION  branch or tag to clone (default: main)
set -euo pipefail

# Set USER before sourcing env.sh (Alpine doesn't set USER)
export USER="${USER:-root}"

FORGE_URL="${FORGE_URL:-http://forgejo:3000}"

# Derive FORGE_REPO from PROJECT_TOML if available, otherwise require explicit env var
if [ -z "${FORGE_REPO:-}" ]; then
  # Try to find a project TOML to derive FORGE_REPO from:
  # an explicit PROJECT_TOML wins, else the first *.toml under projects/.
  _project_toml="${PROJECT_TOML:-}"
  if [ -z "$_project_toml" ] && [ -d "${FACTORY_ROOT:-/opt/disinto}/projects" ]; then
    for toml in "${FACTORY_ROOT:-/opt/disinto}"/projects/*.toml; do
      if [ -f "$toml" ]; then
        _project_toml="$toml"
        break
      fi
    done
  fi

  if [ -n "$_project_toml" ] && [ -f "$_project_toml" ]; then
    # Parse FORGE_REPO from project TOML using load-project.sh.
    # Sourced inside `if`, so a missing or failing loader is tolerated here
    # and handled by the fail-fast check below.
    if source "${FACTORY_ROOT:-/opt/disinto}/lib/load-project.sh" "$_project_toml" 2>/dev/null; then
      if [ -n "${FORGE_REPO:-}" ]; then
        echo "Derived FORGE_REPO from PROJECT_TOML: $_project_toml" >&2
      fi
    fi
  fi

  # If still not set, fail fast with a clear error message
  if [ -z "${FORGE_REPO:-}" ]; then
    echo "FATAL: FORGE_REPO environment variable not set" >&2
    echo "Set FORGE_REPO=<owner>/<repo> in .env (e.g. FORGE_REPO=disinto-admin/disinto)" >&2
    exit 1
  fi
fi

# Shallow clone at the pinned version (inject token to support auth-required Forgejo)
if [ ! -d /opt/disinto/.git ]; then
  # Inject credentials only when a token is configured. Expanding an unset
  # FORGE_TOKEN under `set -u` would abort with a bare "unbound variable"
  # error, and an empty one would put blank credentials ("token:@host") in
  # the URL.
  if [ -n "${FORGE_TOKEN:-}" ]; then
    _auth_url=$(printf '%s' "$FORGE_URL" | sed "s|://|://token:${FORGE_TOKEN}@|")
  else
    _auth_url="$FORGE_URL"
  fi
  git clone --depth 1 --branch "${DISINTO_VERSION:-main}" "${_auth_url}/${FORGE_REPO}.git" /opt/disinto
fi

# Set HOME so that claude OAuth credentials and session.lock are found at the
# same in-container path as in disinto-agents (/home/agent/.claude), which makes
# flock cross-serialize across containers on the same host inode.
export HOME=/home/agent
mkdir -p "$HOME"

# Ensure log directory exists
mkdir -p /opt/disinto-logs

# Start dispatcher in background
bash /opt/disinto/docker/edge/dispatcher.sh &

# Start supervisor loop in background.
# `|| true` keeps the loop alive when a supervisor run fails (pipefail is on).
PROJECT_TOML="${PROJECT_TOML:-projects/disinto.toml}"
(while true; do
  bash /opt/disinto/supervisor/supervisor-run.sh "/opt/disinto/${PROJECT_TOML}" 2>&1 | tee -a /opt/disinto-logs/supervisor.log || true
  sleep 1200 # 20 minutes
done) &

# Caddy as main process — run in foreground via wait so background jobs survive
# (exec replaces the shell, which can orphan backgrounded subshells)
caddy run --config /etc/caddy/Caddyfile --adapter caddyfile &

# Exit when any child dies (caddy crash → container restart via docker compose)
wait -n
exit 1

View file

@ -1,11 +0,0 @@
# Reproduce image: Debian bookworm-slim with Docker CLI tooling, Node.js/npm
# and Chromium.
FROM debian:bookworm-slim
# Single layer: install the apt packages, install the npm package globally,
# then remove the apt package lists.
RUN apt-get update && apt-get install -y --no-install-recommends \
bash curl git jq docker.io docker-compose-plugin \
nodejs npm chromium \
&& npm install -g @anthropic-ai/mcp-playwright \
&& rm -rf /var/lib/apt/lists/*
# Create user `agent` (UID 1000) with a home directory and bash as its shell.
# No USER instruction follows, so this file does not switch the runtime user.
RUN useradd -m -u 1000 -s /bin/bash agent
# Source path is relative to the build context.
COPY docker/reproduce/entrypoint-reproduce.sh /entrypoint-reproduce.sh
RUN chmod +x /entrypoint-reproduce.sh
WORKDIR /home/agent
ENTRYPOINT ["/entrypoint-reproduce.sh"]

File diff suppressed because it is too large Load diff

View file

@ -1,106 +0,0 @@
#!/usr/bin/env bash
# entrypoint-runner.sh — Vault runner entrypoint
#
# Receives an action-id, reads the vault action TOML to get the formula name,
# then dispatches to the appropriate executor:
# - formulas/<name>.sh → bash (mechanical operations like release)
# - formulas/<name>.toml → claude -p (reasoning tasks like triage, architect)
#
# Usage: entrypoint-runner.sh <action-id>
#
# Expects:
# OPS_REPO_ROOT — path to the ops repo (mounted by compose)
# FACTORY_ROOT — path to disinto code (default: /home/agent/disinto)
#
# Part of #516.
set -euo pipefail
FACTORY_ROOT="${FACTORY_ROOT:-/home/agent/disinto}"
OPS_REPO_ROOT="${OPS_REPO_ROOT:-/home/agent/ops}"
#######################################
# Print a timestamped, "runner:"-tagged log line.
# Arguments: $@ - message words, joined into a single string
# Outputs:   "[<UTC ISO-8601 timestamp>] runner: <message>" on stdout
#######################################
log() {
  local message="$*"
  printf '[%s] runner: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "${message}"
}
# ── Argument parsing ─────────────────────────────────────────────────────
action_id="${1:-}"
if [ -z "$action_id" ]; then
  log "ERROR: action-id argument required"
  echo "Usage: entrypoint-runner.sh <action-id>" >&2
  exit 1
fi

# ── Read vault action TOML ───────────────────────────────────────────────
# NOTE(review): action_id (and, below, formula) are interpolated into file
# paths without validation — confirm callers only pass trusted identifiers.
action_toml="${OPS_REPO_ROOT}/vault/actions/${action_id}.toml"
if [ ! -f "$action_toml" ]; then
  log "ERROR: vault action TOML not found: ${action_toml}"
  exit 1
fi

# Extract formula name from TOML.
# grep exits 1 when the key is absent; with `set -e -o pipefail` that status
# would abort the script right at the assignment, before the explicit check
# below can log its error. `|| true` turns "no match" into an empty value.
formula=$( { grep -E '^formula\s*=' "$action_toml" || true; } \
  | sed -E 's/^formula\s*=\s*"(.*)"/\1/' | tr -d '\r')
if [ -z "$formula" ]; then
  log "ERROR: no 'formula' field found in ${action_toml}"
  exit 1
fi

# Extract context for logging. The field is optional (logged as <none> when
# empty), so a missing key must not fail the pipeline either.
context=$( { grep -E '^context\s*=' "$action_toml" || true; } \
  | sed -E 's/^context\s*=\s*"(.*)"/\1/' | tr -d '\r')
log "Action: ${action_id}, formula: ${formula}, context: ${context:-<none>}"

# Export action TOML path so formula scripts can use it directly
export VAULT_ACTION_TOML="$action_toml"

# ── Dispatch: .sh (mechanical) vs .toml (Claude reasoning) ──────────────
# A .sh formula takes precedence when both files exist.
formula_sh="${FACTORY_ROOT}/formulas/${formula}.sh"
formula_toml="${FACTORY_ROOT}/formulas/${formula}.toml"
if [ -f "$formula_sh" ]; then
  # Mechanical operation — run directly
  log "Dispatching to shell script: ${formula_sh}"
  exec bash "$formula_sh" "$action_id"
elif [ -f "$formula_toml" ]; then
  # Reasoning task — launch Claude with the formula as prompt
  log "Dispatching to Claude with formula: ${formula_toml}"
  formula_content=$(cat "$formula_toml")
  action_context=$(cat "$action_toml")
  prompt="You are a vault runner executing a formula-based operational task.
## Vault action
\`\`\`toml
${action_context}
\`\`\`
## Formula
\`\`\`toml
${formula_content}
\`\`\`
## Instructions
Execute the steps defined in the formula above. The vault action context provides
the specific parameters for this run. Execute each step in order, verifying
success before proceeding to the next.
FACTORY_ROOT=${FACTORY_ROOT}
OPS_REPO_ROOT=${OPS_REPO_ROOT}
"
  # --model is passed only when CLAUDE_MODEL is set and non-empty.
  exec claude -p "$prompt" \
    --dangerously-skip-permissions \
    ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"}
else
  log "ERROR: no formula found for '${formula}' — checked ${formula_sh} and ${formula_toml}"
  exit 1
fi

View file

@ -114,3 +114,4 @@ When reviewing PRs or designing new agents, ask:
| gardener | 1242 (agent 471 + poll 771) | Medium — backlog triage, duplicate detection, tech-debt scoring | Poll is heavy orchestration; agent is prompt-driven | | gardener | 1242 (agent 471 + poll 771) | Medium — backlog triage, duplicate detection, tech-debt scoring | Poll is heavy orchestration; agent is prompt-driven |
| vault | 442 (4 scripts) | Medium — approval flow, human gate decisions | Intentionally bash-heavy (security gate should be deterministic) | | vault | 442 (4 scripts) | Medium — approval flow, human gate decisions | Intentionally bash-heavy (security gate should be deterministic) |
| planner | 382 | Medium — AGENTS.md update, gap analysis | Tmux+formula (done, #232) | | planner | 382 | Medium — AGENTS.md update, gap analysis | Tmux+formula (done, #232) |
| action-agent | 192 | Light — formula execution | Close to target |

View file

@ -1,25 +0,0 @@
# Vault blast-radius tiers
## Tiers
| Tier | Meaning | Dispatch path |
|------|---------|---------------|
| low | Revertable, no external side effects | Direct commit to ops main; no human gate |
| medium | Significant but reversible | PR on ops repo; blocks calling agent until merged |
| high | Irreversible or high-blast-radius | PR on ops repo; hard blocks |
## Which agents are affected
Vault-blocking applies to: predictor, planner, architect, deploy pipelines, releases, shipping.
It does NOT apply to dev-agent — dev-agent work is always committed to a feature branch and
revertable via git revert. Dev-agent never needs a vault gate.
## Default tier
Unknown formulas default to `high`. When adding a new formula, add it to
`vault/policy.toml` (in ops repo, seeded during disinto init from disinto repo template).
## Per-action override
A vault action TOML may include `blast_radius = "low"` to override the policy tier
for that specific invocation. Use sparingly — policy.toml is the authoritative source.

View file

@ -39,11 +39,9 @@ programmatically instead of parsing SKILL.md instructions.
(`mcp` package). This adds a build step, runtime dependency, and (`mcp` package). This adds a build step, runtime dependency, and
language that no current contributor or agent maintains. language that no current contributor or agent maintains.
2. **Persistent process.** The factory already runs a long-lived polling loop 2. **Persistent process.** The factory is cron-driven — no long-running
(`docker/agents/entrypoint.sh`), so an MCP server is not architecturally daemons. An MCP server must stay up, be monitored, and be restarted on
alien — the loop could keep an MCP client alive across iterations. However, failure. This contradicts the factory's event-driven architecture (AD-004).
adding a second long-running process increases the monitoring surface and
restart complexity.
3. **Thin wrapper over existing APIs.** Every proposed MCP tool maps directly 3. **Thin wrapper over existing APIs.** Every proposed MCP tool maps directly
to a forge API call or a skill script invocation. The MCP server would be to a forge API call or a skill script invocation. The MCP server would be

View file

@ -92,9 +92,10 @@ PHASE:failed → label issue blocked, post diagnostic comment
### `idle_prompt` exit reason ### `idle_prompt` exit reason
The phase monitor can exit with `_MONITOR_LOOP_EXIT=idle_prompt`. This happens `monitor_phase_loop` (in `lib/agent-session.sh`) can exit with
when Claude returns to the interactive prompt (``) for **3 consecutive polls** `_MONITOR_LOOP_EXIT=idle_prompt`. This happens when Claude returns to the
without writing any phase signal to the phase file. interactive prompt (``) for **3 consecutive polls** without writing any phase
signal to the phase file.
**Trigger conditions:** **Trigger conditions:**
- The phase file is empty (no phase has ever been written), **and** - The phase file is empty (no phase has ever been written), **and**
@ -110,13 +111,14 @@ without writing any phase signal to the phase file.
callback without the phase file actually containing that value. callback without the phase file actually containing that value.
**Agent requirements:** **Agent requirements:**
- **Callback:** Must handle `PHASE:failed` defensively — the session is already - **Callback (`_on_phase_change` / `formula_phase_callback`):** Must handle
dead, so any tmux send-keys or session-dependent logic must be skipped or `PHASE:failed` defensively — the session is already dead, so any tmux
guarded. send-keys or session-dependent logic must be skipped or guarded.
- **Post-loop exit handler (`case $_MONITOR_LOOP_EXIT`):** Must include an - **Post-loop exit handler (`case $_MONITOR_LOOP_EXIT`):** Must include an
`idle_prompt)` branch. Typical actions: log the event, clean up temp files, `idle_prompt)` branch. Typical actions: log the event, clean up temp files,
and (for agents that use escalation) write an escalation entry or notify via and (for agents that use escalation) write an escalation entry or notify via
vault/forge. See `dev/dev-agent.sh` for reference implementations. vault/forge. See `dev/dev-agent.sh`, `action/action-agent.sh`, and
`gardener/gardener-agent.sh` for reference implementations.
## Crash Recovery ## Crash Recovery

View file

@ -1,101 +0,0 @@
# Vault PR Workflow
This document describes the vault PR-based approval workflow for the ops repo.
## Overview
The vault system enables agents to request execution of privileged actions (deployments, token operations, etc.) through a PR-based approval process. This replaces the old vault directory structure with a more auditable, collaborative workflow.
## Branch Protection
The `main` branch on the ops repo (`johba/disinto-ops`) is protected via Forgejo branch protection to enforce:
- **Require 1 approval before merge** — All vault PRs must have at least one approval from an admin user
- **Admin-only merge** — Only users with admin role can merge vault PRs (regular collaborators and bot accounts cannot)
- **Block direct pushes** — All changes to `main` must go through PRs
### Protection Rules
| Setting | Value |
|---------|-------|
| `enable_push` | `false` |
| `enable_force_push` | `false` |
| `enable_merge_commit` | `true` |
| `required_approvals` | `1` |
| `admin_enforced` | `true` |
## Vault PR Lifecycle
1. **Request** — Agent calls `lib/vault.sh:vault_request()` with action TOML content
2. **Validation** — TOML is validated against the schema in `vault/vault-env.sh`
3. **PR Creation** — A PR is created on `disinto-ops` with:
- Branch: `vault/<action-id>`
- Title: `vault: <action-id>`
- Labels: `vault`, `pending-approval`
- File: `vault/actions/<action-id>.toml`
- **Auto-merge enabled** — Forgejo will auto-merge after approval
4. **Approval** — Admin user reviews and approves the PR
5. **Auto-merge** — Forgejo automatically merges the PR once required approvals are met
6. **Execution** — Dispatcher (issue #76) polls for merged vault PRs and executes them
7. **Cleanup** — Executed vault items are moved to `fired/` (via PR)
## Bot Account Behavior
Bot accounts (dev-bot, review-bot, vault-bot, etc.) **cannot merge vault PRs** even if they have approval, due to the `admin_enforced` setting. This ensures:
- Only human admins can approve sensitive vault actions
- Bot accounts can only create vault PRs, not execute them
- Bot accounts cannot self-approve vault PRs (Forgejo prevents this automatically)
- Manual admin review is always required for privileged operations
## Setup
To set up branch protection on the ops repo:
```bash
# Source environment
source lib/env.sh
source lib/branch-protection.sh
# Set up protection
setup_vault_branch_protection main
# Verify setup
verify_branch_protection main
```
Or use the CLI directly:
```bash
export FORGE_TOKEN="<admin-token>"
export FORGE_URL="https://codeberg.org"
export FORGE_OPS_REPO="johba/disinto-ops"
# Set up protection
bash lib/branch-protection.sh setup main
# Verify
bash lib/branch-protection.sh verify main
```
## Testing
To verify the protection is working:
1. **Bot cannot merge** — Attempt to merge a PR with a bot token (should fail with HTTP 405)
2. **Admin can merge** — Attempt to merge with admin token (should succeed)
3. **Direct push blocked** — Attempt `git push origin main` (should be rejected)
## Related Issues
- #73 — Vault redesign proposal
- #74 — Vault action TOML schema
- #75 — Vault PR creation helper (`lib/vault.sh`)
- #76 — Dispatcher rewrite (poll for merged vault PRs)
- #77 — Branch protection on ops repo (this issue)
## See Also
- [`lib/vault.sh`](../lib/vault.sh) — Vault PR creation helper
- [`vault/vault-env.sh`](../vault/vault-env.sh) — TOML validation
- [`lib/branch-protection.sh`](../lib/branch-protection.sh) — Branch protection helper

View file

@ -1,196 +0,0 @@
# Updating the Disinto Factory
How to update the disinto factory code on a deployment box (e.g. harb-dev-box)
after a new version lands on the upstream Forgejo.
## Prerequisites
- SSH access to the deployment box
- The upstream remote (`devbox`) pointing to the disinto-dev-box Forgejo
## Step 1: Pull the latest code
```bash
cd ~/disinto
git fetch devbox main
git log --oneline devbox/main -5 # review what changed
git stash # save any local fixes
git merge devbox/main
```
If merge conflicts on `docker-compose.yml`: delete it and regenerate in step 3.
## Step 2: Preserve local config
These files are not in git but are needed at runtime. Back them up before
any compose regeneration:
```bash
cp .env .env.backup
cp projects/harb.toml projects/harb.toml.backup
cp docker-compose.override.yml docker-compose.override.yml.backup 2>/dev/null
```
## Step 3: Regenerate docker-compose.yml (if needed)
Only needed if `generate_compose()` changed or the compose was deleted.
```bash
rm docker-compose.yml
source .env
bin/disinto init https://codeberg.org/johba/harb --branch master --yes
```
This will regenerate the compose but may fail partway through (token collisions,
existing users). The compose file is written early — check it exists even if
init errors out.
### Known post-regeneration fixes (until #429 lands)
The generated compose has several issues on LXD deployments:
**1. AppArmor (#492)** — Add to ALL services:
```bash
sed -i '/^ forgejo:/a\ security_opt:\n - apparmor=unconfined' docker-compose.yml
sed -i '/^ agents:/a\ security_opt:\n - apparmor=unconfined' docker-compose.yml
# repeat for: agents-llama, edge, woodpecker, woodpecker-agent, staging, reproduce
```
**2. Forgejo image tag (#493)**:
```bash
sed -i 's|forgejo/forgejo:.*|forgejo/forgejo:11.0|' docker-compose.yml
```
**3. Agent credential mounts (#495)** — Add to agents volumes:
```yaml
- ${HOME}/.claude:/home/agent/.claude
- ${HOME}/.claude.json:/home/agent/.claude.json:ro
- ${HOME}/.ssh:/home/agent/.ssh:ro
- project-repos:/home/agent/repos
```
**4. Repo path (#494)** — Fix `projects/harb.toml` if init overwrote it:
```bash
sed -i 's|repo_root.*=.*"/home/johba/harb"|repo_root = "/home/agent/repos/harb"|' projects/harb.toml
sed -i 's|ops_repo_root.*=.*"/home/johba/harb-ops"|ops_repo_root = "/home/agent/repos/harb-ops"|' projects/harb.toml
```
**5. Add missing volumes** to the `volumes:` section at the bottom:
```yaml
volumes:
project-repos:
project-repos-llama:
disinto-logs:
```
## Step 4: Rebuild and restart
```bash
# Rebuild agents image (code is baked in via COPY)
docker compose build agents
# Restart all disinto services
docker compose up -d
# If edge fails to build (caddy:alpine has no apt-get), skip it:
docker compose up -d forgejo woodpecker woodpecker-agent agents staging
```
## Step 5: Verify
```bash
# All containers running?
docker ps --format 'table {{.Names}}\t{{.Status}}' | grep disinto
# Forgejo responding?
curl -sf -o /dev/null -w 'HTTP %{http_code}' http://localhost:3000/
# Claude auth works?
docker exec -u agent disinto-agents bash -c 'claude -p "say ok" 2>&1'
# Agent polling loop running?
docker exec disinto-agents pgrep -f entrypoint.sh
# If no process: check that entrypoint.sh is the container CMD and projects TOML is mounted.
# Agent repo cloned?
docker exec disinto-agents ls /home/agent/repos/harb/.git && echo ok
# If missing:
docker exec disinto-agents chown -R agent:agent /home/agent/repos
source .env
docker exec -u agent disinto-agents bash -c \
"git clone http://dev-bot:${FORGE_TOKEN}@forgejo:3000/johba/harb.git /home/agent/repos/harb"
# Git safe.directory (needed after volume recreation)
docker exec -u agent disinto-agents git config --global --add safe.directory /home/agent/repos/harb
```
## Step 6: Verify harb stack coexistence
```bash
# Harb stack still running?
cd ~/harb && docker compose ps --format 'table {{.Name}}\t{{.Status}}'
# No port conflicts?
# Forgejo: 3000, Woodpecker: 8000, harb caddy: 8081, umami: 3001
ss -tlnp | grep -E '3000|3001|8000|8081'
```
## Step 7: Docker disk hygiene
The reproduce image is ~1.3GB. Dangling images accumulate fast.
```bash
# Check disk
df -h /
# Prune dangling images (safe — only removes unused)
docker image prune -f
# Nuclear option (removes ALL unused images, stopped containers, networks, and build cache; volumes are kept unless --volumes is added):
docker system prune -af
# WARNING: this removes cached layers, requiring full rebuilds
```
## Troubleshooting
### Forgejo at 170%+ CPU, not responding
AppArmor issue. Add `security_opt: [apparmor=unconfined]` and recreate:
```bash
docker compose up -d forgejo
```
### "Not logged in" / OAuth expired
Re-auth on the host:
```bash
claude auth login
```
Credentials are bind-mounted into containers automatically.
Multiple containers sharing OAuth can cause frequent expiry — consider
using `ANTHROPIC_API_KEY` in `.env` instead.
### Agent loop not running after restart
The entrypoint reads `projects/*.toml` to determine which agents to run.
If the TOML isn't mounted or the disinto directory is read-only,
the polling loop won't start agents. Check:
```bash
docker exec disinto-agents ls /home/agent/disinto/projects/harb.toml
docker logs disinto-agents --tail 20 # look for "Entering polling loop"
```
### "fatal: not a git repository"
After image rebuilds, the baked-in `/home/agent/disinto` has no `.git`.
This breaks review-pr.sh (#408). Workaround:
```bash
docker exec -u agent disinto-agents git config --global --add safe.directory '*'
```
### Dev-agent stuck on closed issue
The dev-poll latches onto in-progress issues. If the issue was closed
externally, the agent skips it every cycle but never moves on. Check:
```bash
docker exec disinto-agents tail -5 /home/agent/data/logs/dev/dev-agent.log
```
Fix: clean the worktree and let it re-scan:
```bash
docker exec disinto-agents rm -rf /tmp/harb-worktree-*
```

View file

@ -1,175 +0,0 @@
# formulas/dev.toml — Dev agent formula (issue implementation)
#
# Executed by dev/dev-agent.sh via tmux session with Claude.
# dev-agent.sh is called by dev-poll.sh which finds the next ready issue
# from the backlog (priority tier first, then plain backlog).
#
# Steps: preflight → implement → CI → review → merge → journal
#
# Key behaviors:
# - Creates worktree for isolation
# - Uses tmux session for persistent Claude interaction
# - Phase-file signaling for orchestrator coordination
# - Auto-retry on CI failures (max 3 attempts)
# - Direct-merge for approved PRs (bypasses lock)
name = "dev"
description = "Issue implementation: code, commit, push, address CI/review"
version = 1
model = "sonnet"
[context]
files = ["AGENTS.md", "dev/AGENTS.md", "lib/env.sh", "lib/pr-lifecycle.sh", "lib/ci-helpers.sh"]
[[steps]]
id = "preflight"
title = "Review the issue and prepare implementation plan"
description = """
Read the issue body carefully. Understand:
- What needs to be implemented
- Any dependencies (check `## Dependencies` section)
- Existing code that might be affected
- Testing requirements
Then create a plan:
1. What files need to be modified/created
2. What tests need to be added
3. Any documentation updates
Check the preflight metrics from supervisor if available:
cat "$OPS_REPO_ROOT/journal/supervisor/$(date -u +%Y-%m-%d).md"
Note: Only proceed if all dependency issues are closed.
"""
[[steps]]
id = "implement"
title = "Write code to implement the issue"
description = """
Implement the changes:
1. Create a new worktree:
cd "$PROJECT_REPO_ROOT"
git worktree add -b "dev/{agent}-{issue}" ../{agent}-{issue}
2. Make your changes to the codebase
3. Add tests if applicable
4. Update documentation if needed
5. Commit with conventional commits:
git add -A
git commit -m "feat({issue}): {description}"
6. Push to forge:
git push -u origin dev/{agent}-{issue}
7. Create PR via API or web interface
- Title: feat({issue}): {description}
- Body: Link to issue, describe changes
- Labels: backlog, in-progress
Note: The worktree is preserved on crash for debugging.
"""
needs = ["preflight"]
[[steps]]
id = "ci"
title = "Wait for CI and address failures"
description = """
Monitor CI pipeline status via Woodpecker API:
woodpecker_api /repos/${WOODPECKER_REPO_ID}/pipelines?branch=dev/{agent}-{issue}
Wait for CI to complete. If CI fails:
1. Read the CI logs to understand the failure
2. Fix the issue
3. Amend commit and force push
4. Track CI attempts (max 3 retries)
CI fix tracker file:
$DISINTO_LOG_DIR/dev/ci-fixes-{project}.json
On CI success, proceed to review.
If CI exhausted (3 failures), escalate via PHASE:escalate.
"""
needs = ["implement"]
[[steps]]
id = "review"
title = "Address review feedback"
description = """
Check PR for review comments:
curl -sf "${FORGE_API}/pulls/{pr-number}/comments"
For each comment:
1. Understand the feedback
2. Make changes to fix the issue
3. Amend commit and force push
4. Address the comment in the PR
If review approves, proceed to merge.
If stuck or needs clarification, escalate via PHASE:escalate.
"""
needs = ["ci"]
[[steps]]
id = "merge"
title = "Merge the PR"
description = """
Check if PR is approved and CI is green:
curl -sf "${FORGE_API}/pulls/{pr-number}"
If approved (merged=true or approved_by set):
1. Merge the PR:
curl -sf -X POST "${FORGE_API}/pulls/{pr-number}/merge" \\
-H "Content-Type: application/json" \\
-d '{"Do":"merge"}'
2. Mirror push to other remotes:
mirror_push
3. Close the issue:
curl -sf -X PATCH "${FORGE_API}/issues/{issue-number}" \\
-d '{"state":"closed"}'
4. Delete the branch:
git push origin --delete dev/{agent}-{issue}
If direct merge is blocked, note in journal and escalate.
"""
needs = ["review"]
[[steps]]
id = "journal"
title = "Write implementation journal"
description = """
Append a timestamped entry to the dev journal:
File path:
$OPS_REPO_ROOT/journal/dev/$(date -u +%Y-%m-%d).md
If the file already exists (multiple PRs merged same day), append.
If it does not exist, create it.
Format:
## Dev implementation — {issue-number}
Time: {timestamp}
PR: {pr-number}
Branch: dev/{agent}-{issue}
### Changes
- {summary of changes}
### CI attempts: {n}
### Review feedback: {n} comments addressed
### Lessons learned
- {what you learned during implementation}
### Knowledge added
If you discovered something new, add to knowledge:
echo "### Lesson title
Description." >> "${OPS_REPO_ROOT}/knowledge/{topic}.md"
After writing the journal, write the phase signal:
echo 'PHASE:done' > "$PHASE_FILE"
"""
needs = ["merge"]

View file

@ -203,7 +203,7 @@ If all tiers clear, write the completion summary and signal done:
echo "ACTION: grooming complete — 0 tech-debt remaining" >> "$RESULT_FILE" echo "ACTION: grooming complete — 0 tech-debt remaining" >> "$RESULT_FILE"
echo 'PHASE:done' > "$PHASE_FILE" echo 'PHASE:done' > "$PHASE_FILE"
Vault items filed during this run appear as PRs on ops repo for human approval. Vault items filed during this run are picked up by vault-poll automatically.
On unrecoverable error (API unavailable, repeated failures): On unrecoverable error (API unavailable, repeated failures):
printf 'PHASE:failed\nReason: %s\n' 'describe what failed' > "$PHASE_FILE" printf 'PHASE:failed\nReason: %s\n' 'describe what failed' > "$PHASE_FILE"

View file

@ -1,187 +0,0 @@
#!/usr/bin/env bash
# formulas/release.sh — Mechanical release script
#
# Implements the release workflow without Claude:
# 1. Validate prerequisites (preflight)
# 2. Tag Forgejo main via API
# 3. Push tag to mirrors (Codeberg, GitHub) via token auth
# 4. Build the agents Docker image
# 5. Tag the agents Docker image with the release version
# 6. Restart agent containers
#
# Usage: release.sh <action-id>
#
# Expects env vars:
# FORGE_URL, FORGE_TOKEN, FORGE_REPO, PRIMARY_BRANCH
# GITHUB_TOKEN — for pushing tags to GitHub mirror
# CODEBERG_TOKEN — for pushing tags to Codeberg mirror
#
# The action TOML context field must contain the version, e.g.:
# context = "Release v1.2.0"
#
# Part of #516.
# Strict mode: abort on command failure, unset variables, and failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Install locations; values already present in the environment take precedence.
: "${FACTORY_ROOT:=/home/agent/disinto}"
: "${OPS_REPO_ROOT:=/home/agent/ops}"

# Print one UTC-timestamped, "release:"-prefixed line to stdout.
# Arguments: the message words (joined with single spaces via "$*").
log() {
  local message="$*"
  printf '[%s] release: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$message"
}
# ── Argument parsing ─────────────────────────────────────────────────────
# VAULT_ACTION_TOML is exported by the runner entrypoint (entrypoint-runner.sh)

# First positional argument: id of the vault action being executed.
action_id="${1:-}"
if [ -z "$action_id" ]; then
  log "ERROR: action-id argument required"
  exit 1
fi

# Prefer the path exported by the runner; otherwise derive it from the
# ops repo layout and the action id.
action_toml="${VAULT_ACTION_TOML:-${OPS_REPO_ROOT}/vault/actions/${action_id}.toml}"
if [ ! -f "$action_toml" ]; then
  log "ERROR: vault action TOML not found: ${action_toml}"
  exit 1
fi

# Extract version from context field (e.g. "Release v1.2.0" → "v1.2.0")
# `|| true`: with `set -euo pipefail` a TOML without a context line makes grep
# (and therefore the assignment) fail, which would abort the script before the
# explicit error message below is logged.
context=$(grep -E '^context\s*=' "$action_toml" \
  | sed -E 's/^context\s*=\s*"(.*)"/\1/' | tr -d '\r') || true

# `head -n 1`: a context mentioning several versions would otherwise produce a
# multi-line value that ends up inside URLs and JSON payloads; use the first.
RELEASE_VERSION=$(printf '%s\n' "$context" \
  | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+' | head -n 1) || true

if [ -z "${RELEASE_VERSION:-}" ]; then
  log "ERROR: could not extract version from context: '${context}'"
  log "Context must contain a version like v1.2.0"
  exit 1
fi

log "Starting release ${RELEASE_VERSION} (action: ${action_id})"
# ── Step 1: Preflight ────────────────────────────────────────────────────
# Verifies everything the later steps rely on before anything is modified.
log "Step 1/6: Preflight checks"

# Validate version format.
# [[ =~ ]] anchors against the whole string; a line-oriented grep would accept
# a multi-line value as long as each individual line looked like a version.
if ! [[ "$RELEASE_VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
  log "ERROR: invalid version format: ${RELEASE_VERSION}"
  exit 1
fi

# Required env vars (indirect expansion reads the variable named by $var)
for var in FORGE_URL FORGE_TOKEN FORGE_REPO PRIMARY_BRANCH; do
  if [ -z "${!var:-}" ]; then
    log "ERROR: required env var not set: ${var}"
    exit 1
  fi
done

# Check Docker access
if ! docker info >/dev/null 2>&1; then
  log "ERROR: Docker not accessible"
  exit 1
fi

# Check tag doesn't already exist on Forgejo.
# NOTE: any curl failure (HTTP error status or connection problem) is treated
# as "tag does not exist" here.
if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
  "${FORGE_URL}/api/v1/repos/${FORGE_REPO}/tags/${RELEASE_VERSION}" >/dev/null 2>&1; then
  log "ERROR: tag ${RELEASE_VERSION} already exists on Forgejo"
  exit 1
fi

log "Preflight passed"
# ── Step 2: Tag main via Forgejo API ─────────────────────────────────────
# Resolves the current head commit of PRIMARY_BRANCH and creates the release
# tag on it through the Forgejo REST API.
log "Step 2/6: Creating tag ${RELEASE_VERSION} on Forgejo"

# Get HEAD SHA of primary branch.
# `|| true`: with `set -euo pipefail` a failing curl/jq would abort the script
# right here, before the explicit error below can be logged.
head_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
  "${FORGE_URL}/api/v1/repos/${FORGE_REPO}/branches/${PRIMARY_BRANCH}" \
  | jq -r '.commit.id // empty') || true

if [ -z "$head_sha" ]; then
  log "ERROR: could not get HEAD SHA for ${PRIMARY_BRANCH}"
  exit 1
fi

# Create tag via API; report the failure instead of exiting silently.
if ! curl -sf -X POST \
  -H "Authorization: token ${FORGE_TOKEN}" \
  -H "Content-Type: application/json" \
  "${FORGE_URL}/api/v1/repos/${FORGE_REPO}/tags" \
  -d "{\"tag_name\":\"${RELEASE_VERSION}\",\"target\":\"${head_sha}\",\"message\":\"Release ${RELEASE_VERSION}\"}" \
  >/dev/null; then
  log "ERROR: failed to create tag ${RELEASE_VERSION} on Forgejo"
  exit 1
fi

log "Tag ${RELEASE_VERSION} created (SHA: ${head_sha})"
# ── Step 3: Push tag to mirrors ──────────────────────────────────────────
# Best effort: a mirror without a token, or a failed push, only logs a
# WARNING and the release continues.
log "Step 3/6: Pushing tag to mirrors"

# Repository name = FORGE_REPO with the owner prefix stripped
# (e.g. "disinto-admin/disinto" → "disinto")
project_name="${FORGE_REPO##*/}"

# GitHub mirror — needs GITHUB_TOKEN; the tag is created as a git ref via the API.
if [ -z "${GITHUB_TOKEN:-}" ]; then
  log "WARNING: GITHUB_TOKEN not set — skipping GitHub mirror"
else
  log "Pushing tag to GitHub mirror"
  github_refs_url="https://api.github.com/repos/Disinto/${project_name}/git/refs"
  github_payload="{\"ref\":\"refs/tags/${RELEASE_VERSION}\",\"sha\":\"${head_sha}\"}"
  if curl -sf -X POST \
    -H "Authorization: token ${GITHUB_TOKEN}" \
    -H "Accept: application/vnd.github+json" \
    "$github_refs_url" \
    -d "$github_payload" \
    >/dev/null 2>&1; then
    log "GitHub: tag pushed"
  else
    log "WARNING: GitHub tag push failed (may already exist)"
  fi
fi

# Codeberg mirror — needs CODEBERG_TOKEN; same request shape as the Forgejo
# tag creation, sent to codeberg.org under the owner taken from FORGE_REPO.
if [ -z "${CODEBERG_TOKEN:-}" ]; then
  log "WARNING: CODEBERG_TOKEN not set — skipping Codeberg mirror"
else
  log "Pushing tag to Codeberg mirror"
  codeberg_owner="${FORGE_REPO%%/*}"
  codeberg_tags_url="https://codeberg.org/api/v1/repos/${codeberg_owner}/${project_name}/tags"
  codeberg_payload="{\"tag_name\":\"${RELEASE_VERSION}\",\"target\":\"${head_sha}\",\"message\":\"Release ${RELEASE_VERSION}\"}"
  if curl -sf -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H "Content-Type: application/json" \
    "$codeberg_tags_url" \
    -d "$codeberg_payload" \
    >/dev/null 2>&1; then
    log "Codeberg: tag pushed"
  else
    log "WARNING: Codeberg tag push failed (may already exist)"
  fi
fi
# ── Step 4: Build agents Docker image ────────────────────────────────────
# From here on the tag is already published, so every failure is logged
# explicitly instead of letting `set -e` end the script without a message.
log "Step 4/6: Building agents Docker image"

if ! cd "$FACTORY_ROOT"; then
  log "ERROR: cannot change directory to FACTORY_ROOT: ${FACTORY_ROOT}"
  exit 1
fi

# Only the last lines of the build output are kept; the pipeline status
# still reflects a failed build because pipefail is enabled for this script.
if ! docker compose build --no-cache agents 2>&1 | tail -5; then
  log "ERROR: docker compose build failed for agents"
  exit 1
fi
log "Image built"

# ── Step 5: Tag image with version ───────────────────────────────────────
log "Step 5/6: Tagging image"

if ! docker tag disinto/agents:latest "disinto/agents:${RELEASE_VERSION}"; then
  log "ERROR: could not tag disinto/agents:latest as disinto/agents:${RELEASE_VERSION}"
  exit 1
fi
log "Tagged disinto/agents:${RELEASE_VERSION}"

# ── Step 6: Restart agent containers ─────────────────────────────────────
log "Step 6/6: Restarting agent containers"

# Stopping is best effort: errors from `stop` are deliberately ignored.
docker compose stop agents agents-llama 2>/dev/null || true
if ! docker compose up -d agents agents-llama; then
  log "ERROR: failed to start agent containers"
  exit 1
fi
log "Agent containers restarted"

# ── Done ─────────────────────────────────────────────────────────────────
log "Release ${RELEASE_VERSION} completed successfully"

View file

@ -1,245 +0,0 @@
# formulas/release.toml — Release formula
#
# Defines the release workflow: tag Forgejo main, push to mirrors, build
# and tag the agents Docker image, and restart agents.
#
# Triggered by vault PR approval (human creates vault PR, approves it, then
# runner executes via `disinto run <id>`).
#
# Example vault item:
# id = "release-v1.2.0"
# formula = "release"
# context = "Tag v1.2.0 — includes vault redesign, .profile system, architect agent"
# secrets = []
#
# Steps: preflight → tag-main → push-mirrors → build-image → tag-image → restart-agents → commit-result
name = "release"
description = "Tag Forgejo main, push to mirrors, build and tag agents image, restart agents"
version = 1
[context]
files = ["docker-compose.yml"]
# ─────────────────────────────────────────────────────────────────────────────────
# Step 1: preflight
# ─────────────────────────────────────────────────────────────────────────────────
[[steps]]
id = "preflight"
title = "Validate release prerequisites"
description = """
Validate release prerequisites before proceeding.
1. Check that RELEASE_VERSION is set:
- Must be in format: v1.2.3 (semver with 'v' prefix)
- Validate with regex: ^v[0-9]+\\.[0-9]+\\.[0-9]+$
- If not set, exit with error
2. Check that FORGE_TOKEN and FORGE_URL are set:
- Required for Forgejo API calls
3. Check that DOCKER_HOST is accessible:
- Test with: docker info
- Required for image build
4. Check current branch is main:
- git rev-parse --abbrev-ref HEAD
- Must be 'main' or 'master'
5. Pull latest code:
- git fetch origin "$PRIMARY_BRANCH"
- git reset --hard origin/"$PRIMARY_BRANCH"
- Ensure working directory is clean
6. Check if tag already exists locally:
- git tag -l "$RELEASE_VERSION"
- If exists, exit with error
7. Check if tag already exists on Forgejo:
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/tags/$RELEASE_VERSION"
- If exists, exit with error
8. Export RELEASE_VERSION for subsequent steps:
- export RELEASE_VERSION (already set from vault action)
"""
# ─────────────────────────────────────────────────────────────────────────────────
# Step 2: tag-main
# ─────────────────────────────────────────────────────────────────────────────────
[[steps]]
id = "tag-main"
title = "Create tag on Forgejo main via API"
description = """
Create the release tag on Forgejo main via the Forgejo API.
1. Get current HEAD SHA of main:
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/branches/$PRIMARY_BRANCH"
- Parse the commit.id field from the response (the branch HEAD SHA) into HEAD_SHA
2. Create tag via Forgejo API:
- curl -sf -X POST \
- -H "Authorization: token $FORGE_TOKEN" \
- -H "Content-Type: application/json" \
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/tags" \
- -d "{\"tag_name\":\"$RELEASE_VERSION\",\"target\":\"$HEAD_SHA\",\"message\":\"Release $RELEASE_VERSION\"}"
- Parse response for success
3. Log the tag creation:
- echo "Created tag $RELEASE_VERSION on Forgejo (SHA: $HEAD_SHA)"
4. Store HEAD SHA for later verification:
- echo "$HEAD_SHA" > /tmp/release-head-sha
"""
# ─────────────────────────────────────────────────────────────────────────────────
# Step 3: push-mirrors
# ─────────────────────────────────────────────────────────────────────────────────
[[steps]]
id = "push-mirrors"
title = "Push tag to mirrors (Codeberg, GitHub)"
description = """
Push the newly created tag to all configured mirrors.
1. Add mirror remotes if not already present:
- Codeberg: git remote add codeberg git@codeberg.org:${FORGE_REPO_OWNER}/${PROJECT_NAME}.git
- GitHub: git remote add github git@github.com:disinto/${PROJECT_NAME}.git
- Check with: git remote -v
2. Push tag to Codeberg:
- git push codeberg "$RELEASE_VERSION"
- Or push all tags: git push codeberg --tags
3. Push tag to GitHub:
- git push github "$RELEASE_VERSION"
- Or push all tags: git push github --tags
4. Verify tags exist on mirrors:
- curl -sf -H "Authorization: token $GITHUB_TOKEN" \
- "https://api.github.com/repos/disinto/${PROJECT_NAME}/git/ref/tags/$RELEASE_VERSION"
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/tags/$RELEASE_VERSION"
5. Log success:
- echo "Tag $RELEASE_VERSION pushed to mirrors"
"""
# ─────────────────────────────────────────────────────────────────────────────────
# Step 4: build-image
# ─────────────────────────────────────────────────────────────────────────────────
[[steps]]
id = "build-image"
title = "Build agents Docker image"
description = """
Build the new agents Docker image with the tagged code.
1. Build image without cache to ensure fresh build:
- docker compose build --no-cache agents
2. Verify image was created:
- docker images | grep disinto-agents
- Check image exists and has recent timestamp
3. Store image ID for later:
- docker images disinto-agents --format "{{.ID}}" > /tmp/release-image-id
4. Log build completion:
- echo "Built disinto-agents image"
"""
# ─────────────────────────────────────────────────────────────────────────────────
# Step 5: tag-image
# ─────────────────────────────────────────────────────────────────────────────────
[[steps]]
id = "tag-image"
title = "Tag Docker image with version"
description = """
Tag the newly built agents image with the release version.
1. Get the untagged image ID:
- docker images disinto-agents --format "{{.ID}}" --no-trunc | head -1
2. Tag the image:
- docker tag disinto-agents disinto-agents:$RELEASE_VERSION
3. Verify tag:
- docker images disinto-agents
4. Log tag:
- echo "Tagged disinto-agents:$RELEASE_VERSION"
"""
# ─────────────────────────────────────────────────────────────────────────────────
# Step 6: restart-agents
# ─────────────────────────────────────────────────────────────────────────────────
[[steps]]
id = "restart-agents"
title = "Restart agent containers with new image"
description = """
Restart agent containers to use the new image.
1. Pull the new image (in case it was pushed somewhere):
- docker compose pull agents
2. Stop and remove existing agent containers:
- docker compose down agents agents-llama 2>/dev/null || true
3. Start agents with new image:
- docker compose up -d agents agents-llama
4. Wait for containers to be healthy:
- for i in {1..30}; do
- if docker inspect --format='{{.State.Health.Status}}' agents | grep -q healthy; then
- echo "Agents container healthy"; break
- fi
- sleep 5
- done
5. Verify containers are running:
- docker compose ps agents agents-llama
6. Log restart:
- echo "Restarted agents containers"
"""
# ─────────────────────────────────────────────────────────────────────────────────
# Step 7: commit-result
# ─────────────────────────────────────────────────────────────────────────────────
[[steps]]
id = "commit-result"
title = "Write release result"
description = """
Write the release result to a file for tracking.
1. Get the image ID:
- IMAGE_ID=$(cat /tmp/release-image-id)
2. Create result file:
- cat > /tmp/release-result.json <<EOF
- {
- "version": "$RELEASE_VERSION",
- "image_id": "$IMAGE_ID",
- "forgejo_tag_url": "$FORGE_URL/$FORGE_REPO/src/$RELEASE_VERSION",
- "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
- "status": "success"
- }
- EOF
3. Copy result to data directory:
- mkdir -p "$PROJECT_REPO_ROOT/release"
- cp /tmp/release-result.json "$PROJECT_REPO_ROOT/release/$RELEASE_VERSION.json"
4. Log result:
- cat /tmp/release-result.json
5. Clean up temp files:
- rm -f /tmp/release-head-sha /tmp/release-image-id /tmp/release-result.json
"""

View file

@ -1,37 +0,0 @@
# formulas/reproduce.toml — Reproduce-agent formula
#
# Declares the reproduce-agent's runtime parameters.
# The dispatcher reads this to configure the sidecar container.
#
# stack_script: path (relative to PROJECT_REPO_ROOT) of the script used to
# restart/rebuild the project stack before reproduction. Omit (or leave
# blank) to connect to an existing staging environment instead.
#
# tools: MCP servers to pass to claude via --mcp-server flags.
#
# timeout_minutes: hard upper bound on the Claude session.
#
# Exit gate logic (standard mode):
# 1. Can I reproduce it? → NO → rejected/blocked → EXIT
# → YES → continue
# 2. Is the cause obvious? → YES → in-progress + backlog issue → EXIT
# → NO → in-triage → EXIT
#
# Exit gate logic (verification mode):
# Triggered when all sub-issues of a parent bug-report are closed.
# 1. Bug fixed → comment "verified fixed", remove in-progress, close issue
# 2. Bug persists → comment "still reproduces", add in-triage, re-enter triage
#
# Turn budget (standard mode): 60% on step 1 (reproduction), 40% on step 2 (cause check).
# Turn budget (verification mode): 100% on re-running reproduction steps.
name = "reproduce"
description = "Primary: reproduce the bug. Secondary: check if cause is obvious. Exit gates enforced."
version = 1
# Set stack_script to the restart command for local stacks.
# Leave empty ("") to target an existing staging environment.
stack_script = ""
tools = ["playwright"]
timeout_minutes = 15

View file

@ -61,25 +61,6 @@ Do NOT flag:
- Things that look wrong but actually work verify by reading the code first - Things that look wrong but actually work verify by reading the code first
- Files that were truncated from the diff (the orchestrator notes truncation) - Files that were truncated from the diff (the orchestrator notes truncation)
## 3b. Architecture and documentation consistency
For each BEHAVIORAL change in the diff (not pure bug fixes or formatting):
1. Identify what behavior changed (e.g., scheduling mechanism, auth flow,
container lifecycle, secret handling)
2. Search AGENTS.md for claims about that behavior:
grep -n '<keyword>' AGENTS.md
Also check docs/ and any per-directory AGENTS.md files.
3. Search for Architecture Decision references (AD-001 through AD-006):
grep -n 'AD-0' AGENTS.md
Read each AD and check if the PR's changes contradict it.
4. If the PR changes behavior described in AGENTS.md or contradicts an AD
but does NOT update the documentation in the same PR:
REQUEST_CHANGES require the documentation update in the same PR.
This check is SKIPPED for pure bug fixes where the intended behavior is
unchanged (the code was wrong, not the documentation).
## 4. Vault item quality (conditional) ## 4. Vault item quality (conditional)
If the PR adds or modifies vault item files (`vault/pending/*.md` in the ops repo), apply these If the PR adds or modifies vault item files (`vault/pending/*.md` in the ops repo), apply these
@ -131,7 +112,7 @@ near-duplicate exists, REQUEST_CHANGES and reference the existing item.
Agents must NEVER execute external actions directly. Any action that touches Agents must NEVER execute external actions directly. Any action that touches
an external system (publish, deploy, post, push to external registry, API an external system (publish, deploy, post, push to external registry, API
calls to third-party services) MUST go through vault dispatch i.e., the calls to third-party services) MUST go through vault dispatch i.e., the
agent files a vault item (`$OPS_REPO_ROOT/vault/pending/*.json`) and the runner agent files a vault item (`$OPS_REPO_ROOT/vault/pending/*.json`) and the vault-runner
container executes it with injected secrets. container executes it with injected secrets.
Scan the diff for these patterns: Scan the diff for these patterns:
@ -147,7 +128,8 @@ Scan the diff for these patterns:
If ANY of these patterns appear in agent code (scripts in `dev/`, `action/`, If ANY of these patterns appear in agent code (scripts in `dev/`, `action/`,
`planner/`, `gardener/`, `supervisor/`, `predictor/`, `review/`, `formulas/`, `planner/`, `gardener/`, `supervisor/`, `predictor/`, `review/`, `formulas/`,
`lib/`) WITHOUT routing through vault dispatch (file a vault PR on ops repo see #73-#77), **REQUEST_CHANGES**. `lib/`) WITHOUT routing through vault dispatch (`$OPS_REPO_ROOT/vault/pending/`, `vault-fire.sh`,
`vault-run-action.sh`), **REQUEST_CHANGES**.
Explain that external actions must use vault dispatch per AD-006. The agent Explain that external actions must use vault dispatch per AD-006. The agent
should file a vault item instead of executing directly. should file a vault item instead of executing directly.
@ -155,7 +137,7 @@ should file a vault item instead of executing directly.
**Exceptions** (do NOT flag these): **Exceptions** (do NOT flag these):
- Code inside `vault/` the vault system itself is allowed to handle secrets - Code inside `vault/` the vault system itself is allowed to handle secrets
- References in comments or documentation explaining the architecture - References in comments or documentation explaining the architecture
- `bin/disinto` setup commands that manage `.env.vault.enc` and the `run` subcommand - `bin/disinto` setup commands that manage `.env.vault.enc`
- Local operations (git push to forge, forge API calls with `FORGE_TOKEN`) - Local operations (git push to forge, forge API calls with `FORGE_TOKEN`)
## 6. Re-review (if previous review is provided) ## 6. Re-review (if previous review is provided)
@ -196,16 +178,8 @@ tech-debt issues via API so they are tracked separately:
-H "Content-Type: application/json" "$FORGE_API/issues" \ -H "Content-Type: application/json" "$FORGE_API/issues" \
-d '{"title":"...","body":"Flagged by AI reviewer in PR #NNN.\n\n## Problem\n...\n\n---\n*Auto-created from AI review*","labels":[TECH_DEBT_ID]}' -d '{"title":"...","body":"Flagged by AI reviewer in PR #NNN.\n\n## Problem\n...\n\n---\n*Auto-created from AI review*","labels":[TECH_DEBT_ID]}'
File a tech-debt issue for every finding rated **medium** or higher that Only create follow-ups for clear, actionable tech debt. Do not create
is pre-existing (not introduced by this PR). Also file for **low** findings issues for minor style nits or speculative improvements.
that represent correctness risks (dead code that masks bugs, misleading
documentation, unguarded variables under set -u).
Do NOT file for: style preferences, naming opinions, missing comments,
or speculative improvements with no concrete failure mode.
When in doubt, file. A closed-as-wontfix tech-debt issue costs nothing;
an unfiled bug costs a future debugging session.
## 8. Verdict ## 8. Verdict
@ -218,11 +192,6 @@ Bias toward APPROVE for small, correct changes. Use REQUEST_CHANGES only
for actual problems (bugs, security issues, broken functionality, missing for actual problems (bugs, security issues, broken functionality, missing
required behavior). Use DISCUSS sparingly. required behavior). Use DISCUSS sparingly.
Note: The bias toward APPROVE applies to code correctness and style decisions.
It does NOT apply to documentation consistency (step 3b) or tech-debt filing
(step 7) those are separate concerns that should be handled regardless of
the change's correctness.
## 9. Output ## 9. Output
Write a single JSON object to the file path from REVIEW_OUTPUT_FILE. Write a single JSON object to the file path from REVIEW_OUTPUT_FILE.

View file

@ -1,296 +0,0 @@
# formulas/run-architect.toml — Architect formula
#
# Executed by architect-run.sh via polling loop — strategic decomposition of vision
# issues into development sprints.
#
# This formula orchestrates the architect agent's workflow:
# Step 1: Preflight — bash handles state management:
# - Fetch open vision issues from Forgejo API
# - Fetch open architect PRs on ops repo
# - Fetch merged architect PRs (already pitched visions)
# - Filter: remove visions with open PRs, merged sprints, or sub-issues
# - Select up to 3 remaining vision issues for pitching
# Step 2: Stateless pitch generation — for each selected issue:
# - Invoke claude -p with: vision issue body + codebase context
# - Model NEVER calls Forgejo API — only generates pitch markdown
# - Bash creates the ops PR with pitch content
# - Bash posts the ACCEPT/REJECT footer comment
# Step 3: Sprint PR creation with questions (issue #101) (one PR per pitch)
# Step 4: Answer parsing + sub-issue filing (issue #102)
#
# Architecture:
# - Bash script (architect-run.sh) handles ALL state management
# - Model calls are stateless — no Forgejo API access, no memory between calls
# - Dedup is automatic via bash filters (no journal-based memory needed)
# - Max 3 open architect PRs at any time
#
# AGENTS.md maintenance is handled by the gardener (#246).
name = "run-architect"
description = "Architect: strategic decomposition of vision into sprints"
version = 2
model = "opus"
[context]
files = ["VISION.md", "AGENTS.md"]
# Prerequisite tree loaded from ops repo (ops: prefix)
# Sprints directory tracked in ops repo
[[steps]]
id = "preflight"
title = "Preflight: bash-driven state management and issue selection"
description = """
This step performs preflight checks and selects up to 3 vision issues for pitching.
IMPORTANT: All state management is handled by bash (architect-run.sh), NOT the model.
Architecture Decision: Bash-driven orchestration with stateless model calls
- The model NEVER calls Forgejo API during pitching
- Bash fetches all data from Forgejo API (vision issues, open PRs, merged PRs)
- Bash filters and deduplicates (no model-level dedup or journal-based memory)
- For each selected issue, bash invokes stateless claude -p (model only generates pitch)
- Bash creates PRs and posts footer comments (no model API access)
Bash Actions (in architect-run.sh):
1. Fetch open vision issues from Forgejo API: GET /repos/{owner}/{repo}/issues?labels=vision&state=open
2. Fetch open architect PRs from ops repo: GET /repos/{owner}/{repo}/pulls?state=open
3. Fetch merged sprint PRs: GET /repos/{owner}/{repo}/pulls?state=closed (filter merged=true)
4. Filter out visions that:
- Already have open architect PRs (check PR body for issue number reference)
- Have in-progress label
- Have open sub-issues (check for 'Decomposed from #N' pattern)
- Have merged sprint PRs (decomposition already done)
5. Select up to (3 - open_architect_pr_count) remaining vision issues
6. If no issues remain AND no responses to process, signal PHASE:done
If open architect PRs exist, handle accept/reject responses FIRST (see Capability B below).
After handling existing PRs, count remaining open architect PRs and calculate pitch_budget.
## Multi-pitch selection (up to 3 per run)
After handling existing PRs, determine how many new pitches can be created:
pitch_budget = 3 - <number of open architect PRs remaining after handling>
For each available pitch slot:
1. From the vision issues list, skip any issue that already has an open architect PR
2. Skip any issue that already has the `in-progress` label
3. Check for existing sub-issues filed from this vision issue
4. Check for merged sprint PRs referencing this vision issue
5. From remaining candidates, pick the most unblocking issue first
6. Add to ARCHITECT_TARGET_ISSUES array
Skip conditions:
- If no vision issues are found, signal PHASE:done
- If pitch_budget <= 0 (already 3 open architect PRs), skip pitching
- If all vision issues already have open architect PRs, signal PHASE:done
- If all vision issues have open sub-issues, skip pitching
- If all vision issues have merged sprint PRs, skip pitching
Output:
- Sets ARCHITECT_TARGET_ISSUES as a JSON array of issue numbers to pitch (up to 3)
"""
[[steps]]
id = "research_pitch"
title = "Stateless pitch generation: model generates content, bash creates PRs"
description = """
IMPORTANT: This step is executed by bash (architect-run.sh) via stateless claude -p calls.
The model NEVER calls Forgejo API — it only reads context and generates pitch markdown.
Architecture:
- Bash orchestrates the loop over ARCHITECT_TARGET_ISSUES
- For each issue: bash fetches issue body from Forgejo API, then invokes stateless claude -p
- Model receives: vision issue body + codebase context (VISION.md, AGENTS.md, prerequisites.md)
- Model outputs: sprint pitch markdown ONLY (no API calls, no side effects)
- Bash creates the PR and posts the ACCEPT/REJECT footer comment
For each issue in ARCHITECT_TARGET_ISSUES, bash performs:
1. Fetch vision issue details from Forgejo API:
- GET /repos/{owner}/{repo}/issues/{issue_number}
- Extract: title, body
2. Invoke stateless claude -p with prompt:
"Write a sprint pitch for this vision issue. Output only the pitch markdown."
Context provided:
- Vision issue #N: <title>
- Vision issue body
- Project context (VISION.md, AGENTS.md)
- Codebase context (prerequisites.md, graph section)
- Formula content
3. Model generates pitch markdown (NO API CALLS):
# Sprint: <sprint-name>
## Vision issues
- #N — <title>
## What this enables
<what the project can do after this sprint that it can't do now>
## What exists today
<current state — infrastructure, interfaces, code that can be reused>
## Complexity
<number of files/subsystems, estimated sub-issues>
<gluecode vs greenfield ratio>
## Risks
<what could go wrong, what breaks if this is done badly>
## Cost — new infra to maintain
<what ongoing maintenance burden does this sprint add>
<new services, scheduled tasks, formulas, agent roles>
## Recommendation
<architect's assessment: worth it / defer / alternative approach>
IMPORTANT: Do NOT include design forks or questions yet. The pitch is a go/no-go
decision for the human. Questions come only after acceptance.
4. Bash creates PR:
- Create branch: architect/sprint-{pitch-number}
- Write sprint spec to sprints/{sprint-slug}.md
- Create PR with pitch content as body
- Post footer comment: "Reply ACCEPT to proceed with design questions, or REJECT: <reason> to decline."
- Add in-progress label to vision issue
Output:
- One PR per vision issue (up to 3 per run)
- Each PR contains the pitch markdown
- If ARCHITECT_TARGET_ISSUES is empty, skip this step
"""
[[steps]]
id = "sprint_pr_creation"
title = "Sprint PR creation with questions (issue #101) — handled by bash"
description = """
IMPORTANT: PR creation is handled by bash (architect-run.sh) during the pitch step.
This step is for documentation only — the actual PR creation happens in research_pitch.
## Approved PR → Initial design questions (issue #570)
When a sprint pitch PR receives an APPROVED review but has no `## Design forks`
section and no Q1:, Q2: comments yet, the architect enters a new state:
1. detect_approved_pending_questions() identifies this state
2. A fresh agent session starts with a special prompt
3. The agent reads the approved pitch, posts initial design questions (Q1:, Q2:, etc.)
4. The agent adds a `## Design forks` section to the PR body
5. The PR transitions into the questions phase, where the existing Q&A loop takes over
This ensures approved PRs don't sit indefinitely without design conversation.
Architecture:
- Bash creates PRs during stateless pitch generation (step 2)
- Model has no role in PR creation — no Forgejo API access
- This step describes the PR format for reference
PR Format (created by bash):
1. Branch: architect/sprint-{pitch-number}
2. Sprint spec file: sprints/{sprint-slug}.md
Contains the pitch markdown from the model.
3. PR via Forgejo API:
- Title: architect: <sprint summary>
- Body: plain markdown text from model output
- Base: main (or PRIMARY_BRANCH)
- Head: architect/sprint-{pitch-number}
- Footer comment: "Reply ACCEPT to proceed with design questions, or REJECT: <reason> to decline."
4. Add in-progress label to vision issue:
- Look up label ID: GET /repos/{owner}/{repo}/labels
- Add label: POST /repos/{owner}/{repo}/issues/{issue_number}/labels
After creating all PRs, signal PHASE:done.
## Forgejo API Reference
All operations use the Forgejo API with Authorization: token ${FORGE_TOKEN} header.
### Create branch
```
POST /repos/{owner}/{repo}/branches
Body: {"new_branch_name": "architect/<sprint-slug>", "old_branch_name": "main"}
```
### Create/update file
```
PUT /repos/{owner}/{repo}/contents/<path>
Body: {"message": "sprint: add <sprint-slug>.md", "content": "<base64-encoded-content>", "branch": "architect/<sprint-slug>"}
```
### Create PR
```
POST /repos/{owner}/{repo}/pulls
Body: {"title": "architect: <sprint summary>", "body": "<markdown-text>", "head": "architect/<sprint-slug>", "base": "main"}
```
**Important: PR body format**
- The body field must contain plain markdown text (the raw content from the model)
- Do NOT JSON-encode or escape the body — pass it as a JSON string value
- Newlines and markdown formatting (headings, lists, etc.) must be preserved as-is
### Add label to issue
```
POST /repos/{owner}/{repo}/issues/{index}/labels
Body: {"labels": [<label-id>]}
```
## Forgejo API Reference
All operations use the Forgejo API with `Authorization: token ${FORGE_TOKEN}` header.
### Create branch
```
POST /repos/{owner}/{repo}/branches
Body: {"new_branch_name": "architect/<sprint-slug>", "old_branch_name": "main"}
```
### Create/update file
```
PUT /repos/{owner}/{repo}/contents/<path>
Body: {"message": "sprint: add <sprint-slug>.md", "content": "<base64-encoded-content>", "branch": "architect/<sprint-slug>"}
```
### Create PR
```
POST /repos/{owner}/{repo}/pulls
Body: {"title": "architect: <sprint summary>", "body": "<markdown-text>", "head": "architect/<sprint-slug>", "base": "main"}
```
**Important: PR body format**
- The `body` field must contain **plain markdown text** (the raw content from the scratch file)
- Do NOT JSON-encode or escape the body — pass it as a JSON string value
- Newlines and markdown formatting (headings, lists, etc.) must be preserved as-is
### Close PR
```
PATCH /repos/{owner}/{repo}/pulls/{index}
Body: {"state": "closed"}
```
### Delete branch
```
DELETE /repos/{owner}/{repo}/branches/<branch-name>
```
### Get labels (look up label IDs by name)
```
GET /repos/{owner}/{repo}/labels
```
### Add label to issue (for in-progress on vision issue)
```
POST /repos/{owner}/{repo}/issues/{index}/labels
Body: {"labels": [<label-id>]}
```
### Remove label from issue (for in-progress removal on REJECT)
```
DELETE /repos/{owner}/{repo}/issues/{index}/labels/{label-id}
```
"""

View file

@ -1,15 +1,16 @@
# formulas/run-gardener.toml — Gardener housekeeping formula # formulas/run-gardener.toml — Gardener housekeeping formula
# #
# Defines the gardener's complete run: grooming (Claude session via # Defines the gardener's complete run: grooming (Claude session via
# gardener-run.sh) + AGENTS.md maintenance + final commit-and-pr. # gardener-run.sh) + blocked-review + AGENTS.md maintenance + final
# commit-and-pr.
# #
# Gardener has journaling via .profile (issue #97), so it learns from # No memory, no journal. The gardener does mechanical housekeeping
# past runs and improves over time. # based on current state — it doesn't need to remember past runs.
# #
# Steps: preflight -> grooming -> dust-bundling -> agents-update -> commit-and-pr # Steps: preflight → grooming → dust-bundling → blocked-review → stale-pr-recycle → agents-update → commit-and-pr
name = "run-gardener" name = "run-gardener"
description = "Mechanical housekeeping: grooming, dust bundling, docs update" description = "Mechanical housekeeping: grooming, blocked review, docs update"
version = 1 version = 1
[context] [context]
@ -76,63 +77,6 @@ Pre-checks (bash, zero tokens — detect problems before invoking Claude):
6. Tech-debt promotion: list all tech-debt labeled issues goal is to 6. Tech-debt promotion: list all tech-debt labeled issues goal is to
process them all (promote to backlog or classify as dust). process them all (promote to backlog or classify as dust).
7. Bug-report detection: for each open unlabeled issue (no backlog, no
bug-report, no in-progress, no blocked, no underspecified, no vision,
no tech-debt), check whether it describes a user-facing bug with
reproduction steps. Criteria ALL must be true:
a. Body describes broken behavior (something that should work but
doesn't), NOT a feature request or enhancement
b. Body contains steps to reproduce (numbered list, "steps to
reproduce" heading, or clear sequence of actions that trigger the bug)
c. Issue is not already labeled
If all criteria match, enrich the issue body and write the manifest actions:
Body enrichment (CRITICAL turns raw reports into actionable investigation briefs):
Before writing the add_label action, construct an enriched body by appending
these sections to the original issue body:
a. ``## What was reported``
One or two sentence summary of the user's claim. Distill the broken
behavior concisely what the user expected vs. what actually happened.
b. ``## Known context``
What can be inferred from the codebase without running anything:
- Which contracts/components/files are involved (use AGENTS.md layout
and file paths mentioned in the issue or body)
- What the expected behavior should be (from VISION.md, docs, code)
- Any recent changes to involved components:
git log --oneline -5 -- <paths>
- Related issues or prior fixes (cross-reference by number if known)
c. ``## Reproduction plan``
Concrete steps for a reproduce-agent or human. Be specific:
- Which environment to use (e.g. "start fresh stack with
\`./scripts/dev.sh restart --full\`")
- Which transactions or actions to execute (with \`cast\` commands,
API calls, or UI navigation steps where applicable)
- What state to check after each step (contract reads, API queries,
UI observations, log output)
d. ``## What needs verification``
Checkboxes distinguishing known facts from unknowns:
- ``- [ ]`` Does the reported behavior actually occur? (reproduce)
- ``- [ ]`` Is <component X> behaving as expected? (check state)
- ``- [ ]`` Is the data flow correct from <A> to <B>? (trace)
Tailor these to the specific bug three to five items covering the
key unknowns a reproduce-agent must resolve.
e. Construct full new body = original body text + appended sections.
Write an edit_body action BEFORE the add_label action:
echo '{"action":"edit_body","issue":NNN,"body":"<full new body>"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
f. Write the add_label action:
echo '{"action":"add_label","issue":NNN,"label":"bug-report"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
echo "ACTION: labeled #NNN as bug-report — <reason>" >> "$RESULT_FILE"
Do NOT also add the backlog label bug-report is a separate triage
track that feeds into reproduction automation.
For each issue, choose ONE action and write to result file: For each issue, choose ONE action and write to result file:
ACTION (substantial promote, close duplicate, add acceptance criteria): ACTION (substantial promote, close duplicate, add acceptance criteria):
@ -176,17 +120,15 @@ DUST (trivial — single-line edit, rename, comment, style, whitespace):
of 3+ into one backlog issue. of 3+ into one backlog issue.
VAULT (needs human decision or external resource): VAULT (needs human decision or external resource):
File a vault procurement item using vault_request(): File a vault procurement item at $OPS_REPO_ROOT/vault/pending/<id>.md:
source "$(dirname "$0")/../lib/vault.sh" # <What decision or resource is needed>
TOML_CONTENT="# Vault action: <action_id> ## What
context = \"<description of what decision/resource is needed>\" <description>
unblocks = [\"#NNN\"] ## Why
<which issue this unblocks>
[execution] ## Unblocks
# Commands to run after approval - #NNN — <title>
" Log: echo "VAULT: filed $OPS_REPO_ROOT/vault/pending/<id>.md for #NNN — <reason>" >> "$RESULT_FILE"
PR_NUM=$(vault_request "<action_id>" "$TOML_CONTENT")
echo "VAULT: filed PR #${PR_NUM} for #NNN — <reason>" >> "$RESULT_FILE"
CLEAN (only if truly nothing to do): CLEAN (only if truly nothing to do):
echo 'CLEAN' >> "$RESULT_FILE" echo 'CLEAN' >> "$RESULT_FILE"
@ -200,7 +142,25 @@ Sibling dependency rule (CRITICAL):
NEVER add bidirectional ## Dependencies between siblings (creates deadlocks). NEVER add bidirectional ## Dependencies between siblings (creates deadlocks).
Use ## Related for cross-references: "## Related\n- #NNN (sibling)" Use ## Related for cross-references: "## Related\n- #NNN (sibling)"
6. Quality gate backlog label enforcement: 7. Architecture decision alignment check (AD check):
For each open issue labeled 'backlog', check whether the issue
contradicts any architecture decision listed in the
## Architecture Decisions section of AGENTS.md.
Read AGENTS.md and extract the AD table. For each backlog issue,
compare the issue title and body against each AD. If an issue
clearly violates an AD:
a. Write a comment action to the manifest:
echo '{"action":"comment","issue":NNN,"body":"Closing: violates AD-NNN (<decision summary>). See AGENTS.md § Architecture Decisions."}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
b. Write a close action to the manifest:
echo '{"action":"close","issue":NNN,"reason":"violates AD-NNN"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
c. Log to the result file:
echo "ACTION: closed #NNN — violates AD-NNN" >> "$RESULT_FILE"
Only close for clear, unambiguous violations. If the issue is
borderline or could be interpreted as compatible, leave it open
and file a VAULT item for human decision instead.
8. Quality gate backlog label enforcement:
For each open issue labeled 'backlog', verify it has the required For each open issue labeled 'backlog', verify it has the required
sections for dev-agent pickup: sections for dev-agent pickup:
a. Acceptance criteria body must contain at least one checkbox a. Acceptance criteria body must contain at least one checkbox
@ -221,65 +181,28 @@ Sibling dependency rule (CRITICAL):
Well-structured issues (both sections present) are left untouched Well-structured issues (both sections present) are left untouched
they are ready for dev-agent pickup. they are ready for dev-agent pickup.
8. Bug-report lifecycle auto-close resolved parent issues: 9. Portfolio lifecycle maintain ## Addressables and ## Observables in AGENTS.md:
For each open issue, check whether it is a parent that was decomposed Read the current Addressables and Observables tables from AGENTS.md.
into sub-issues. A parent is identified by having OTHER issues whose
body contains "Decomposed from #N" where N is the parent's number.
Algorithm: a. ADD: if a recently closed issue shipped a new deployment, listing,
a. From the open issues fetched in step 1, collect all issue numbers. package, or external presence not yet in the table, add a row.
b. For each open issue number N, search ALL issues (open AND closed) b. PROMOTE: if an addressable now has measurement wired (an evidence
for bodies containing "Decomposed from #N": process reads from it), move it to the Observables section.
curl -sf -H "Authorization: token $FORGE_TOKEN" \ c. REMOVE: if an addressable was decommissioned (vision change
"$FORGE_API/issues?state=all&type=issues&limit=50" \ invalidated it, service shut down), remove the row and log why.
| jq -r --argjson n N \ d. FLAG: if an addressable has been live > 2 weeks with Observable? = No
'[.[] | select(.body != null) | select(.body | test("Decomposed from #" + ($n | tostring) + "\\b"))] | length' and no evidence process is planned, add a comment to the result file:
If zero sub-issues found, skip this is not a decomposed parent. echo "ACTION: flagged addressable '<name>' — live >2 weeks, no observation path" >> "$RESULT_FILE"
c. If sub-issues exist, check whether ALL of them are closed: Stage AGENTS.md if changed the commit-and-pr step handles the actual commit.
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues?state=all&type=issues&limit=50" \
| jq -r --argjson n N \
'[.[] | select(.body != null) | select(.body | test("Decomposed from #" + ($n | tostring) + "\\b"))]
| {total: length, closed: [.[] | select(.state == "closed")] | length}
| .total == .closed'
If the result is "false", some sub-issues are still open skip.
d. If ALL sub-issues are closed, collect sub-issue numbers and titles:
SUB_ISSUES=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues?state=all&type=issues&limit=50" \
| jq -r --argjson n N \
'[.[] | select(.body != null) | select(.body | test("Decomposed from #" + ($n | tostring) + "\\b"))]
| .[] | "- #\(.number) \(.title)"')
e. Write a comment action listing the resolved sub-issues.
Use jq to build valid JSON (sub-issue titles may contain quotes/backslashes,
and SUB_ISSUES is multiline raw interpolation would break JSONL):
COMMENT_BODY=$(printf 'All sub-issues have been resolved:\n%s\n\nClosing this parent issue as all decomposed work is complete.' "$SUB_ISSUES")
jq -n --argjson issue N --arg body "$COMMENT_BODY" \
'{action:"comment", issue: $issue, body: $body}' \
>> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
f. Write a close action:
jq -n --argjson issue N \
'{action:"close", issue: $issue, reason: "all sub-issues resolved"}' \
>> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
g. Log the action:
echo "ACTION: closed #N — all sub-issues resolved" >> "$RESULT_FILE"
Edge cases:
- Already closed parent: skipped (only open issues are processed)
- No sub-issues found: skipped (not a decomposed issue)
- Multi-cause bugs: stays open until ALL sub-issues are closed
Processing order: Processing order:
1. Handle PRIORITY_blockers_starving_factory first promote or resolve 1. Handle PRIORITY_blockers_starving_factory first promote or resolve
2. Quality gate strip backlog from issues missing acceptance criteria or affected files 2. AD alignment check close backlog issues that violate architecture decisions
3. Bug-report detection label qualifying issues before other classification 3. Quality gate strip backlog from issues missing acceptance criteria or affected files
4. Bug-report lifecycle close parents whose sub-issues are all resolved 4. Process tech-debt issues by score (impact/effort)
5. Process tech-debt issues by score (impact/effort) 5. Classify remaining items as dust or route to vault
6. Classify remaining items as dust or route to vault 6. Portfolio lifecycle update addressables/observables tables
Do NOT bundle dust yourself the dust-bundling step handles accumulation, Do NOT bundle dust yourself the dust-bundling step handles accumulation,
dedup, TTL expiry, and bundling into backlog issues. dedup, TTL expiry, and bundling into backlog issues.
@ -334,22 +257,137 @@ session, so changes there would be lost.
5. If no DUST items were emitted and no groups are ripe, skip this step. 5. If no DUST items were emitted and no groups are ripe, skip this step.
CRITICAL: If this step fails, log the failure and move on. CRITICAL: If this step fails, log the failure and move on to blocked-review.
""" """
needs = ["grooming"] needs = ["grooming"]
# ───────────────────────────────────────────────────────────────────── # ─────────────────────────────────────────────────────────────────────
# Step 4: agents-update — AGENTS.md watermark staleness + size enforcement # Step 4: blocked-review — triage blocked issues
# ─────────────────────────────────────────────────────────────────────
[[steps]]
id = "blocked-review"
title = "Review issues labeled blocked"
description = """
Review all issues labeled 'blocked' and decide their fate.
(See issue #352 for the blocked label convention.)
1. Fetch all blocked issues:
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues?state=open&type=issues&labels=blocked&limit=50"
2. For each blocked issue, read the full body and comments:
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues/<number>"
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues/<number>/comments"
3. Check dependencies — extract issue numbers from ## Dependencies /
## Depends on / ## Blocked by sections. For each dependency:
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues/<dep_number>"
Check if the dependency is now closed.
4. For each blocked issue, choose ONE action:
UNBLOCK — all dependencies are now closed or the blocking condition resolved:
a. Write a remove_label action to the manifest:
echo '{"action":"remove_label","issue":NNN,"label":"blocked"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
b. Write a comment action to the manifest:
echo '{"action":"comment","issue":NNN,"body":"Unblocked: <explanation of what resolved the blocker>"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
NEEDS HUMAN — blocking condition is ambiguous, requires architectural
decision, or involves external factors:
a. Write a comment action to the manifest:
echo '{"action":"comment","issue":NNN,"body":"<diagnostic: what you found and what decision is needed>"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
b. Leave the 'blocked' label in place
CLOSE — issue is stale (blocked 30+ days with no progress on blocker),
the blocker is wontfix, or the issue is no longer relevant:
a. Write a comment action to the manifest:
echo '{"action":"comment","issue":NNN,"body":"Closing: <reason — stale blocker, no longer relevant, etc.>"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
b. Write a close action to the manifest:
echo '{"action":"close","issue":NNN,"reason":"<stale blocker / no longer relevant / etc.>"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
CRITICAL: If this step fails, log the failure and move on.
"""
needs = ["dust-bundling"]
# ─────────────────────────────────────────────────────────────────────
# Step 5: stale-pr-recycle — recycle stale failed PRs back to backlog
# ─────────────────────────────────────────────────────────────────────
[[steps]]
id = "stale-pr-recycle"
title = "Recycle stale failed PRs back to backlog"
description = """
Detect open PRs where CI has failed and no work has happened in 24+ hours.
These represent abandoned dev-agent attempts — recycle them so the pipeline
can retry with a fresh session.
1. Fetch all open PRs:
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/pulls?state=open&limit=50"
2. For each PR, check all four conditions before recycling:
a. CI failed — get the HEAD SHA from the PR's head.sha field, then:
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/commits/<head_sha>/status"
Only proceed if the combined state is "failure" or "error".
Skip PRs with "success", "pending", or no CI status.
b. Last push > 24 hours ago — get the commit details:
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/git/commits/<head_sha>"
Parse the committer.date field. Only proceed if it is older than:
$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ)
c. Linked issue exists — extract the issue number from the PR body.
Look for "Fixes #NNN" or "ixes #NNN" patterns (case-insensitive).
If no linked issue found, skip this PR (cannot reset labels).
d. No active tmux session — check:
tmux has-session -t "dev-${PROJECT_NAME}-<issue_number>" 2>/dev/null
If a session exists, someone may still be working — skip this PR.
3. For each PR that passes all checks (failed CI, 24+ hours stale,
linked issue found, no active session):
a. Write a comment on the PR explaining the recycle:
echo '{"action":"comment","issue":<pr_number>,"body":"Recycling stale CI failure for fresh attempt. Previous PR: #<pr_number>"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
b. Write a close_pr action:
echo '{"action":"close_pr","pr":<pr_number>}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
c. Remove the in-progress label from the linked issue:
echo '{"action":"remove_label","issue":<issue_number>,"label":"in-progress"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
d. Add the backlog label to the linked issue:
echo '{"action":"add_label","issue":<issue_number>,"label":"backlog"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
e. Log to result file:
echo "ACTION: recycled PR #<pr_number> (linked issue #<issue_number>) — stale CI failure" >> "$RESULT_FILE"
4. If no stale failed PRs found, skip this step.
CRITICAL: If this step fails, log the failure and move on to agents-update.
"""
needs = ["blocked-review"]
# ─────────────────────────────────────────────────────────────────────
# Step 6: agents-update — AGENTS.md watermark staleness + size enforcement
# ───────────────────────────────────────────────────────────────────── # ─────────────────────────────────────────────────────────────────────
[[steps]] [[steps]]
id = "agents-update" id = "agents-update"
title = "Check AGENTS.md watermarks, discover structural changes, update stale files" title = "Check AGENTS.md watermarks, update stale files, enforce size limit"
description = """ description = """
Maintain all AGENTS.md files by detecting structural drift since the last Check all AGENTS.md files for staleness, update any that are outdated, and
review. Uses git history as the source of truth — not vibes. enforce the ~200-line size limit via progressive disclosure splitting.
This keeps documentation fresh — runs 2x/day so drift stays small.
## Part A: Discover what changed ## Part A: Watermark staleness check and update
1. Read the HEAD SHA from preflight: 1. Read the HEAD SHA from preflight:
HEAD_SHA=$(cat /tmp/gardener-head-sha) HEAD_SHA=$(cat /tmp/gardener-head-sha)
@ -359,80 +397,110 @@ review. Uses git history as the source of truth — not vibes.
3. For each file, read the watermark from line 1: 3. For each file, read the watermark from line 1:
<!-- last-reviewed: <sha> --> <!-- last-reviewed: <sha> -->
If no watermark exists, treat the file as fully stale (review everything).
4. Check for changes since the watermark: 4. Check for changes since the watermark:
git log --oneline <watermark>..HEAD -- <directory> git log --oneline <watermark>..HEAD -- <directory>
If zero changes, the file is current — skip it. If zero changes, the file is current — skip it.
5. For each stale file, run a STRUCTURAL DIFF this is the core of the step: 5. For stale files:
- Read the AGENTS.md and the source files in that directory
- Update the documentation to reflect code changes since the watermark
- Set the watermark to the HEAD SHA from the preflight step
- Conventions: architecture and WHY not implementation details
a. FILE INVENTORY: list files at watermark vs HEAD for this directory: ## Part B: Size limit enforcement (progressive disclosure split)
git ls-tree -r --name-only <watermark> -- <directory>
git ls-tree -r --name-only HEAD -- <directory>
Diff the two lists. Categorize:
- NEW files: in HEAD but not in watermark
- DELETED files: in watermark but not in HEAD
- Check AGENTS.md layout section: does it list each current file?
Files present in the directory but absent from the layout = GAPS.
Files listed in the layout but missing from the directory = LIES.
b. REFERENCE VALIDATION: extract every file path, function name, and After all updates are done, count lines in the root AGENTS.md:
shell variable referenced in the AGENTS.md. For each:
- File paths: verify the file exists (ls or git ls-tree HEAD)
- Function names: grep for the definition in the codebase
- Script names: verify they exist where claimed
Any reference that fails validation is a LIE flag it for correction.
c. SEMANTIC CHANGES: for files that existed at both watermark and HEAD,
check if they changed meaningfully:
git diff <watermark>..HEAD -- <directory>/*.sh <directory>/*.py <directory>/*.toml
Look for: new exported functions, removed functions, renamed files,
changed CLI flags, new environment variables, new configuration.
Ignore: internal refactors, comment changes, formatting.
6. For each stale file, apply corrections:
- Add NEW files to the layout section
- Remove DELETED files from the layout section
- Fix every LIE found in reference validation
- Add notes about significant SEMANTIC CHANGES
- Set the watermark to HEAD_SHA
- Conventions: document architecture and WHY, not implementation details
## Part B: Size limit enforcement
After all updates, count lines in the root AGENTS.md:
wc -l < "$PROJECT_REPO_ROOT/AGENTS.md" wc -l < "$PROJECT_REPO_ROOT/AGENTS.md"
If it exceeds 200 lines, split verbose sections into per-directory files If the root AGENTS.md exceeds 200 lines, perform a progressive disclosure
using progressive disclosure: split. The principle: agent reads the map, drills into detail only when
needed. You wouldn't dump a 500-page wiki on a new hire's first morning.
7. Identify sections that can be extracted to per-directory files. 6. Identify per-directory sections to extract. Each agent section under
Keep the root AGENTS.md as a table of contents brief overview, "## Agents" (e.g. "### Dev (`dev/`)", "### Review (`review/`)") and
directory layout, summary tables with links to detail files. each helper section (e.g. "### Shared helpers (`lib/`)") is a candidate.
Also extract verbose subsections like "## Issue lifecycle and label
conventions" and "## Phase-Signaling Protocol" into docs/ or the
relevant directory.
8. For each extracted section, create a `{dir}/AGENTS.md` with: 7. For each section to extract, create a `{dir}/AGENTS.md` file with:
- Line 1: watermark <!-- last-reviewed: <HEAD_SHA> --> - Line 1: watermark <!-- last-reviewed: <HEAD_SHA> -->
- The full section content, preserving structure and detail - The full section content (role, trigger, key files, env vars, lifecycle)
- Keep the same markdown structure and detail level
9. Replace extracted sections in root with concise summaries + links. Example for dev/:
```
<!-- last-reviewed: abc123 -->
# Dev Agent
10. Verify root is under 200 lines. If still over, extract more. **Role**: Implement issues autonomously ...
**Trigger**: dev-poll.sh runs every 10 min ...
**Key files**: ...
**Environment variables consumed**: ...
**Lifecycle**: ...
```
8. Replace extracted sections in the root AGENTS.md with a concise
directory map table. The root file keeps ONLY:
- Watermark (line 1)
- ## What this repo is (brief overview)
- ## Directory layout (existing tree)
- ## Tech stack
- ## Coding conventions
- ## How to lint and test
- ## Agents — replaced with a summary table pointing to per-dir files:
## Agents
| Agent | Directory | Role | Guide |
|-------|-----------|------|-------|
| Dev | dev/ | Issue implementation | [dev/AGENTS.md](dev/AGENTS.md) |
| Review | review/ | PR review | [review/AGENTS.md](review/AGENTS.md) |
| Gardener | gardener/ | Backlog grooming | [gardener/AGENTS.md](gardener/AGENTS.md) |
| ... | ... | ... | ... |
- ## Shared helpers — replaced with a brief pointer:
"See [lib/AGENTS.md](lib/AGENTS.md) for the full helper reference."
Keep the summary table if it fits, or move it to lib/AGENTS.md.
- ## Issue lifecycle and label conventions — keep a brief summary
(labels table + dependency convention) or move verbose parts to
docs/PHASE-PROTOCOL.md
- ## Architecture Decisions — keep in root (humans write, agents enforce)
- ## Phase-Signaling Protocol — keep a brief summary with pointer:
"See [docs/PHASE-PROTOCOL.md](docs/PHASE-PROTOCOL.md) for the full spec."
9. Verify the root AGENTS.md is now under 200 lines:
LINE_COUNT=$(wc -l < "$PROJECT_REPO_ROOT/AGENTS.md")
if [ "$LINE_COUNT" -gt 200 ]; then
echo "WARNING: root AGENTS.md still $LINE_COUNT lines after split"
fi
If still over 200, trim further — move more detail into per-directory
files. The root should read like a table of contents, not an encyclopedia.
10. Each new per-directory AGENTS.md must have a watermark on line 1.
The gardener maintains freshness for ALL AGENTS.md files — root and
per-directory — using the same watermark mechanism from Part A.
## Staging ## Staging
11. Stage all AGENTS.md files created or changed: 11. Stage ALL AGENTS.md files you created or changed — do NOT commit yet.
All git writes happen in the commit-and-pr step at the end:
find . -name "AGENTS.md" -not -path "./.git/*" -exec git add {} + find . -name "AGENTS.md" -not -path "./.git/*" -exec git add {} +
12. If no files need updating AND root is under 200 lines, skip entirely. 12. If no AGENTS.md files need updating AND root is under 200 lines,
skip this step entirely.
CRITICAL: If this step fails for any reason, log the failure and move on. CRITICAL: If this step fails for any reason, log the failure and move on.
Do NOT let an AGENTS.md failure prevent the commit-and-pr step. Do NOT let an AGENTS.md failure prevent the commit-and-pr step.
""" """
needs = ["dust-bundling"] needs = ["stale-pr-recycle"]
# ───────────────────────────────────────────────────────────────────── # ─────────────────────────────────────────────────────────────────────
# Step 5: commit-and-pr — single commit with all file changes # Step 7: commit-and-pr — single commit with all file changes
# ───────────────────────────────────────────────────────────────────── # ─────────────────────────────────────────────────────────────────────
[[steps]] [[steps]]
@ -486,14 +554,16 @@ executes them after the PR merges.
PR_NUMBER=$(echo "$PR_RESPONSE" | jq -r '.number') PR_NUMBER=$(echo "$PR_RESPONSE" | jq -r '.number')
h. Save PR number for orchestrator tracking: h. Save PR number for orchestrator tracking:
echo "$PR_NUMBER" > /tmp/gardener-pr-${PROJECT_NAME}.txt echo "$PR_NUMBER" > /tmp/gardener-pr-${PROJECT_NAME}.txt
i. The orchestrator handles CI/review via pr_walk_to_merge. i. Signal the orchestrator to monitor CI:
The gardener stays alive to inject CI results and review feedback echo "PHASE:awaiting_ci" > "$PHASE_FILE"
as they come in, then executes the pending-actions manifest after merge. j. STOP and WAIT. Do NOT return to the primary branch.
The orchestrator polls CI, injects results and review feedback.
When you receive injected CI or review feedback, follow its
instructions, then write PHASE:awaiting_ci and wait again.
4. If no file changes existed (step 2 found nothing): 4. If no file changes existed (step 2 found nothing):
# Nothing to commit — the gardener has no work to do this run. echo "PHASE:done" > "$PHASE_FILE"
exit 0
5. If PR creation fails, log the error and exit. 5. If PR creation fails, log the error and write PHASE:failed.
""" """
needs = ["agents-update"] needs = ["agents-update"]

View file

@ -1,10 +1,10 @@
# formulas/run-planner.toml — Strategic planning formula (v4: graph-driven) # formulas/run-planner.toml — Strategic planning formula (v4: graph-driven)
# #
# Executed directly by planner-run.sh via polling loop — no action issues. # Executed directly by planner-run.sh via cron — no action issues.
# planner-run.sh creates a tmux session with Claude (opus) and injects # planner-run.sh creates a tmux session with Claude (opus) and injects
# this formula as context, plus the graph report from build-graph.py. # this formula as context, plus the graph report from build-graph.py.
# #
# Steps: preflight → triage-and-plan → commit-ops-changes # Steps: preflight → triage-and-plan → journal-and-commit
# #
# v4 changes from v3: # v4 changes from v3:
# - Graph report (orphans, cycles, thin objectives, bottlenecks) replaces # - Graph report (orphans, cycles, thin objectives, bottlenecks) replaces
@ -13,8 +13,7 @@
# - 3 steps instead of 6. # - 3 steps instead of 6.
# #
# AGENTS.md maintenance is handled by the gardener (#246). # AGENTS.md maintenance is handled by the gardener (#246).
# All git writes (tree, memory) happen in one commit at the end. # All git writes (tree, journal, memory) happen in one commit at the end.
# Journal writing is delegated to generic profile_write_journal() function.
name = "run-planner" name = "run-planner"
description = "Planner v4: graph-driven planning with tea helpers" description = "Planner v4: graph-driven planning with tea helpers"
@ -152,10 +151,13 @@ From the updated tree + graph bottlenecks, identify the top 5 constraints.
A constraint is an unresolved prerequisite blocking the most downstream objectives. A constraint is an unresolved prerequisite blocking the most downstream objectives.
Graph bottlenecks (high betweenness centrality) and thin objectives inform ranking. Graph bottlenecks (high betweenness centrality) and thin objectives inform ranking.
HUMAN_BLOCKED handling (needs human decision or external resource): Stuck issue handling:
- File a vault procurement item instead of skipping. First check for duplicates - BOUNCED/LABEL_CHURN: do NOT re-promote. Dispatch groom-backlog formula instead:
across ALL vault directories (pending/, approved/, fired/) if a file with the tea_file_issue "chore: break down #<N> — bounced <count>x" "<body>" "action"
same slug already exists in any of them, do NOT create a new one. - HUMAN_BLOCKED (needs human decision or external resource): file a vault
procurement item instead of skipping. First check for duplicates across ALL
vault directories (pending/, approved/, fired/) if a file with the same
slug already exists in any of them, do NOT create a new one.
Naming: $OPS_REPO_ROOT/vault/pending/<project>-<slug>.md (e.g. disinto-github-org.md). Naming: $OPS_REPO_ROOT/vault/pending/<project>-<slug>.md (e.g. disinto-github-org.md).
Write with this template: Write with this template:
@ -183,37 +185,10 @@ HUMAN_BLOCKED handling (needs human decision or external resource):
Then mark the prerequisite in the tree as "blocked-on-vault ($OPS_REPO_ROOT/vault/pending/<id>.md)". Then mark the prerequisite in the tree as "blocked-on-vault ($OPS_REPO_ROOT/vault/pending/<id>.md)".
Do NOT skip or mark as "awaiting human decision" — the vault owns the human interface. Do NOT skip or mark as "awaiting human decision" — the vault owns the human interface.
Template-or-vision filing gate (for non-stuck constraints): Filing gate (for non-stuck constraints):
1. Read issue templates from .codeberg/ISSUE_TEMPLATE/*.yaml: 1. Check if issue already exists (match by #number in tree or title search)
- bug.yaml: for broken/incorrect behavior (error in logs, failing test) 2. If no issue, create one with tea_file_issue using the template above
- feature.yaml: for new capabilities (prerequisite doesn't exist) 3. If issue exists and is open, skip no duplicates
- refactor.yaml: for restructuring without behavior change
2. Attempt to fill template fields:
- affected_files: list 3 or fewer specific files
- acceptance_criteria: write concrete, checkable criteria (max 5)
- proposed_solution/approach: is there one clear approach, or design forks?
3. Complexity test:
- If work touches ONE subsystem (3 or fewer files) AND no design forks
(only one reasonable approach) AND template fields fill confidently:
File as `backlog` using matching template format
- Otherwise Label `vision` with short body:
- Problem statement
- Why it's vision-sized
- Which objectives it blocks
- Include "## Why vision" section explaining complexity
4. Template selection heuristic:
- Bug template: planner identifies something broken (error in logs,
incorrect behavior, failing test)
- Feature template: new capability needed (prerequisite doesn't exist)
- Refactor template: existing code needs restructuring without behavior change
5. Filing steps:
- Check if issue already exists (match by #number in tree or title search)
- If no issue, create with tea_file_issue using template format
- If issue exists and is open, skip no duplicates
Priority label sync: Priority label sync:
- Add priority to current top-5 constraint issues (if missing): - Add priority to current top-5 constraint issues (if missing):
@ -242,13 +217,50 @@ CRITICAL: If any part of this step fails, log the failure and continue.
needs = ["preflight"] needs = ["preflight"]
[[steps]] [[steps]]
id = "commit-ops-changes" id = "journal-and-commit"
title = "Write tree, memory, and journal; commit and push" title = "Write tree, journal, optional memory; commit and PR"
description = """ description = """
### 1. Write prerequisite tree ### 1. Write prerequisite tree
Write to: $OPS_REPO_ROOT/prerequisites.md Write to: $OPS_REPO_ROOT/prerequisites.md
### 2. Memory update (every 5th run) ### 2. Write journal entry
Create/append to: $OPS_REPO_ROOT/journal/planner/$(date -u +%Y-%m-%d).md
Format:
# Planner run — YYYY-MM-DD HH:MM UTC
## Predictions triaged
- #NNN: ACTION — reasoning (or "No unreviewed predictions")
## Prerequisite tree updates
- Resolved: <list> - Discovered: <list> - Proposed: <list>
## Top 5 constraints
1. <prerequisite> blocks N objectives #NNN (existing|filed)
## Stuck issues detected
- #NNN: BOUNCED (Nx) — dispatched groom-backlog as #MMM
(or "No stuck issues detected")
## Vault items filed
- $OPS_REPO_ROOT/vault/pending/<id>.md <what> blocks #NNN
(or "No vault items filed")
## Issues created
- #NNN: title — why (or "No new issues")
## Priority label changes
- Added/removed priority: #NNN (or "No priority changes")
## Observations
- Key patterns noticed this run
## Deferred
- Items in tree beyond top 5, why not filed
Keep concise — 30-50 lines max.
### 3. Memory update (every 5th run)
Count "# Planner run —" headers across all journal files. Count "# Planner run —" headers across all journal files.
Check "<!-- summarized-through-run: N -->" in planner-memory.md. Check "<!-- summarized-through-run: N -->" in planner-memory.md.
If (count - N) >= 5 or planner-memory.md missing, write to: If (count - N) >= 5 or planner-memory.md missing, write to:
@ -256,19 +268,15 @@ If (count - N) >= 5 or planner-memory.md missing, write to:
Include: run counter marker, date, constraint focus, patterns, direction. Include: run counter marker, date, constraint focus, patterns, direction.
Keep under 100 lines. Replace entire file. Keep under 100 lines. Replace entire file.
### 3. Commit ops repo changes ### 4. Commit ops repo changes
Commit the ops repo changes (prerequisites, memory, vault items): Commit the ops repo changes (prerequisites, journal, memory, vault items):
cd "$OPS_REPO_ROOT" cd "$OPS_REPO_ROOT"
git add prerequisites.md knowledge/planner-memory.md vault/pending/ git add prerequisites.md journal/planner/ knowledge/planner-memory.md vault/pending/
git add -u git add -u
if ! git diff --cached --quiet; then if ! git diff --cached --quiet; then
git commit -m "chore: planner run $(date -u +%Y-%m-%d)" git commit -m "chore: planner run $(date -u +%Y-%m-%d)"
git push origin "$PRIMARY_BRANCH" git push origin "$PRIMARY_BRANCH"
fi fi
cd "$PROJECT_REPO_ROOT" cd "$PROJECT_REPO_ROOT"
### 4. Write journal entry (generic)
The planner-run.sh wrapper will handle journal writing via profile_write_journal()
after the formula completes. This step is informational only.
""" """
needs = ["triage-and-plan"] needs = ["triage-and-plan"]

View file

@ -6,7 +6,7 @@
# Memory: previous predictions on the forge ARE the memory. # Memory: previous predictions on the forge ARE the memory.
# No separate memory file — the issue tracker is the source of truth. # No separate memory file — the issue tracker is the source of truth.
# #
# Executed by predictor/predictor-run.sh via polling loop — no action issues. # Executed by predictor/predictor-run.sh via cron — no action issues.
# predictor-run.sh creates a tmux session with Claude (sonnet) and injects # predictor-run.sh creates a tmux session with Claude (sonnet) and injects
# this formula as context. Claude executes all steps autonomously. # this formula as context. Claude executes all steps autonomously.
# #
@ -119,24 +119,27 @@ For each weakness you identify, choose one:
**Suggested action:** <what the planner should consider> **Suggested action:** <what the planner should consider>
**EXPLOIT** high confidence, have a theory you can test: **EXPLOIT** high confidence, have a theory you can test:
File a prediction/unreviewed issue AND a vault PR that dispatches File a prediction/unreviewed issue AND an action issue that dispatches
a formula to generate evidence (AD-006: external actions go through vault). a formula to generate evidence.
The prediction explains the theory. The vault PR triggers the proof The prediction explains the theory. The action generates the proof.
after human approval. When the planner runs next, evidence is already there. When the planner runs next, evidence is already there.
Vault dispatch (requires lib/vault.sh): Action issue body format (label: action):
source "$PROJECT_REPO_ROOT/lib/vault.sh" Dispatched by predictor to test theory in #<prediction_number>.
TOML_CONTENT="id = \"predict-<prediction_number>-<formula>\" ## Task
context = \"Test prediction #<prediction_number>: <theory summary> — focus: <specific test>\" Run <formula name> with focus on <specific test>.
formula = \"<formula-name>\"
secrets = [] ## Expected evidence
# Unblocks: #<prediction_number> Results in evidence/<dir>/<date>-<name>.json
# Expected evidence: evidence/<dir>/<date>-<name>.json
" ## Acceptance criteria
PR_NUM=$(vault_request "predict-<prediction_number>-<formula>" "$TOML_CONTENT") - [ ] Formula ran to completion
echo "Vault PR #${PR_NUM} filed to test prediction #<prediction_number>" - [ ] Evidence file written with structured results
## Affected files
- evidence/<dir>/
Available formulas (check $PROJECT_REPO_ROOT/formulas/*.toml for current list): Available formulas (check $PROJECT_REPO_ROOT/formulas/*.toml for current list):
cat "$PROJECT_REPO_ROOT/formulas/"*.toml | grep '^name' | head -10 cat "$PROJECT_REPO_ROOT/formulas/"*.toml | grep '^name' | head -10
@ -153,10 +156,10 @@ tea is pre-configured with login "$TEA_LOGIN" and repo "$FORGE_REPO".
tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \ tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \
--title "<title>" --body "<body>" --labels "prediction/unreviewed" --title "<title>" --body "<body>" --labels "prediction/unreviewed"
2. Dispatch formula via vault (if exploiting): 2. File action dispatches (if exploiting):
source "$PROJECT_REPO_ROOT/lib/vault.sh" tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \
PR_NUM=$(vault_request "predict-NNN-<formula>" "$TOML_CONTENT") --title "action: test prediction #NNN — <formula> <focus>" \
# See EXPLOIT section above for TOML_CONTENT format --body "<body>" --labels "action"
3. Close superseded predictions: 3. Close superseded predictions:
tea issues close <number> --login "$TEA_LOGIN" --repo "$FORGE_REPO" tea issues close <number> --login "$TEA_LOGIN" --repo "$FORGE_REPO"
@ -170,11 +173,11 @@ tea is pre-configured with login "$TEA_LOGIN" and repo "$FORGE_REPO".
## Rules ## Rules
- Max 5 actions total (predictions + vault dispatches combined) - Max 5 actions total (predictions + action dispatches combined)
- Each exploit counts as 2 (prediction + vault dispatch) - Each exploit counts as 2 (prediction + action dispatch)
- So: 5 explores, or 2 exploits + 1 explore, or 1 exploit + 3 explores - So: 5 explores, or 2 exploits + 1 explore, or 1 exploit + 3 explores
- Never re-file a dismissed prediction without new evidence - Never re-file a dismissed prediction without new evidence
- Vault dispatches must reference existing formulas don't invent formulas - Action issues must reference existing formulas don't invent formulas
- Be specific: name the file, the metric, the threshold, the formula - Be specific: name the file, the metric, the threshold, the formula
- If no weaknesses found, file nothing — that's a strong signal the project is healthy - If no weaknesses found, file nothing — that's a strong signal the project is healthy

View file

@ -3,7 +3,7 @@
# Trigger: action issue created by planner (gap analysis), dev-poll (post-merge # Trigger: action issue created by planner (gap analysis), dev-poll (post-merge
# hook detecting site/ changes), or gardener (periodic SHA drift check). # hook detecting site/ changes), or gardener (periodic SHA drift check).
# #
# The dispatcher picks up the issue, executes these steps, posts results # The action-agent picks up the issue, executes these steps, posts results
# as a comment, and closes the issue. # as a comment, and closes the issue.
name = "run-publish-site" name = "run-publish-site"
@ -216,7 +216,7 @@ Check 3 — engagement evidence has been collected at least once:
jq -r '" visitors=\(.unique_visitors) pages=\(.page_views) referrals=\(.referred_visitors)"' "$LATEST" 2>/dev/null || true jq -r '" visitors=\(.unique_visitors) pages=\(.page_views) referrals=\(.referred_visitors)"' "$LATEST" 2>/dev/null || true
else else
echo "NOTE: No engagement reports yet — run: bash site/collect-engagement.sh" echo "NOTE: No engagement reports yet — run: bash site/collect-engagement.sh"
echo "The first report will appear after the scheduled collection runs (daily at 23:55 UTC)." echo "The first report will appear after the cron job runs (daily at 23:55 UTC)."
fi fi
Summary: Summary:

View file

@ -5,7 +5,7 @@
# the action and notifies the human for one-click copy-paste execution. # the action and notifies the human for one-click copy-paste execution.
# #
# Trigger: action issue created by planner or any formula. # Trigger: action issue created by planner or any formula.
# The dispatcher picks up the issue, executes these steps, writes a draft # The action-agent picks up the issue, executes these steps, writes a draft
# to vault/outreach/{platform}/drafts/, notifies the human via the forge, # to vault/outreach/{platform}/drafts/, notifies the human via the forge,
# and closes the issue. # and closes the issue.
# #

View file

@ -1,7 +1,7 @@
# formulas/run-supervisor.toml — Supervisor formula (health monitoring + remediation) # formulas/run-supervisor.toml — Supervisor formula (health monitoring + remediation)
# #
# Executed by supervisor/supervisor-run.sh via polling loop (every 20 minutes). # Executed by supervisor/supervisor-run.sh via cron (every 20 minutes).
# supervisor-run.sh runs claude -p via agent-sdk.sh and injects # supervisor-run.sh creates a tmux session with Claude (sonnet) and injects
# this formula with pre-collected metrics as context. # this formula with pre-collected metrics as context.
# #
# Steps: preflight → health-assessment → decide-actions → report → journal # Steps: preflight → health-assessment → decide-actions → report → journal
@ -34,15 +34,13 @@ and injected into your prompt above. Review them now.
(24h grace period). Check the "Stale Phase Cleanup" section for any (24h grace period). Check the "Stale Phase Cleanup" section for any
files cleaned or in grace period this run. files cleaned or in grace period this run.
2. Check vault state: read ${OPS_VAULT_ROOT:-$OPS_REPO_ROOT/vault/pending}/*.md for any procurement items 2. Check vault state: read $OPS_REPO_ROOT/vault/pending/*.md for any procurement items
the planner has filed. Note items relevant to the health assessment the planner has filed. Note items relevant to the health assessment
(e.g. a blocked resource that explains why the pipeline is stalled). (e.g. a blocked resource that explains why the pipeline is stalled).
Note: In degraded mode, vault items are stored locally.
3. Read the supervisor journal for recent history: 3. Read the supervisor journal for recent history:
JOURNAL_FILE="${OPS_JOURNAL_ROOT:-$OPS_REPO_ROOT/journal/supervisor}/$(date -u +%Y-%m-%d).md" JOURNAL_FILE="$OPS_REPO_ROOT/journal/supervisor/$(date -u +%Y-%m-%d).md"
if [ -f "$JOURNAL_FILE" ]; then cat "$JOURNAL_FILE"; fi if [ -f "$JOURNAL_FILE" ]; then cat "$JOURNAL_FILE"; fi
Note: In degraded mode, the journal is stored locally and not committed to git.
4. Note any values that cross these thresholds: 4. Note any values that cross these thresholds:
- RAM available < 500MB or swap > 3GB → P0 (memory crisis) - RAM available < 500MB or swap > 3GB → P0 (memory crisis)
@ -107,13 +105,8 @@ For each finding from the health assessment, decide and execute an action.
sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 || true sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 || true
**P1 Disk pressure:** **P1 Disk pressure:**
# First pass: dangling only (cheap, safe) # Docker cleanup
sudo docker system prune -f >/dev/null 2>&1 || true sudo docker system prune -f >/dev/null 2>&1 || true
# If still > 80%, escalate to all unused images (more aggressive but necessary)
_pct=$(df -h / | awk 'NR==2{print $5}' | tr -d '%')
if [ "${_pct:-0}" -gt 80 ]; then
sudo docker system prune -a -f >/dev/null 2>&1 || true
fi
# Truncate logs > 10MB # Truncate logs > 10MB
for f in "$FACTORY_ROOT"/{dev,review,supervisor,gardener,planner,predictor}/*.log; do for f in "$FACTORY_ROOT"/{dev,review,supervisor,gardener,planner,predictor}/*.log; do
[ -f "$f" ] && [ "$(du -k "$f" | cut -f1)" -gt 10240 ] && truncate -s 0 "$f" [ -f "$f" ] && [ "$(du -k "$f" | cut -f1)" -gt 10240 ] && truncate -s 0 "$f"
@ -144,22 +137,21 @@ For each finding from the health assessment, decide and execute an action.
**P3 Stale PRs (CI done >20min, no push since):** **P3 Stale PRs (CI done >20min, no push since):**
Do NOT read dev-poll.sh, push branches, attempt merges, or investigate pipeline code. Do NOT read dev-poll.sh, push branches, attempt merges, or investigate pipeline code.
Instead, file a vault item for the dev-agent to pick up: Instead, nudge the dev-agent via tmux injection if a session is alive:
Write ${OPS_VAULT_ROOT:-$OPS_REPO_ROOT/vault/pending}/stale-pr-${ISSUE_NUM}.md: # Find the dev session for this issue
# Stale PR: ${PR_TITLE} SESSION=$(tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "dev-.*-${ISSUE_NUM}" | head -1)
## What if [ -n "$SESSION" ]; then
CI finished >20min ago but no git push has been made to the PR branch. # Inject a nudge into the dev-agent session
## Why tmux send-keys -t "$SESSION" "# [supervisor] PR stale >20min — CI finished, please push or update" Enter
P3 Factory degraded: PRs should be pushed within 20min of CI completion. fi
## Unblocks If no active tmux session exists, note it in the journal for the next dev-poll cycle.
- Factory health: dev-agent will push the branch and continue the workflow
Do NOT file vault items for stale PRs unless they remain stale for >3 consecutive runs. Do NOT file vault items for stale PRs unless they remain stale for >3 consecutive runs.
### Cannot auto-fix → file vault item ### Cannot auto-fix → file vault item
For P0-P2 issues that persist after auto-fix attempts, or issues requiring For P0-P2 issues that persist after auto-fix attempts, or issues requiring
human judgment, file a vault procurement item: human judgment, file a vault procurement item:
Write ${OPS_VAULT_ROOT:-$OPS_REPO_ROOT/vault/pending}/supervisor-<issue-slug>.md: Write $OPS_REPO_ROOT/vault/pending/supervisor-<issue-slug>.md:
# <What is needed> # <What is needed>
## What ## What
<description of the problem and why the supervisor cannot fix it> <description of the problem and why the supervisor cannot fix it>
@ -167,24 +159,14 @@ human judgment, file a vault procurement item:
<impact on factory health reference the priority level> <impact on factory health reference the priority level>
## Unblocks ## Unblocks
- Factory health: <what this resolves> - Factory health: <what this resolves>
Vault PR filed on ops repo human approves via PR review. The vault-poll will notify the human and track the request.
Note: In degraded mode (no ops repo), vault items are written locally to ${OPS_VAULT_ROOT:-local path}.
### Reading best-practices files Read the relevant best-practices file before taking action:
cat "$OPS_REPO_ROOT/knowledge/memory.md" # P0
Read the relevant best-practices file before taking action. In degraded mode, cat "$OPS_REPO_ROOT/knowledge/disk.md" # P1
use the bundled knowledge files from ${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}: cat "$OPS_REPO_ROOT/knowledge/ci.md" # P2 CI
cat "$OPS_REPO_ROOT/knowledge/dev-agent.md" # P2 agent
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/memory.md" # P0 cat "$OPS_REPO_ROOT/knowledge/git.md" # P2 git
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/disk.md" # P1
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/ci.md" # P2 CI
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/dev-agent.md" # P2 agent
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/git.md" # P2 git
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/review-agent.md" # P2 review
cat "${OPS_KNOWLEDGE_ROOT:-$OPS_REPO_ROOT/knowledge}/forge.md" # P2 forge
Note: If OPS_REPO_ROOT is not available (degraded mode), the bundled knowledge
files in ${OPS_KNOWLEDGE_ROOT:-<unset>} provide fallback guidance.
Track what you fixed and what vault items you filed for the report step. Track what you fixed and what vault items you filed for the report step.
""" """
@ -226,7 +208,7 @@ description = """
Append a timestamped entry to the supervisor journal. Append a timestamped entry to the supervisor journal.
File path: File path:
${OPS_JOURNAL_ROOT:-$OPS_REPO_ROOT/journal/supervisor}/$(date -u +%Y-%m-%d).md $OPS_REPO_ROOT/journal/supervisor/$(date -u +%Y-%m-%d).md
If the file already exists (multiple runs per day), append a new section. If the file already exists (multiple runs per day), append a new section.
If it does not exist, create it. If it does not exist, create it.
@ -259,24 +241,7 @@ run-to-run context so future supervisor runs can detect trends
IMPORTANT: Do NOT commit or push the journal — it is a local working file. IMPORTANT: Do NOT commit or push the journal — it is a local working file.
The journal directory is committed to git periodically by other agents. The journal directory is committed to git periodically by other agents.
Note: In degraded mode (no ops repo), the journal is written locally to After writing the journal, write the phase signal:
${OPS_JOURNAL_ROOT:-<unset>} and is NOT automatically committed to any repo. echo 'PHASE:done' > "$PHASE_FILE"
## Learning
If you discover something new during this run:
- In full mode (ops repo available): append to the relevant knowledge file:
echo "### Lesson title
Description of what you learned." >> "${OPS_REPO_ROOT}/knowledge/<file>.md"
- In degraded mode: write to the local knowledge directory for reference:
echo "### Lesson title
Description of what you learned." >> "${OPS_KNOWLEDGE_ROOT:-<unset>}/<file>.md"
Knowledge files: memory.md, disk.md, ci.md, forge.md, dev-agent.md,
review-agent.md, git.md.
After writing the journal, the agent session completes automatically.
""" """
needs = ["report"] needs = ["report"]

View file

@ -1,267 +0,0 @@
# formulas/triage.toml — Triage-agent formula (generic template)
#
# This is the base template for triage investigations.
# Project-specific formulas (e.g. formulas/triage-harb.toml) extend this by
# overriding the fields in the [project] section and providing stack-specific
# step descriptions.
#
# Triggered by: bug-report + in-triage label combination.
# Set by the reproduce-agent when:
# - Bug was confirmed (reproduced)
# - Quick log analysis did not reveal an obvious root cause
# - Reproduce-agent documented all steps taken and logs examined
#
# Steps:
# 1. read-findings — parse issue comments for prior reproduce-agent evidence
# 2. trace-data-flow — follow symptom through UI → API → backend → data store
# 3. instrumentation — throwaway branch, add logging, restart, observe
# 4. decompose — file backlog issues for each root cause
# 5. link-back — update original issue, swap in-triage → in-progress
# 6. cleanup — delete throwaway debug branch
#
# Best practices:
# - Start from reproduce-agent findings; do not repeat their work
# - Budget: 70% tracing data flow, 30% instrumented re-runs
# - Multiple causes: check if layered (Depends-on) or independent (Related)
# - Always delete the throwaway debug branch before finishing
# - If inconclusive after full turn budget: leave in-triage, post what was
# tried, do NOT relabel — supervisor handles stale triage sessions
#
# Project-specific formulas extend this template by defining:
# - stack_script: how to start/stop the project stack
# - [project].data_flow: layer names (e.g. "chain → indexer → GraphQL → UI")
# - [project].api_endpoints: which APIs/services to inspect
# - [project].stack_lock: stack lock configuration
# - Per-step description overrides with project-specific commands
#
# No hard timeout — runs until Claude hits its turn limit.
# Stack lock held for full run (triage is rare; blocking CI is acceptable).
name = "triage"
description = "Deep root cause analysis: trace data flow, add debug instrumentation, decompose causes into backlog issues."
version = 2
# Set stack_script to the restart command for local stacks.
# Leave empty ("") to connect to an existing staging environment.
stack_script = ""
tools = ["playwright"]
# ---------------------------------------------------------------------------
# Project-specific extension fields.
# Override these in formulas/triage-<project>.toml.
# ---------------------------------------------------------------------------
[project]
# Human-readable layer names for the data-flow trace (generic default).
# Example project override: "chain → indexer → GraphQL → UI"
data_flow = "UI → API → backend → data store"
# Comma-separated list of API endpoints or services to inspect.
# Example: "GraphQL /graphql, REST /api/v1, RPC ws://localhost:8545"
api_endpoints = ""
# Stack lock configuration (leave empty for default behavior).
# Example: "full" to hold a full stack lock during triage.
stack_lock = ""
# ---------------------------------------------------------------------------
# Steps
# ---------------------------------------------------------------------------
[[steps]]
id = "read-findings"
title = "Read reproduce-agent findings"
description = """
Before doing anything else, parse all prior evidence from the issue comments.
1. Fetch the issue body and all comments:
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${ISSUE_NUMBER}" | jq -r '.body'
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${ISSUE_NUMBER}/comments" | jq -r '.[].body'
2. Identify the reproduce-agent comment (look for sections like
"Reproduction steps", "Logs examined", "What was tried").
3. Extract and note:
- The exact symptom (error message, unexpected value, visual regression)
- Steps that reliably trigger the bug
- Log lines or API responses already captured
- Any hypotheses the reproduce-agent already ruled out
Do NOT repeat work the reproduce-agent already did. Your job starts where
theirs ended. If no reproduce-agent comment is found, note it and proceed
with fresh investigation using the issue body only.
"""
[[steps]]
id = "trace-data-flow"
title = "Trace data flow from symptom to source"
description = """
Systematically follow the symptom backwards through each layer of the stack.
Spend ~70% of your total turn budget here before moving to instrumentation.
Generic layer traversal (adapt to the project's actual stack):
UI → API → backend → data store
For each layer boundary:
1. What does the upstream layer send?
2. What does the downstream layer expect?
3. Is there a mismatch? If yes — is this the root cause or a symptom?
Tracing checklist:
a. Start at the layer closest to the visible symptom.
b. Read the relevant source files — do not guess data shapes.
c. Cross-reference API contracts: compare what the code sends vs what it
should send according to schemas, type definitions, or documentation.
d. Check recent git history on suspicious files:
git log --oneline -20 -- <file>
e. Search for related issues or TODOs in the code:
grep -r "TODO\|FIXME\|HACK" -- <relevant directory>
Capture for each layer:
- The data shape flowing in and out (field names, types, nullability)
- Whether the layer's behavior matches its documented contract
- Any discrepancy found
If a clear root cause becomes obvious during tracing, note it and continue
checking whether additional causes exist downstream.
"""
needs = ["read-findings"]
[[steps]]
id = "instrumentation"
title = "Add debug instrumentation on a throwaway branch"
description = """
Use ~30% of your total turn budget here. Only instrument after tracing has
identified the most likely failure points — do not instrument blindly.
1. Create a throwaway debug branch (NEVER commit this to main):
cd "$PROJECT_REPO_ROOT"
git checkout -b debug/triage-${ISSUE_NUMBER}
2. Add targeted logging at the layer boundaries identified during tracing:
- Console.log / structured log statements around the suspicious code path
- Log the actual values flowing through: inputs, outputs, intermediate state
- Add verbose mode flags if the stack supports them
- Keep instrumentation minimal — only what confirms or refutes the hypothesis
3. Restart the stack using the configured script (if set):
${stack_script:-"# No stack_script configured — restart manually or connect to staging"}
4. Re-run the reproduction steps from the reproduce-agent findings.
5. Observe and capture new output:
- Paste relevant log lines into your working notes
- Note whether the observed values match or contradict the hypothesis
6. If the first instrumentation pass is inconclusive, iterate:
- Narrow the scope to the next most suspicious boundary
- Re-instrument, restart, re-run
- Maximum 2-3 instrumentation rounds before declaring inconclusive
Do NOT push the debug branch. It will be deleted in the cleanup step.
"""
needs = ["trace-data-flow"]
[[steps]]
id = "decompose"
title = "Decompose root causes into backlog issues"
description = """
After tracing and instrumentation, articulate each distinct root cause.
For each root cause found:
1. Determine the relationship to other causes:
- Layered (one causes another) → use Depends-on in the issue body
- Independent (separate code paths fail independently) → use Related
2. Create a backlog issue for each root cause:
curl -sf -X POST "${FORGE_API}/issues" \\
-H "Authorization: token ${FORGE_TOKEN}" \\
-H "Content-Type: application/json" \\
-d '{
"title": "fix: <specific description of root cause N>",
"body": "## Root cause\\n<exact code path, file:line>\\n\\n## Fix suggestion\\n<recommended approach>\\n\\n## Context\\nDecomposed from #${ISSUE_NUMBER} (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>",
"labels": [{"name": "backlog"}]
}'
3. Note the newly created issue numbers.
If only one root cause is found, still create a single backlog issue with
the specific code location and fix suggestion.
If the investigation is inconclusive (no clear root cause found), skip this
step and proceed directly to link-back with the inconclusive outcome.
"""
needs = ["instrumentation"]
[[steps]]
id = "link-back"
title = "Update original issue and relabel"
description = """
Post a summary comment on the original issue and update its labels.
### If root causes were found (conclusive):
Post a comment:
"## Triage findings
Found N root cause(s):
- #X — <one-line description> (cause 1 of N)
- #Y — <one-line description> (cause 2 of N, depends on #X)
Data flow traced: <layer where the bug originates>
Instrumentation: <key log output that confirmed the cause>
Next step: backlog issues above will be implemented in dependency order."
Then swap labels:
- Remove: in-triage
- Add: in-progress
### If investigation was inconclusive (turn budget exhausted):
Post a comment:
"## Triage — inconclusive
Traced: <layers checked>
Tried: <instrumentation attempts and what they showed>
Hypothesis: <best guess at cause, if any>
No definitive root cause identified. Leaving in-triage for supervisor
to handle as a stale triage session."
Do NOT relabel. Leave in-triage. The supervisor monitors stale triage
sessions and will escalate or reassign.
**CRITICAL: Write outcome file** — Always write the outcome to the outcome file:
- If root causes found (conclusive): echo "reproduced" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt
- If inconclusive: echo "needs-triage" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt
"""
needs = ["decompose"]
[[steps]]
id = "cleanup"
title = "Delete throwaway debug branch"
description = """
Always delete the debug branch, even if the investigation was inconclusive.
1. Switch back to the main branch:
cd "$PROJECT_REPO_ROOT"
git checkout "$PRIMARY_BRANCH"
2. Delete the local debug branch:
git branch -D debug/triage-${ISSUE_NUMBER}
3. Confirm no remote was pushed (if accidentally pushed, delete it too):
git push origin --delete debug/triage-${ISSUE_NUMBER} 2>/dev/null || true
4. Verify the worktree is clean:
git status
git worktree list
A clean repo is a prerequisite for the next dev-agent run. Never leave
debug branches behind — they accumulate and pollute the branch list.
"""
needs = ["link-back"]

View file

@ -1,4 +1,4 @@
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 --> <!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
# Gardener Agent # Gardener Agent
**Role**: Backlog grooming — detect duplicate issues, missing acceptance **Role**: Backlog grooming — detect duplicate issues, missing acceptance
@ -7,26 +7,22 @@ the quality gate: strips the `backlog` label from issues that lack acceptance
criteria checkboxes (`- [ ]`) or an `## Affected files` section. Invokes criteria checkboxes (`- [ ]`) or an `## Affected files` section. Invokes
Claude to fix what it can; files vault items for what it cannot. Claude to fix what it can; files vault items for what it cannot.
**Trigger**: `gardener-run.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh` **Trigger**: `gardener-run.sh` runs 4x/day via cron. Sources `lib/guard.sh` and
every 6 hours (iteration math at line 182-194). Sources `lib/guard.sh` and calls calls `check_active gardener` first — skips if `$FACTORY_ROOT/state/.gardener-active`
`check_active gardener` first — skips if `$FACTORY_ROOT/state/.gardener-active` is absent. is absent. Then creates a tmux session with `claude --model sonnet`, injects
**Early-exit optimization**: if no issues, PRs, or repo files have changed since the last `formulas/run-gardener.toml` as context, monitors the phase file, and cleans up
run (checked via Forgejo API and `git diff`), the model is not invoked — the run exits on completion or timeout (2h max session). No action issues — the gardener runs
immediately (no tmux session, no tokens consumed). Otherwise, creates a tmux session with directly from cron like the planner, predictor, and supervisor.
`claude --model sonnet`, injects `formulas/run-gardener.toml` as context, monitors the
phase file, and cleans up on completion or timeout (2h max session). No action issues —
the gardener runs as part of the polling loop alongside the planner, predictor, and supervisor.
**Key files**: **Key files**:
- `gardener/gardener-run.sh` — Polling loop participant + orchestrator: lock, memory guard, - `gardener/gardener-run.sh` — Cron wrapper + orchestrator: lock, memory guard,
sources disinto project config, creates tmux session, injects formula prompt, sources disinto project config, creates tmux session, injects formula prompt,
monitors phase file via custom `_gardener_on_phase_change` callback (passed to monitors phase file via custom `_gardener_on_phase_change` callback (passed to
`run_formula_and_monitor`). Stays alive through CI/review/merge cycle after `run_formula_and_monitor`). Stays alive through CI/review/merge cycle after
`PHASE:awaiting_ci` — injects CI results and review feedback, re-signals `PHASE:awaiting_ci` — injects CI results and review feedback, re-signals
`PHASE:awaiting_ci` after fixes, signals `PHASE:awaiting_review` on CI pass. `PHASE:awaiting_ci` after fixes, signals `PHASE:awaiting_review` on CI pass.
Executes pending-actions manifest after PR merge. Executes pending-actions manifest after PR merge.
- `formulas/run-gardener.toml` — Execution spec: preflight, grooming, dust-bundling, - `formulas/run-gardener.toml` — Execution spec: preflight, grooming, dust-bundling, blocked-review, agents-update, commit-and-pr
agents-update, commit-and-pr
- `gardener/pending-actions.json` — Manifest of deferred repo actions (label changes, - `gardener/pending-actions.json` — Manifest of deferred repo actions (label changes,
closures, comments, issue creation). Written during grooming steps, committed to the closures, comments, issue creation). Written during grooming steps, committed to the
PR, reviewed alongside AGENTS.md changes, executed by gardener-run.sh after merge. PR, reviewed alongside AGENTS.md changes, executed by gardener-run.sh after merge.
@ -35,10 +31,10 @@ the gardener runs as part of the polling loop alongside the planner, predictor,
- `FORGE_TOKEN`, `FORGE_GARDENER_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT` - `FORGE_TOKEN`, `FORGE_GARDENER_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by gardener-run.sh) - `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by gardener-run.sh)
**Lifecycle**: gardener-run.sh (invoked by polling loop every 6h, `check_active gardener`) **Lifecycle**: gardener-run.sh (cron 0,6,12,18) → `check_active gardener` → lock + memory guard
lock + memory guard → load formula + context → create tmux session → load formula + context → create tmux session →
Claude grooms backlog (writes proposed actions to manifest), bundles dust, Claude grooms backlog (writes proposed actions to manifest), bundles dust,
updates AGENTS.md, commits manifest + docs to PR → reviews blocked issues, updates AGENTS.md, commits manifest + docs to PR →
`PHASE:awaiting_ci` (stays alive) → CI pass → `PHASE:awaiting_review` `PHASE:awaiting_ci` (stays alive) → CI pass → `PHASE:awaiting_review`
review feedback → address + re-signal → merge → gardener-run.sh executes review feedback → address + re-signal → merge → gardener-run.sh executes
manifest actions via API → `PHASE:done`. When blocked on external resources manifest actions via API → `PHASE:done`. When blocked on external resources

50
gardener/PROMPT.md Normal file
View file

@ -0,0 +1,50 @@
# Gardener Prompt — Dust vs Ore
> **Note:** This is human documentation. The actual LLM prompt is built
> inline in `gardener-poll.sh` (with dynamic context injection). This file
> documents the design rationale for reference.
## Rule
Don't promote trivial tech-debt individually. Each promotion costs a full
factory cycle: CI + dev-agent + review + merge. Don't fill minecarts with
dust — put ore inside.
## What is dust?
- Comment fix
- Variable rename
- Style-only change (whitespace, formatting)
- Single-line edit
- Trivial cleanup with no behavioral impact
## What is ore?
- Multi-file changes
- Behavioral fixes
- Architectural improvements
- Security or correctness issues
- Anything requiring design thought
## LLM output format
When a tech-debt issue is dust, the LLM outputs:
```
DUST: {"issue": NNN, "group": "<file-or-subsystem>", "title": "...", "reason": "..."}
```
The `group` field clusters related dust by file or subsystem (e.g.
`"gardener"`, `"lib/env.sh"`, `"dev-poll"`).
## Bundling
The script collects dust items into `gardener/dust.jsonl`. When a group
accumulates 3+ items, the script automatically:
1. Creates one bundled backlog issue referencing all source issues
2. Closes the individual source issues with a cross-reference comment
3. Removes bundled items from the staging file
This converts N trivial issues into 1 actionable issue, saving N-1 factory
cycles.

View file

@ -51,4 +51,3 @@ Compact, decision-ready. Human should be able to reply "1a 2c 3b" and be done.
- Dev-agent doesn't understand the product — clear acceptance criteria save 2-3 CI cycles - Dev-agent doesn't understand the product — clear acceptance criteria save 2-3 CI cycles
- Feature issues MUST list affected e2e test files - Feature issues MUST list affected e2e test files
- Issue templates from ISSUE-TEMPLATES.md propagate via triage gate - Issue templates from ISSUE-TEMPLATES.md propagate via triage gate
- **AD-002 is a runtime invariant; nothing for the gardener to check at issue-groom time.** Concurrency is enforced by `flock session.lock` within each container and by `issue_claim` for per-issue work. A violation manifests as a 401 or VRAM OOM in agent logs, not as a malformed issue.

View file

@ -1,12 +1,12 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# ============================================================================= # =============================================================================
# gardener-run.sh — Polling-loop wrapper: gardener execution via SDK + formula # gardener-run.sh — Cron wrapper: gardener execution via SDK + formula
# #
# Synchronous bash loop using claude -p (one-shot invocation). # Synchronous bash loop using claude -p (one-shot invocation).
# No tmux sessions, no phase files — the bash script IS the state machine. # No tmux sessions, no phase files — the bash script IS the state machine.
# #
# Flow: # Flow:
# 1. Guards: run lock, memory check # 1. Guards: cron lock, memory check
# 2. Load formula (formulas/run-gardener.toml) # 2. Load formula (formulas/run-gardener.toml)
# 3. Build context: AGENTS.md, scratch file, prompt footer # 3. Build context: AGENTS.md, scratch file, prompt footer
# 4. agent_run(worktree, prompt) → Claude does maintenance, pushes if needed # 4. agent_run(worktree, prompt) → Claude does maintenance, pushes if needed
@ -17,7 +17,7 @@
# Usage: # Usage:
# gardener-run.sh [projects/disinto.toml] # project config (default: disinto) # gardener-run.sh [projects/disinto.toml] # project config (default: disinto)
# #
# Called by: entrypoint.sh polling loop (every 6 hours) # Cron: 0 0,6,12,18 * * * cd /home/debian/dark-factory && bash gardener/gardener-run.sh projects/disinto.toml
# ============================================================================= # =============================================================================
set -euo pipefail set -euo pipefail
@ -45,7 +45,7 @@ source "$FACTORY_ROOT/lib/agent-sdk.sh"
# shellcheck source=../lib/pr-lifecycle.sh # shellcheck source=../lib/pr-lifecycle.sh
source "$FACTORY_ROOT/lib/pr-lifecycle.sh" source "$FACTORY_ROOT/lib/pr-lifecycle.sh"
LOG_FILE="${DISINTO_LOG_DIR}/gardener/gardener.log" LOG_FILE="$SCRIPT_DIR/gardener.log"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh # shellcheck disable=SC2034 # consumed by agent-sdk.sh
LOGFILE="$LOG_FILE" LOGFILE="$LOG_FILE"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh # shellcheck disable=SC2034 # consumed by agent-sdk.sh
@ -54,50 +54,20 @@ SCRATCH_FILE="/tmp/gardener-${PROJECT_NAME}-scratch.md"
RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt" RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt"
GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt" GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt"
WORKTREE="/tmp/${PROJECT_NAME}-gardener-run" WORKTREE="/tmp/${PROJECT_NAME}-gardener-run"
LAST_SHA_FILE="${DISINTO_DATA_DIR}/gardener-last-sha.txt"
# Override LOG_AGENT for consistent agent identification log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
LOG_AGENT="gardener"
# ── Guards ──────────────────────────────────────────────────────────────── # ── Guards ────────────────────────────────────────────────────────────────
check_active gardener check_active gardener
acquire_run_lock "/tmp/gardener-run.lock" acquire_cron_lock "/tmp/gardener-run.lock"
memory_guard 2000 check_memory 2000
log "--- Gardener run start ---" log "--- Gardener run start ---"
# ── Precondition checks: skip if nothing to do ────────────────────────────
# Check for new commits since last run
CURRENT_SHA=$(git -C "$FACTORY_ROOT" rev-parse HEAD 2>/dev/null || echo "")
LAST_SHA=$(cat "$LAST_SHA_FILE" 2>/dev/null || echo "")
# Check for open issues needing grooming
backlog_count=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues?labels=backlog&state=open&limit=1" 2>/dev/null | jq length) || backlog_count=0
tech_debt_count=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues?labels=tech-debt&state=open&limit=1" 2>/dev/null | jq length) || tech_debt_count=0
if [ "$CURRENT_SHA" = "$LAST_SHA" ] && [ "${backlog_count:-0}" -eq 0 ] && [ "${tech_debt_count:-0}" -eq 0 ]; then
log "no new commits and no issues to groom — skipping"
exit 0
fi
log "current sha: ${CURRENT_SHA:0:8}..., backlog issues: ${backlog_count}, tech-debt issues: ${tech_debt_count}"
# ── Resolve forge remote for git operations ─────────────────────────────
resolve_forge_remote
# ── Resolve agent identity for .profile repo ────────────────────────────
resolve_agent_identity || true
# ── Load formula + context ─────────────────────────────────────────────── # ── Load formula + context ───────────────────────────────────────────────
load_formula_or_profile "gardener" "$FACTORY_ROOT/formulas/run-gardener.toml" || exit 1 load_formula "$FACTORY_ROOT/formulas/run-gardener.toml"
build_context_block AGENTS.md build_context_block AGENTS.md
# ── Prepare .profile context (lessons injection) ─────────────────────────
formula_prepare_profile_context
# ── Read scratch file (compaction survival) ─────────────────────────────── # ── Read scratch file (compaction survival) ───────────────────────────────
SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE")
SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE")
@ -135,7 +105,7 @@ You have full shell access and --dangerously-skip-permissions.
Fix what you can. File vault items for what you cannot. Do NOT ask permission — act first, report after. Fix what you can. File vault items for what you cannot. Do NOT ask permission — act first, report after.
## Project context ## Project context
${CONTEXT_BLOCK}$(formula_lessons_block) ${CONTEXT_BLOCK}
${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT} ${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT}
} }
## Result file ## Result file
@ -148,7 +118,16 @@ ${SCRATCH_INSTRUCTION}
${PROMPT_FOOTER}" ${PROMPT_FOOTER}"
# ── Create worktree ────────────────────────────────────────────────────── # ── Create worktree ──────────────────────────────────────────────────────
formula_worktree_setup "$WORKTREE" cd "$PROJECT_REPO_ROOT"
git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true
worktree_cleanup "$WORKTREE"
git worktree add "$WORKTREE" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null
cleanup() {
worktree_cleanup "$WORKTREE"
rm -f "$GARDENER_PR_FILE"
}
trap cleanup EXIT
# ── Post-merge manifest execution ──────────────────────────────────────── # ── Post-merge manifest execution ────────────────────────────────────────
# Reads gardener/pending-actions.json and executes each action via API. # Reads gardener/pending-actions.json and executes each action via API.
@ -177,21 +156,19 @@ _gardener_execute_manifest() {
case "$action" in case "$action" in
add_label) add_label)
local label label_id http_code resp local label label_id
label=$(jq -r ".[$i].label" "$manifest_file") label=$(jq -r ".[$i].label" "$manifest_file")
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/labels" | jq -r --arg n "$label" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \
'.[] | select(.name == $n) | .id') || true '.[] | select(.name == $n) | .id') || true
if [ -n "$label_id" ]; then if [ -n "$label_id" ]; then
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
"${FORGE_API}/issues/${issue}/labels" \ "${FORGE_API}/issues/${issue}/labels" \
-d "{\"labels\":[${label_id}]}" 2>/dev/null) || true -d "{\"labels\":[${label_id}]}" >/dev/null 2>&1; then
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
log "manifest: add_label '${label}' to #${issue}" log "manifest: add_label '${label}' to #${issue}"
else else
log "manifest: FAILED add_label '${label}' to #${issue}: HTTP ${http_code}" log "manifest: FAILED add_label '${label}' to #${issue}"
fi fi
else else
log "manifest: FAILED add_label — label '${label}' not found" log "manifest: FAILED add_label — label '${label}' not found"
@ -199,19 +176,17 @@ _gardener_execute_manifest() {
;; ;;
remove_label) remove_label)
local label label_id http_code resp local label label_id
label=$(jq -r ".[$i].label" "$manifest_file") label=$(jq -r ".[$i].label" "$manifest_file")
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/labels" | jq -r --arg n "$label" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \
'.[] | select(.name == $n) | .id') || true '.[] | select(.name == $n) | .id') || true
if [ -n "$label_id" ]; then if [ -n "$label_id" ]; then
resp=$(curl -sf -w "\n%{http_code}" -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ if curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${issue}/labels/${label_id}" 2>/dev/null) || true "${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1; then
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
log "manifest: remove_label '${label}' from #${issue}" log "manifest: remove_label '${label}' from #${issue}"
else else
log "manifest: FAILED remove_label '${label}' from #${issue}: HTTP ${http_code}" log "manifest: FAILED remove_label '${label}' from #${issue}"
fi fi
else else
log "manifest: FAILED remove_label — label '${label}' not found" log "manifest: FAILED remove_label — label '${label}' not found"
@ -219,38 +194,34 @@ _gardener_execute_manifest() {
;; ;;
close) close)
local reason http_code resp local reason
reason=$(jq -r ".[$i].reason // empty" "$manifest_file") reason=$(jq -r ".[$i].reason // empty" "$manifest_file")
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
"${FORGE_API}/issues/${issue}" \ "${FORGE_API}/issues/${issue}" \
-d '{"state":"closed"}' 2>/dev/null) || true -d '{"state":"closed"}' >/dev/null 2>&1; then
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
log "manifest: closed #${issue} (${reason})" log "manifest: closed #${issue} (${reason})"
else else
log "manifest: FAILED close #${issue}: HTTP ${http_code}" log "manifest: FAILED close #${issue}"
fi fi
;; ;;
comment) comment)
local body escaped_body http_code resp local body escaped_body
body=$(jq -r ".[$i].body" "$manifest_file") body=$(jq -r ".[$i].body" "$manifest_file")
escaped_body=$(printf '%s' "$body" | jq -Rs '.') escaped_body=$(printf '%s' "$body" | jq -Rs '.')
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
"${FORGE_API}/issues/${issue}/comments" \ "${FORGE_API}/issues/${issue}/comments" \
-d "{\"body\":${escaped_body}}" 2>/dev/null) || true -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
log "manifest: commented on #${issue}" log "manifest: commented on #${issue}"
else else
log "manifest: FAILED comment on #${issue}: HTTP ${http_code}" log "manifest: FAILED comment on #${issue}"
fi fi
;; ;;
create_issue) create_issue)
local title body labels escaped_title escaped_body label_ids http_code resp local title body labels escaped_title escaped_body label_ids
title=$(jq -r ".[$i].title" "$manifest_file") title=$(jq -r ".[$i].title" "$manifest_file")
body=$(jq -r ".[$i].body" "$manifest_file") body=$(jq -r ".[$i].body" "$manifest_file")
labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file") labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file")
@ -270,46 +241,40 @@ _gardener_execute_manifest() {
done <<< "$labels" done <<< "$labels"
[ -n "$ids_json" ] && label_ids="[${ids_json}]" [ -n "$ids_json" ] && label_ids="[${ids_json}]"
fi fi
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
"${FORGE_API}/issues" \ "${FORGE_API}/issues" \
-d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" 2>/dev/null) || true -d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" >/dev/null 2>&1; then
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
log "manifest: created issue '${title}'" log "manifest: created issue '${title}'"
else else
log "manifest: FAILED create_issue '${title}': HTTP ${http_code}" log "manifest: FAILED create_issue '${title}'"
fi fi
;; ;;
edit_body) edit_body)
local body escaped_body http_code resp local body escaped_body
body=$(jq -r ".[$i].body" "$manifest_file") body=$(jq -r ".[$i].body" "$manifest_file")
escaped_body=$(printf '%s' "$body" | jq -Rs '.') escaped_body=$(printf '%s' "$body" | jq -Rs '.')
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
"${FORGE_API}/issues/${issue}" \ "${FORGE_API}/issues/${issue}" \
-d "{\"body\":${escaped_body}}" 2>/dev/null) || true -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
log "manifest: edited body of #${issue}" log "manifest: edited body of #${issue}"
else else
log "manifest: FAILED edit_body #${issue}: HTTP ${http_code}" log "manifest: FAILED edit_body #${issue}"
fi fi
;; ;;
close_pr) close_pr)
local pr http_code resp local pr
pr=$(jq -r ".[$i].pr" "$manifest_file") pr=$(jq -r ".[$i].pr" "$manifest_file")
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
"${FORGE_API}/pulls/${pr}" \ "${FORGE_API}/pulls/${pr}" \
-d '{"state":"closed"}' 2>/dev/null) || true -d '{"state":"closed"}' >/dev/null 2>&1; then
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
log "manifest: closed PR #${pr}" log "manifest: closed PR #${pr}"
else else
log "manifest: FAILED close_pr #${pr}: HTTP ${http_code}" log "manifest: FAILED close_pr #${pr}"
fi fi
;; ;;
@ -354,9 +319,9 @@ if [ -n "$PR_NUMBER" ]; then
if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then
# Post-merge: pull primary, mirror push, execute manifest # Post-merge: pull primary, mirror push, execute manifest
git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true
git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true
mirror_push mirror_push
_gardener_execute_manifest _gardener_execute_manifest
rm -f "$SCRATCH_FILE" rm -f "$SCRATCH_FILE"
@ -369,12 +334,5 @@ else
rm -f "$SCRATCH_FILE" rm -f "$SCRATCH_FILE"
fi fi
# Write journal entry post-session
profile_write_journal "gardener-run" "Gardener run $(date -u +%Y-%m-%d)" "complete" "" || true
rm -f "$GARDENER_PR_FILE" rm -f "$GARDENER_PR_FILE"
# Persist last-seen SHA for next run comparison
echo "$CURRENT_SHA" > "$LAST_SHA_FILE"
log "--- Gardener run done ---" log "--- Gardener run done ---"

View file

@ -1,47 +1,32 @@
[ [
{ {
"action": "close", "action": "edit_body",
"issue": 419, "issue": 765,
"reason": "Vision goal complete — all sub-issues #437-#454 closed, vault blast-radius redesign delivered" "body": "Depends on: none\n\n## Goal\n\nThe disinto website becomes a versioned artifact: built by CI, published to Codeberg's generic package registry, deployed to staging automatically. Version visible in footer.\n\n## Files to add/change\n\n### `site/VERSION`\n```\n0.1.0\n```\n\n### `site/build.sh`\n```bash\n#!/bin/bash\nVERSION=$(cat VERSION)\nmkdir -p dist\ncp *.html *.jpg *.webp *.png *.ico *.xml robots.txt dist/\nsed -i \"s|Built from scrap, powered by a single battery.|v${VERSION} · Built from scrap, powered by a single battery.|\" dist/index.html\necho \"$VERSION\" > dist/VERSION\n```\n\n### `site/index.html`\nNo template placeholder needed — `build.sh` does the sed replacement on the existing footer text.\n\n### `.woodpecker/site.yml`\n```yaml\nwhen:\n path: \"site/**\"\n event: push\n branch: main\n\nsteps:\n - name: build\n image: alpine\n commands:\n - cd site && sh build.sh\n - VERSION=$(cat site/VERSION)\n - tar czf site-${VERSION}.tar.gz -C site/dist .\n\n - name: publish\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n --upload-file site-${VERSION}.tar.gz\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n\n - name: deploy-staging\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n -o site.tar.gz\n - rm -rf /srv/staging/*\n - tar xzf site.tar.gz -C /srv/staging/\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n volumes:\n - /home/debian/staging-site:/srv/staging\n```\n\n## Infra setup (manual, before first run)\n- `mkdir -p /home/debian/staging-site`\n- Add to Caddyfile: 
`staging.disinto.ai { root * /home/debian/staging-site; file_server }`\n- DNS: `staging.disinto.ai` A record → same IP as `disinto.ai`\n- Reload Caddy: `sudo systemctl reload caddy`\n- Add `forge_token` as Woodpecker repo secret for johba/disinto (if not already set)\n- Add `/home/debian/staging-site` to `WOODPECKER_BACKEND_DOCKER_VOLUMES`\n\n## Verification\n- [ ] Merge PR that touches `site/` → CI runs site pipeline\n- [ ] Package appears at `codeberg.org/johba/-/packages/generic/disinto-site/0.1.0`\n- [ ] `staging.disinto.ai` serves the site with `v0.1.0` in footer\n- [ ] `disinto.ai` (production) unchanged\n\n## Related\n- #764 — docker stack edge proxy + staging (future: this moves inside the stack)\n- #755 — vault-gated production promotion (production deploy comes later)\n\n## Affected files\n- `site/VERSION` — new, holds current version string\n- `site/build.sh` — new, builds dist/ with version injected into footer\n- `.woodpecker/site.yml` — new, CI pipeline for build/publish/deploy-staging"
},
{
"action": "close",
"issue": 494,
"reason": "Resolved by PRs #502 and #503 (both merged) — repo_root workaround removed, container paths derived at runtime"
},
{
"action": "close",
"issue": 477,
"reason": "Obsolete — #379 (while-true loop) was deployed on 2026-04-08; env.sh container guard is now correct behavior, no revert needed"
}, },
{ {
"action": "edit_body", "action": "edit_body",
"issue": 498, "issue": 764,
"body": "Flagged by AI reviewer in PR #496.\n\n## Problem\n\n`has_responses_to_process` is only set to `true` inside the `open_arch_prs >= 3` gate in `architect/architect-run.sh` (line 543). When fewer than 3 architect PRs are open, ACCEPT/REJECT responses on existing PRs are never processed — the response-processing block at line 687 defaults to `false` and is skipped entirely.\n\nThis means that if a user ACCEPTs or REJECTs a pitch while the open PR count is below 3, the architect agent will never handle the response.\n\n## Fix\n\nSet `has_responses_to_process` (or an equivalent guard) unconditionally by scanning open PRs for ACCEPT/REJECT responses, not only when the 3-PR cap is hit.\n\n---\n*Auto-created from AI review*\n\n## Acceptance criteria\n\n- [ ] `has_responses_to_process` is computed by scanning open architect PRs for ACCEPT/REJECT responses regardless of `open_arch_prs` count\n- [ ] When a user posts ACCEPT or REJECT on an architect PR and open PR count < 3, the response is processed in the same run\n- [ ] Existing behavior when `open_arch_prs >= 3` is unchanged\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `architect/architect-run.sh` (lines ~543 and ~687 — response-processing gate)" "body": "Depends on: none (builds on existing docker-compose generation in `bin/disinto`)\n\n## Design\n\n`disinto init` + `disinto up` starts two additional containers as base factory infrastructure:\n\n### Edge proxy (Caddy)\n- Reverse proxies to Forgejo and Woodpecker\n- Serves staging site\n- Runs on ports 80/443\n- At bootstrap: IP-only, self-signed TLS or HTTP\n- Domain + Let's Encrypt added later via vault resource request\n\n### Staging container (Caddy)\n- Static file server for the project's staging artifacts\n- Starts with a default \"Nothing shipped yet\" page\n- CI pipelines write to a shared volume to update staging content\n- No vault approval needed — staging is the factory's sandbox\n\n### docker-compose 
addition\n```yaml\nservices:\n edge:\n image: caddy:alpine\n ports:\n - \"80:80\"\n - \"443:443\"\n volumes:\n - ./Caddyfile:/etc/caddy/Caddyfile\n - caddy_data:/data\n depends_on:\n - forgejo\n - woodpecker-server\n - staging\n\n staging:\n image: caddy:alpine\n volumes:\n - staging-site:/srv/site\n # Not exposed directly — edge proxies to it\n\nvolumes:\n caddy_data:\n staging-site:\n```\n\n### Caddyfile (generated by `disinto init`)\n```\n# IP-only at bootstrap, domain added later\n:80 {\n handle /forgejo/* {\n reverse_proxy forgejo:3000\n }\n handle /ci/* {\n reverse_proxy woodpecker-server:8000\n }\n handle {\n reverse_proxy staging:80\n }\n}\n```\n\n### Staging update flow\n1. CI builds artifact (site tarball, etc.)\n2. CI step writes to `staging-site` volume\n3. Staging container serves updated content immediately\n4. No restart needed — Caddy serves files directly\n\n### Domain lifecycle\n- Bootstrap: no domain, edge serves on IP\n- Later: factory files vault resource request for domain\n- Human buys domain, sets DNS\n- Caddyfile updated with domain, Let's Encrypt auto-provisions TLS\n\n## Affected files\n- `bin/disinto` — `generate_compose()` adds edge + staging services\n- New: default staging page (\"Nothing shipped yet\")\n- New: Caddyfile template in `docker/`\n\n## Related\n- #755 — vault-gated deployment promotion (production comes later)\n- #757 — ops repo (domain is a resource requested through vault)\n\n## Acceptance criteria\n- [ ] `disinto init` generates a `docker-compose.yml` that includes `edge` (Caddy) and `staging` containers\n- [ ] Edge proxy routes `/forgejo/*` → Forgejo, `/ci/*` → Woodpecker, default → staging container\n- [ ] Staging container serves a default \"Nothing shipped yet\" page on first boot\n- [ ] `docker/` directory contains a Caddyfile template generated by `disinto init`\n- [ ] `disinto up` starts all containers including edge and staging without manual steps"
},
{
"action": "edit_body",
"issue": 761,
"body": "Depends on: #747\n\n## Design\n\nEach agent account on the bundled Forgejo gets a `.profile` repo. This repo holds the agent's formula (copied from disinto at creation time) and its journal.\n\n### Structure\n```\n{agent-bot}/.profile/\n├── formula.toml # snapshot of the formula at agent creation time\n├── journal/ # daily logs of what the agent did\n│ ├── 2026-03-26.md\n│ └── ...\n└── knowledge/ # learned patterns, best-practices (optional, agent can evolve)\n```\n\n### Lifecycle\n1. **Create agent** — `disinto init` or `disinto spawn-agent` creates Forgejo account + `.profile` repo\n2. **Copy formula** — current `formulas/{role}.toml` from disinto repo is copied to `.profile/formula.toml`\n3. **Agent reads its own formula** — at session start, agent reads from its `.profile`, not from the disinto repo\n4. **Agent writes journal** — daily entries pushed to `.profile/journal/`\n5. **Agent can evolve knowledge** — best-practices, heuristics, patterns written to `.profile/knowledge/`\n\n### What this enables\n\n**A/B testing formulas:** Create two agents from different formula versions, run both against the same backlog, compare results (cycle time, CI pass rate, review rejection rate).\n\n**Rollback:** New formula worse? Kill agent, spawn from older formula version.\n\n**Audit:** What formula was this agent running when it produced that PR? Check its `.profile` at that git commit.\n\n**Drift tracking:** Diff what an agent learned (`.profile/knowledge/`) vs what it started with. 
Measure formula evolution over time.\n\n**Portability:** Move agent to different box — `git clone` its `.profile`.\n\n### Disinto repo becomes the template\n\n```\ndisinto repo:\n formulas/dev-agent.toml ← canonical template, evolves\n formulas/review-agent.toml\n formulas/planner.toml\n ...\n\nRunning agents:\n dev-bot-v2/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v2\n dev-bot-v3/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v3\n review-bot/.profile/formula.toml ← snapshot from formulas/review-agent.toml\n```\n\nThe formula in the disinto repo is the template. The `.profile` copy is the instance. They can diverge — that's a feature, not a bug.\n\n## Affected files\n- `bin/disinto` — agent creation copies formula to .profile\n- Agent session scripts — read formula from .profile instead of local formulas/ dir\n- Planner/supervisor — can read other agents' journals from their .profile repos\n\n## Related\n- #747 — per-agent Forgejo accounts (prerequisite)\n- #757 — ops repo (shared concerns stay there: vault, portfolio, resources)\n\n## Acceptance criteria\n- [ ] `disinto spawn-agent` (or `disinto init`) creates a Forgejo account + `.profile` repo for each agent bot\n- [ ] Current `formulas/{role}.toml` is copied to `.profile/formula.toml` at agent creation time\n- [ ] Agent session script reads its formula from `.profile/formula.toml`, not from the repo's `formulas/` directory\n- [ ] Agent writes daily journal entries to `.profile/journal/YYYY-MM-DD.md`"
},
{
"action": "edit_body",
"issue": 742,
"body": "## Problem\n\n`gardener/recipes/*.toml` (4 files: cascade-rebase, chicken-egg-ci, flaky-test, shellcheck-violations) are an older pattern predating `formulas/*.toml`. Two systems for the same thing.\n\n## Fix\n\nMigrate any unique content from recipes to the gardener formula or to new formulas. Delete the recipes directory.\n\n## Affected files\n- `gardener/recipes/*.toml` — delete after migration\n- `formulas/run-gardener.toml` — absorb relevant content\n- Gardener scripts that reference recipes/\n\n## Acceptance criteria\n- [ ] Contents of `gardener/recipes/*.toml` are diff'd against `formulas/run-gardener.toml` — any unique content is migrated\n- [ ] `gardener/recipes/` directory is deleted\n- [ ] No scripts in `gardener/` reference the `recipes/` path after migration\n- [ ] ShellCheck passes on all modified scripts"
}, },
{ {
"action": "add_label", "action": "add_label",
"issue": 498, "issue": 742,
"label": "backlog" "label": "backlog"
}, },
{
"action": "edit_body",
"issue": 499,
"body": "Flagged by AI reviewer in PR #496.\n\n## Problem\n\nIn `architect/architect-run.sh` line 203, the `has_open_subissues` function compares `.number` (a JSON integer) against `$vid` (a bash string via `--arg`). In jq, `42 != \"42\"` evaluates to true (different types are never equal), so the self-exclusion filter never fires. In practice this is low-risk since vision issues don't contain 'Decomposed from #N' in their own bodies, but the self-exclusion logic is silently broken.\n\n## Fix\n\nCast the string to a number in jq: `select(.number != ($vid | tonumber))`\n\n---\n*Auto-created from AI review*\n\n## Acceptance criteria\n\n- [ ] `has_open_subissues` self-exclusion filter correctly excludes the vision issue itself using `($vid | tonumber)` cast\n- [ ] A vision issue does not appear in its own subissue list\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `architect/architect-run.sh` (line ~203 — `has_open_subissues` jq filter)"
},
{ {
"action": "add_label", "action": "add_label",
"issue": 499, "issue": 741,
"label": "backlog"
},
{
"action": "edit_body",
"issue": 471,
"body": "## Bug description\n\nWhen dev-bot picks a backlog issue and launches dev-agent.sh, a second dev-poll instance (dev-qwen) can race ahead and mark the issue as stale/blocked before dev-agent.sh finishes claiming it.\n\n## Reproduction\n\nObserved on issues #443 and #445 (2026-04-08):\n\n**#443 timeline:**\n- `20:39:03` — dev-bot removes `backlog`, adds `in-progress` (via dev-poll backlog pickup)\n- `20:39:04` — dev-qwen removes `in-progress`, adds `blocked` with reason `no_assignee_no_open_pr_no_lock`\n- `20:40:11` — dev-bot pushes commit (dev-agent was actually working the whole time)\n- `20:44:02` — PR merged, issue closed\n\n**#445 timeline:**\n- `20:54:03` — dev-bot adds `in-progress`\n- `20:54:06` — dev-qwen marks `blocked` (3 seconds later)\n- `20:55:13` — dev-bot pushes commit\n- `21:09:03` — PR merged, issue closed\n\nIn both cases, the work completed successfully despite being labeled blocked.\n\n## Root cause\n\n`issue_claim()` in `lib/issue-lifecycle.sh` performs three sequential API calls:\n1. PATCH assignee\n2. POST in-progress label\n3. DELETE backlog label\n\nMeanwhile, dev-poll on another agent (dev-qwen) runs its orphan scan, sees the issue labeled `in-progress` but with no assignee set yet (assign PATCH hasn't landed or was read stale), no open PR, and no lock file. It concludes the issue is stale and relabels to `blocked`.\n\nThe race window is ~1-3 seconds between in-progress being set and the assignee being visible to other pollers.\n\n## Impact\n\n- Issues get spuriously labeled `blocked` with a misleading stale diagnostic comment\n- dev-agent continues working anyway (it already has the issue number), so the blocked label is just noise\n- But it could confuse the gardener or humans reading the issue timeline\n- If another dev-poll instance picks up the blocked issue for recovery before the original agent finishes, it could cause duplicate work\n\n## Possible fixes\n\n1. 
**Assign before labeling**: In `issue_claim()`, set the assignee first, then add in-progress. This way, by the time in-progress is visible, the assignee is already set.\n2. **Grace period in stale detection**: Skip issues whose in-progress label was added less than N seconds ago (check label event timestamp via timeline API).\n3. **Lock file before label**: Write the agent lock file (`/tmp/dev-impl-summary-...`) at the start of dev-agent.sh before calling `issue_claim()`, so the stale detector sees the lock.\n4. **Atomic claim check**: dev-poll should re-check assignee after a short delay before declaring stale, to allow for API propagation.\n\n## Acceptance criteria\n\n- [ ] Stale detection in dev-poll does not mark an issue as blocked within the first 60 seconds of the in-progress label being applied\n- [ ] `issue_claim()` assigns the issue before adding the in-progress label (or equivalent fix is implemented)\n- [ ] No spurious `blocked` labels appear on issues that are actively being worked (verified by log inspection or integration test)\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `lib/issue-lifecycle.sh` — `issue_claim()` function (assignee + label ordering)\n- `dev/dev-poll.sh` — orphan/stale detection logic"
},
{
"action": "add_label",
"issue": 471,
"label": "backlog" "label": "backlog"
} }
] ]

View file

@ -0,0 +1,16 @@
# gardener/recipes/cascade-rebase.toml — PR outdated after main moved
#
# Trigger: PR mergeable=false (stale branch or dismissed approval)
# Playbook: rebase only — merge and re-approval happen on subsequent cycles
# after CI reruns on the rebased branch (rebase is async via Gitea API)
name = "cascade-rebase"
description = "PR outdated after main moved — mergeable=false or stale approval"
priority = 20
[trigger]
pr_mergeable = false
[[playbook]]
action = "rebase-pr"
description = "Rebase PR onto main (async — CI reruns, merge on next cycle)"

View file

@ -0,0 +1,25 @@
# gardener/recipes/chicken-egg-ci.toml — PR introduces CI step that fails on pre-existing code
#
# Trigger: New .woodpecker/*.yml in PR + lint/check step + failures on unchanged files
# Playbook: make step non-blocking, create per-file issues, create follow-up to remove bypass
name = "chicken-egg-ci"
description = "PR introduces a CI pipeline/linting step that fails on pre-existing code"
priority = 10
[trigger]
pr_files = '\.woodpecker/.*\.yml$'
step_name = '(?i)(lint|shellcheck|check)'
failures_on_unchanged = true
[[playbook]]
action = "make-step-non-blocking"
description = "Make failing step non-blocking (|| true) in the PR"
[[playbook]]
action = "lint-per-file"
description = "Create per-file fix issues for pre-existing violations (generic linter support)"
[[playbook]]
action = "create-followup-remove-bypass"
description = "Create follow-up issue to remove || true once fixes land"

View file

@ -0,0 +1,20 @@
# gardener/recipes/flaky-test.toml — CI fails intermittently
#
# Trigger: Test step fails + multiple CI attempts (same step, different output)
# Playbook: retrigger CI (max 2x), quarantine test if still failing
name = "flaky-test"
description = "CI fails intermittently — same step fails across multiple attempts"
priority = 30
[trigger]
step_name = '(?i)test'
min_attempts = 2
[[playbook]]
action = "retrigger-ci"
description = "Retrigger CI (max 2 retries)"
[[playbook]]
action = "quarantine-test"
description = "If still failing, quarantine test and create fix issue"

View file

@ -0,0 +1,20 @@
# gardener/recipes/shellcheck-violations.toml — ShellCheck step fails
#
# Trigger: Step named *shellcheck* fails with SC#### codes in output
# Playbook: parse per-file, create one issue per file, label backlog
name = "shellcheck-violations"
description = "ShellCheck step fails with SC#### codes in output"
priority = 40
[trigger]
step_name = '(?i)shellcheck'
output = 'SC\d{4}'
[[playbook]]
action = "shellcheck-per-file"
description = "Parse output by file, create one fix issue per file with specific SC codes"
[[playbook]]
action = "label-backlog"
description = "Label created issues as backlog"

View file

@ -1,28 +0,0 @@
# CI/CD — Best Practices
## CI Pipeline Issues (P2)
When CI pipelines are stuck running >20min or pending >30min:
### Investigation Steps
1. Check pipeline status via Forgejo API:
```bash
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/pipelines?limit=50" | jq '.[] | {number, status, created}'
```
2. Check Woodpecker CI if configured:
```bash
curl -sf -H "Authorization: Bearer $WOODPECKER_TOKEN" \
"$WOODPECKER_SERVER/api/repos/${WOODPECKER_REPO_ID}/pipelines?limit=10"
```
### Common Fixes
- **Stuck pipeline**: Cancel via Forgejo API, retrigger
- **Pending pipeline**: Check queue depth, scale CI runners
- **Failed pipeline**: Review logs, fix failing test/step
### Prevention
- Set timeout limits on CI pipelines
- Monitor runner capacity and scale as needed
- Use caching for dependencies to reduce build time

View file

@ -1,28 +0,0 @@
# Dev Agent — Best Practices
## Dev Agent Issues (P2)
When dev-agent is stuck, blocked, or in bad state:
### Dead Lock File
```bash
# Check if process still exists
ps -p $(cat /path/to/lock.file) 2>/dev/null || rm -f /path/to/lock.file
```
### Stale Worktree Cleanup
```bash
cd "$PROJECT_REPO_ROOT"
git worktree remove --force /tmp/stale-worktree 2>/dev/null || true
git worktree prune 2>/dev/null || true
```
### Blocked Pipeline
- Check if PR is awaiting review or CI
- Verify no other agent is actively working on same issue
- Check for unmet dependencies (issues with `Depends on` refs)
### Prevention
- Concurrency bounded per LLM backend (AD-002)
- Clear lock files in EXIT traps
- Use phase files to track agent state

View file

@ -1,35 +0,0 @@
# Disk Management — Best Practices
## Disk Pressure Response (P1)
When disk usage exceeds 80%, take these actions in order:
### Immediate Actions
1. **Docker cleanup** (safe, low impact):
```bash
sudo docker system prune -f
```
2. **Aggressive Docker cleanup** (if still >80%):
```bash
sudo docker system prune -a -f
```
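This removes all unused images (not only dangling ones) in addition to stopped containers, unused networks, and build cache. Volumes are not removed unless `--volumes` is also passed.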
3. **Log rotation**:
```bash
for f in "$FACTORY_ROOT"/{dev,review,supervisor,gardener,planner,predictor}/*.log; do
[ -f "$f" ] && [ "$(du -k "$f" | cut -f1)" -gt 10240 ] && truncate -s 0 "$f"
done
```
### Prevention
- Monitor disk with alerts at 70% (warning) and 80% (critical)
- Set up automatic log rotation for agent logs
- Clean up old Docker images regularly
- Consider using separate partitions for `/var/lib/docker`
### When to Escalate
- Disk stays >80% after cleanup (indicates legitimate growth)
- No unused Docker images to clean
- Critical data filling disk (check /home, /var/log)

View file

@ -1,25 +0,0 @@
# Forgejo Operations — Best Practices
## Forgejo Issues
When Forgejo operations encounter issues:
### API Rate Limits
- Monitor rate limit headers in API responses
- Implement exponential backoff on 429 responses
- Use agent-specific tokens (#747) to increase limits
### Authentication Issues
- Verify FORGE_TOKEN is valid and not expired
- Check agent identity matches token (#747)
- Use FORGE_TOKEN_<AGENT> (e.g. FORGE_TOKEN_LLAMA) for agent-specific identities; agents without their own token fall back to FORGE_TOKEN
### Repository Access
- Verify FORGE_REMOTE matches actual git remote
- Check token has appropriate permissions (repo, write)
- Use `resolve_forge_remote()` to auto-detect remote
### Prevention
- Set up monitoring for API failures
- Rotate tokens before expiry
- Document required permissions per agent

View file

@ -1,28 +0,0 @@
# Git State Recovery — Best Practices
## Git State Issues (P2)
When git repo is on wrong branch or in broken rebase state:
### Wrong Branch Recovery
```bash
cd "$PROJECT_REPO_ROOT"
git checkout "$PRIMARY_BRANCH" 2>/dev/null || git checkout master 2>/dev/null
```
### Broken Rebase Recovery
```bash
cd "$PROJECT_REPO_ROOT"
git rebase --abort 2>/dev/null || true
git checkout "$PRIMARY_BRANCH" 2>/dev/null || git checkout master 2>/dev/null
```
### Stale Lock File Cleanup
```bash
rm -f /path/to/stale.lock
```
### Prevention
- Always checkout primary branch after rebase conflicts
- Remove lock files after agent sessions complete
- Use `git status` to verify repo state before operations

View file

@ -1,27 +0,0 @@
# Memory Management — Best Practices
## Memory Crisis Response (P0)
When available RAM drops below 500MB or swap usage exceeds 3GB, take these actions:
### Immediate Actions
1. **Kill stale claude processes** (>3 hours old):
```bash
pgrep -f "claude -p" --older 10800 2>/dev/null | xargs kill 2>/dev/null || true
```
2. **Drop filesystem caches**:
```bash
sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 || true
```
### Prevention
- Set memory_guard to 2000MB minimum (default in env.sh)
- Configure swap usage alerts at 2GB
- Monitor for memory leaks in long-running processes
- Use cgroups for process memory limits
### When to Escalate
- RAM stays <500MB after cache drop
- Swap continues growing after process kills
- System becomes unresponsive (OOM killer active)

View file

@ -1,23 +0,0 @@
# Review Agent — Best Practices
## Review Agent Issues
When review agent encounters issues with PRs:
### Stale PR Handling
- PRs stale >20min (CI done, no push since) → file vault item for dev-agent
- Do NOT push branches or attempt merges directly
- File vault item with:
- What: Stale PR requiring push
- Why: Factory degraded
- Unblocks: dev-agent will push the branch
### Circular Dependencies
- Check backlog for issues with circular `Depends on` refs
- Use `lib/parse-deps.sh` to analyze dependency graph
- Report to planner for resolution
### Prevention
- Review agent only reads PRs, never modifies them
- Use vault items for actions requiring dev-agent
- Monitor for PRs stuck in review state

View file

@ -1,4 +1,4 @@
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 --> <!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
# Shared Helpers (`lib/`) # Shared Helpers (`lib/`)
All agents source `lib/env.sh` as their first action. Additional helpers are All agents source `lib/env.sh` as their first action. Additional helpers are
@ -6,29 +6,19 @@ sourced as needed.
| File | What it provides | Sourced by | | File | What it provides | Sourced by |
|---|---|---| |---|---|---|
| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Required env var**: `FORGE_PASS` bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). | Every agent | | `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`, `FORGE_ACTION_TOKEN`) each falls back to `$FORGE_TOKEN` if not set. 
**Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens only the vault-runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. | Every agent |
| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr | | `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. 
`ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this). | dev-poll, review-poll, review-pr, supervisor-poll |
| `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) | | `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) |
| `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. `<pipeline_number> [--step <name>]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). | ci-helpers.sh | | `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) |
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). **Container path derivation**: `PROJECT_REPO_ROOT` and `OPS_REPO_ROOT` are derived at runtime when `DISINTO_CONTAINER=1` — hardcoded to `/home/agent/repos/$PROJECT_NAME` and `/home/agent/repos/$PROJECT_NAME-ops` respectively — not read from the TOML. This ensures correct paths inside containers where host paths in the TOML would be wrong. | env.sh (when `PROJECT_TOML` is set) | | `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll | | `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `build_graph_section()`, `run_formula_and_monitor(AGENT [TIMEOUT] [CALLBACK])` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `formula_phase_callback()` handles `PHASE:escalate` (unified escalation path — kills the session). `run_formula_and_monitor` accepts an optional CALLBACK (default: `formula_phase_callback`) so callers can install custom merge-through or escalation handlers. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh, action-agent.sh |
| `lib/formula-session.sh` | `acquire_run_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven polling-loop agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). `resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh | | `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, action-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points |
| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in loop logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | polling-loop entry points | | `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh and dev/phase-handler.sh — called after every successful merge. | dev-poll.sh, phase-handler.sh |
| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh |
| `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh | | `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh |
| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh | | `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh |
| `lib/stack-lock.sh` | File-based lock protocol for singleton project stack access. `stack_lock_acquire(holder, project)` — polls until free, breaks stale heartbeats (>10 min old), claims lock. `stack_lock_release(project)` — deletes lock file. `stack_lock_check(project)` — inspect current lock state. `stack_lock_heartbeat(project)` — update heartbeat timestamp (callers must call every 2 min while holding). Lock files at `~/data/locks/<project>-stack.lock`. | docker/edge/dispatcher.sh, reproduce formula | | `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | (available for future use) |
| `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) | | `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) |
| `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)``git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh | | `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)``git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, action-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh |
| `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future) | | `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future), action-agent.sh (future) |
| `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future) | | `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future), action-agent.sh (future) |
| `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request <action_id> <toml_content>` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/<action-id>`, writes `vault/actions/<action-id>.toml`, creates PR targeting `main` with title `vault: <action-id>` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. **Low-tier bypass**: if the action's `blast_radius` classifies as `low` (via `vault/classify.sh`), `vault_request` calls `_vault_commit_direct()` which commits directly to ops `main` using `FORGE_ADMIN_TOKEN` — no PR, no approval wait. Returns `0` (not a PR number) for direct commits. Requires `FORGE_TOKEN`, `FORGE_ADMIN_TOKEN` (low-tier only), `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher | | `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. 
Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `phase_file` is set, passes it to the idle stop hook (`on-idle-stop.sh`) so the hook can **nudge Claude** (up to 2 times) if Claude returns to the prompt without writing to the phase file — the hook injects a tmux reminder asking Claude to signal PHASE:done or PHASE:awaiting_ci. The PreToolUse guard hook (`on-pretooluse-guard.sh`) receives the session name as a third argument — formula agents (`gardener-*`, `planner-*`, `predictor-*`, `supervisor-*`) are identified this way and allowed to access `FACTORY_ROOT` from worktrees (they need env.sh, AGENTS.md, formulas/, lib/). **OAuth flock**: when `DISINTO_CONTAINER=1`, Claude CLI is wrapped in `flock -w 300 ~/.claude/session.lock` to queue concurrent token refresh attempts and prevent rotation races across agents sharing the same credentials. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `>` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or `PHASE:escalate` / other `PHASE:*` string. **Unified escalation**: `PHASE:escalate` is the signal that a session needs human input (renamed from `PHASE:needs_human`). **Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract. | dev-agent.sh, action-agent.sh |
| `lib/branch-protection.sh` | Branch protection helpers for Forgejo repos. `setup_vault_branch_protection()` — configures admin-only merge protection on main (require 1 approval, restrict merge to admin role, block direct pushes). `setup_profile_branch_protection()` — same protection for `.profile` repos. `verify_branch_protection()` — checks protection is correctly configured. `remove_branch_protection()` — removes protection (cleanup/testing). Handles race condition after initial push: retries with backoff if Forgejo hasn't processed the branch yet. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_OPS_REPO`. | bin/disinto (hire-an-agent) |
| `lib/agent-sdk.sh` | `agent_run([--resume SESSION_ID] [--worktree DIR] PROMPT)` — one-shot `claude -p` invocation with session persistence. Saves session ID to `SID_FILE`, reads it back on resume. `agent_recover_session()` — restore previous session ID from `SID_FILE` on startup. **Nudge guard**: skips nudge injection if the worktree is clean and no push is expected, preventing spurious re-invocations. Callers must define `SID_FILE`, `LOGFILE`, and `log()` before sourcing. | formula-driven agents (dev-agent, planner-run, predictor-run, gardener-run) |
| `lib/forge-setup.sh` | `setup_forge()` — Forgejo instance provisioning: creates admin user, bot accounts, org, repos (code + ops), configures webhooks, sets repo topics. Extracted from `bin/disinto`. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`. **Password storage (#361)**: after creating each bot account, stores its password in `.env` as `FORGE_<BOT>_PASS` (e.g. `FORGE_PASS`, `FORGE_REVIEW_PASS`, etc.) for use by `forge-push.sh`. | bin/disinto (init) |
| `lib/forge-push.sh` | `push_to_forge()` — pushes a local clone to the Forgejo remote and verifies the push. `_assert_forge_push_globals()` validates required env vars before use. Requires `FORGE_URL`, `FORGE_PASS`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. **Auth**: uses `FORGE_PASS` (bot password) for git HTTP push — Forgejo 11.x rejects API tokens for `git push` (#361). | bin/disinto (init) |
| `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence, sprints). Evidence subdirectories seeded: engagement/, red-team/, holdout/, evolution/, user-test/. Also seeds sprints/ for architect output. Exports `_ACTUAL_OPS_SLUG`. `migrate_ops_repo(ops_root, [primary_branch])` — idempotent migration helper that seeds missing directories and .gitkeep files on existing ops repos (pre-#407 deployments). | bin/disinto (init) |
| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for bare-metal deployments (compose mode uses polling loop instead). `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) |
| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (uses `codeberg.org/forgejo/forgejo:11.0` tag; adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility), `generate_caddyfile()` — Caddyfile, `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) |
| `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) |
| `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) |

View file

@ -46,23 +46,9 @@ agent_run() {
[ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL") [ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL")
local run_dir="${worktree_dir:-$(pwd)}" local run_dir="${worktree_dir:-$(pwd)}"
local lock_file="${HOME}/.claude/session.lock" local output
mkdir -p "$(dirname "$lock_file")"
local output rc
log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})" log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})"
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$? output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true
if [ "$rc" -eq 124 ]; then
log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)"
elif [ "$rc" -ne 0 ]; then
log "agent_run: claude exited with code $rc"
# Log last 3 lines of output for diagnostics
if [ -n "$output" ]; then
log "agent_run: last output lines: $(echo "$output" | tail -3)"
fi
fi
if [ -z "$output" ]; then
log "agent_run: empty output (claude may have crashed or failed, exit code: $rc)"
fi
# Extract and persist session_id # Extract and persist session_id
local new_sid local new_sid
@ -72,45 +58,4 @@ agent_run() {
printf '%s' "$new_sid" > "$SID_FILE" printf '%s' "$new_sid" > "$SID_FILE"
log "agent_run: session_id=${new_sid:0:12}..." log "agent_run: session_id=${new_sid:0:12}..."
fi fi
# Save output for diagnostics (no_push, crashes)
_AGENT_LAST_OUTPUT="$output"
local diag_file="${DISINTO_LOG_DIR:-/tmp}/dev/agent-run-last.json"
printf '%s' "$output" > "$diag_file" 2>/dev/null || true
# Nudge: if the model stopped without pushing, resume with encouragement.
# Some models emit end_turn prematurely when confused. A nudge often unsticks them.
if [ -n "$_AGENT_SESSION_ID" ] && [ -n "$output" ]; then
local has_changes
has_changes=$(cd "$run_dir" && git status --porcelain 2>/dev/null | head -1) || true
local has_pushed
has_pushed=$(cd "$run_dir" && git log --oneline "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH:-main}..HEAD" 2>/dev/null | head -1) || true
if [ -z "$has_pushed" ]; then
if [ -n "$has_changes" ]; then
# Nudge: there are uncommitted changes
local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push."
log "agent_run: nudging (uncommitted changes)"
local nudge_rc
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$?
if [ "$nudge_rc" -eq 124 ]; then
log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)"
elif [ "$nudge_rc" -ne 0 ]; then
log "agent_run: nudge claude exited with code $nudge_rc"
# Log last 3 lines of output for diagnostics
if [ -n "$output" ]; then
log "agent_run: nudge last output lines: $(echo "$output" | tail -3)"
fi
fi
new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
if [ -n "$new_sid" ]; then
_AGENT_SESSION_ID="$new_sid"
printf '%s' "$new_sid" > "$SID_FILE"
fi
printf '%s' "$output" > "$diag_file" 2>/dev/null || true
_AGENT_LAST_OUTPUT="$output"
else
log "agent_run: no push and no changes — skipping nudge"
fi
fi
fi
} }

486
lib/agent-session.sh Normal file
View file

@ -0,0 +1,486 @@
#!/usr/bin/env bash
# agent-session.sh — Shared tmux + Claude interactive session helpers
#
# Source this into agent orchestrator scripts for reusable session management.
#
# Functions:
# agent_wait_for_claude_ready SESSION_NAME [TIMEOUT_SECS]
# agent_inject_into_session SESSION_NAME TEXT
# agent_kill_session SESSION_NAME
# monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME]
# session_lock_acquire [TIMEOUT_SECS]
# session_lock_release
# --- Cooperative session lock (fd-based) ---
# Number of the file descriptor that holds the session lock. Empty until a
# lock file has been opened (create_agent_session and session_lock_acquire
# both open it on demand). Callers drop the lock with session_lock_release
# during idle phases (awaiting_review/awaiting_ci) so other Claude sessions
# can run, and take it back with session_lock_acquire.
SESSION_LOCK_FD=""

# Unlock the session lock while leaving its descriptor open, so the same fd
# can be locked again later. Does nothing when no descriptor is open.
session_lock_release() {
  [ -z "${SESSION_LOCK_FD:-}" ] || flock -u "$SESSION_LOCK_FD"
}
# Take (or take back) the session lock, waiting until it is free or the
# timeout expires. When no lock descriptor is open yet, ~/.claude/session.lock
# is opened in append mode first and its fd number stored in SESSION_LOCK_FD.
# Args: [timeout_secs] (default 300)
# Returns: flock's status — 0 once the lock is held, non-zero on timeout/error.
# shellcheck disable=SC2120 # timeout arg is used by external callers
session_lock_acquire() {
  local wait_secs="${1:-300}"
  local claude_home
  [ -n "${SESSION_LOCK_FD:-}" ] || {
    claude_home="${HOME}/.claude"
    mkdir -p "$claude_home"
    exec {SESSION_LOCK_FD}>>"${claude_home}/session.lock"
  }
  flock -w "$wait_secs" "$SESSION_LOCK_FD"
}
# Wait for the Claude ready prompt in a tmux pane.
# Polls `tmux capture-pane` for SESSION every 2 seconds until the grep below
# matches or the timeout elapses.
# Args: session [timeout_secs] (default 120)
# Returns 0 if ready within TIMEOUT_SECS (default 120), 1 otherwise.
agent_wait_for_claude_ready() {
local session="$1"
local timeout="${2:-120}"
# Seconds waited so far; advanced by the 2s sleep on each unsuccessful poll.
local elapsed=0
while [ "$elapsed" -lt "$timeout" ]; do
# NOTE(review): the grep pattern here is an empty string, which matches any
# line the pane prints — presumably a prompt glyph was meant; confirm the
# intended pattern. tmux errors (e.g. missing session) are discarded, leaving
# grep with no input, so the loop keeps polling.
if tmux capture-pane -t "$session" -p 2>/dev/null | grep -q ''; then
return 0
fi
sleep 2
elapsed=$((elapsed + 2))
done
# Timed out without a match.
return 1
}
# Deliver TEXT to the Claude prompt running in tmux session SESSION and
# submit it with Enter.
# Args: session text
# Steps: re-take the session lock (best effort), wait up to 120s for the
# prompt (best effort), clear the idle marker, then paste TEXT through a
# per-process tmux buffer backed by a temp file.
agent_inject_into_session() {
  local target="$1"
  local payload="$2"
  local buffer="agent-inject-$$"
  local scratch

  # Claude is about to resume working, so hold the session lock again.
  # shellcheck disable=SC2119 # using default timeout
  session_lock_acquire || true
  agent_wait_for_claude_ready "$target" 120 || true

  # New work is incoming — the session is no longer idle.
  rm -f "/tmp/claude-idle-${target}.ts"

  scratch=$(mktemp /tmp/agent-inject-XXXXXX)
  printf '%s' "$payload" > "$scratch"
  tmux load-buffer -b "$buffer" "$scratch"
  tmux paste-buffer -t "$target" -b "$buffer"
  sleep 0.5
  tmux send-keys -t "$target" "" Enter
  tmux delete-buffer -b "$buffer" 2>/dev/null || true
  rm -f "$scratch"
}
# Register one hook command in a Claude settings.json file (private helper of
# create_agent_session).
# Args: settings_file event matcher command
#   settings_file — path to settings.json; created if it does not exist
#   event         — key under .hooks (Stop, PostToolUse, PreToolUse, ...)
#   matcher       — matcher string stored with the hook entry
#   command       — full hook command line
# Idempotent: if an entry with the same command already exists under the
# event, the file content is left as is. Existing settings are updated through
# "<settings>.tmp" + mv; when jq fails the temp file is removed and the
# existing settings are kept untouched.
_agent_session_add_hook() {
  local settings="$1"
  local event="$2"
  local matcher="$3"
  local hook_cmd="$4"

  if [ -f "$settings" ]; then
    if jq --arg event "$event" --arg matcher "$matcher" --arg cmd "$hook_cmd" '
      if (.hooks[$event] // [] | any(.[]; .hooks[]?.command == $cmd))
      then .
      else .hooks[$event] = (.hooks[$event] // []) + [{
        matcher: $matcher,
        hooks: [{type: "command", command: $cmd}]
      }]
      end
    ' "$settings" > "${settings}.tmp"; then
      mv "${settings}.tmp" "$settings"
    else
      # Do not leave a partial/empty temp file next to the real settings.
      rm -f "${settings}.tmp"
    fi
  else
    jq -n --arg event "$event" --arg matcher "$matcher" --arg cmd "$hook_cmd" '{
      hooks: {
        ($event): [{
          matcher: $matcher,
          hooks: [{type: "command", command: $cmd}]
        }]
      }
    }' > "$settings"
  fi
}

# Create a tmux session running Claude in the given workdir.
# Before launching, writes hook registrations into <workdir>/.claude/settings.json
# (each only when its script under ${FACTORY_ROOT}/lib/hooks is executable):
#   Stop         — idle detection marker (see monitor_phase_loop)
#   PostToolUse  — phase file write detection (only with a phase file)
#   StopFailure  — immediate phase file update on API error (only with a phase file)
#   PreToolUse   — guard for destructive Bash operations
#   SessionEnd   — exit marker for fast termination detection
#   SessionStart — context re-injection after compaction (only with a phase file)
# Globals: FACTORY_ROOT, HOME, PRIMARY_BRANCH, CLAUDE_MODEL (read);
#          SESSION_LOCK_FD (read/written)
# Args: session workdir [phase_file]
# Returns 0 if session is ready, 1 otherwise (lock timeout, tmux session not
# created, or Claude prompt not detected within 120s).
create_agent_session() {
  local session="$1"
  local workdir="${2:-.}"
  local phase_file="${3:-}"
  local hooks_dir="${FACTORY_ROOT}/lib/hooks"

  # Prepare settings directory for hooks
  mkdir -p "${workdir}/.claude"
  local settings="${workdir}/.claude/settings.json"

  # Marker files shared with monitor_phase_loop and the hook scripts.
  local idle_marker="/tmp/claude-idle-${session}.ts"
  local phase_marker="/tmp/phase-changed-${session}.marker"
  local exit_marker="/tmp/claude-exited-${session}.ts"

  # Stop hook for idle detection: when Claude finishes a response, the hook
  # writes a timestamp to a marker file. monitor_phase_loop checks this marker
  # instead of fragile tmux pane scraping.
  local hook_script="${hooks_dir}/on-idle-stop.sh"
  if [ -x "$hook_script" ]; then
    local hook_cmd="${hook_script} ${idle_marker}"
    # When a phase file is available, pass it and the session name so the
    # hook can nudge Claude if it returns to the prompt without signalling.
    if [ -n "$phase_file" ]; then
      hook_cmd="${hook_script} ${idle_marker} ${phase_file} ${session}"
    fi
    _agent_session_add_hook "$settings" "Stop" "" "$hook_cmd"
  fi

  # PostToolUse hook for phase file write detection: when Claude writes to the
  # phase file via Bash or Write, the hook writes a marker so
  # monitor_phase_loop can react immediately instead of waiting for the next
  # mtime-based poll cycle.
  if [ -n "$phase_file" ]; then
    local phase_hook_script="${hooks_dir}/on-phase-change.sh"
    if [ -x "$phase_hook_script" ]; then
      _agent_session_add_hook "$settings" "PostToolUse" "Bash|Write" \
        "${phase_hook_script} ${phase_file} ${phase_marker}"
      rm -f "$phase_marker"
    fi
  fi

  # StopFailure hook for immediate phase file update on API error: when Claude
  # hits a rate limit, server error, billing error, or auth failure, the hook
  # writes PHASE:failed to the phase file and touches the phase-changed marker
  # so monitor_phase_loop picks it up within one poll cycle instead of waiting
  # for idle timeout (up to 2 hours).
  if [ -n "$phase_file" ]; then
    local stop_failure_hook_script="${hooks_dir}/on-stop-failure.sh"
    if [ -x "$stop_failure_hook_script" ]; then
      _agent_session_add_hook "$settings" "StopFailure" \
        "rate_limit|server_error|authentication_failed|billing_error" \
        "${stop_failure_hook_script} ${phase_file} ${phase_marker}"
    fi
  fi

  # PreToolUse hook for destructive operation guard: blocks force push to
  # primary branch, rm -rf outside worktree, direct API merge calls, and
  # checkout/switch to primary branch. Claude sees the denial reason on exit 2
  # and can self-correct.
  local guard_hook_script="${hooks_dir}/on-pretooluse-guard.sh"
  if [ -x "$guard_hook_script" ]; then
    local abs_workdir
    abs_workdir=$(cd "$workdir" 2>/dev/null && pwd) || abs_workdir="$workdir"
    _agent_session_add_hook "$settings" "PreToolUse" "Bash" \
      "${guard_hook_script} ${PRIMARY_BRANCH:-main} ${abs_workdir} ${session}"
  fi

  # SessionEnd hook for guaranteed cleanup: when the Claude session exits
  # (clean or crash), write a termination marker so monitor_phase_loop detects
  # the exit faster than tmux has-session polling alone.
  local session_end_hook_script="${hooks_dir}/on-session-end.sh"
  if [ -x "$session_end_hook_script" ]; then
    _agent_session_add_hook "$settings" "SessionEnd" "" \
      "${session_end_hook_script} ${exit_marker}"
  fi
  # A stale exit marker from a previous run would look like an instant crash.
  rm -f "$exit_marker"

  # SessionStart hook for context re-injection after compaction: when Claude
  # Code compacts context during long sessions, the phase protocol
  # instructions are lost. This hook fires after each compaction and outputs
  # the content of a context file so Claude retains critical instructions.
  # The context file is written by callers via write_compact_context().
  if [ -n "$phase_file" ]; then
    local compact_hook_script="${hooks_dir}/on-compact-reinject.sh"
    if [ -x "$compact_hook_script" ]; then
      local context_file="${phase_file%.phase}.context"
      _agent_session_add_hook "$settings" "SessionStart" "compact" \
        "${compact_hook_script} ${context_file}"
    fi
  fi

  rm -f "$idle_marker"

  local model_flag=""
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    model_flag="--model ${CLAUDE_MODEL}"
  fi

  # Acquire a session-level mutex via fd-based flock to prevent concurrent
  # Claude sessions from racing on OAuth token refresh. Unlike the previous
  # command-wrapper flock, the fd approach allows callers to release the lock
  # during idle phases (awaiting_review/awaiting_ci) and re-acquire before
  # injecting the next prompt. See #724.
  # Use ~/.claude/session.lock so the lock is shared across containers when
  # the host ~/.claude directory is bind-mounted.
  local lock_dir="${HOME}/.claude"
  mkdir -p "$lock_dir"
  local claude_lock="${lock_dir}/session.lock"
  if [ -z "${SESSION_LOCK_FD:-}" ]; then
    exec {SESSION_LOCK_FD}>>"${claude_lock}"
  fi
  if ! flock -w 300 "$SESSION_LOCK_FD"; then
    return 1
  fi

  # tmux receives the whole command line as one string.
  local claude_cmd="claude --dangerously-skip-permissions ${model_flag}"
  tmux new-session -d -s "$session" -c "$workdir" \
    "$claude_cmd" 2>/dev/null
  sleep 1
  tmux has-session -t "$session" 2>/dev/null || return 1
  agent_wait_for_claude_ready "$session" 120 || return 1
  return 0
}
# inject_formula — alternate name for agent_inject_into_session; every
# argument (session, text) is forwarded unchanged.
inject_formula() { agent_inject_into_session "$@"; }
# Monitor a phase file, calling a callback on changes and handling idle timeout.
# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, idle_prompt, done, crashed, PHASE:failed, PHASE:escalate).
# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME).
# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly.
# Args: phase_file idle_timeout_secs callback_fn [session_name]
# session_name — tmux session to health-check; falls back to $SESSION_NAME global
# Environment: PHASE_POLL_INTERVAL — seconds between polls (default 10)
# Returns: 1 when the session is gone and the callback did not restart it
# ("crashed"); 0 for every other exit reason.
#
# Idle detection: uses a Stop hook marker file (written by lib/hooks/on-idle-stop.sh)
# to detect when Claude finishes responding without writing a phase signal.
# If the marker exists for 3 consecutive polls with no phase written, the session
# is killed and the callback invoked with "PHASE:failed".
monitor_phase_loop() {
local phase_file="$1"
local idle_timeout="$2"
local callback="$3"
local _session="${4:-${SESSION_NAME:-}}"
# Export resolved session name so callbacks can reference it regardless of
# which session was passed to monitor_phase_loop (analogous to _MONITOR_LOOP_EXIT).
export _MONITOR_SESSION="$_session"
local poll_interval="${PHASE_POLL_INTERVAL:-10}"
# mtime of the phase file at the last processed change (0 = nothing processed yet).
local last_mtime=0
# Seconds since the last processed phase change (advanced once per poll).
local idle_elapsed=0
# Consecutive polls that saw the idle marker while the phase file was empty.
local idle_pane_count=0
while true; do
sleep "$poll_interval"
idle_elapsed=$(( idle_elapsed + poll_interval ))
# Session health check: SessionEnd hook marker provides fast detection,
# tmux has-session is the fallback for unclean exits (e.g. tmux crash).
local exit_marker="/tmp/claude-exited-${_session}.ts"
if [ -f "$exit_marker" ] || ! tmux has-session -t "${_session}" 2>/dev/null; then
local current_phase
# First line of the phase file with all whitespace removed.
current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)
case "$current_phase" in
PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate)
;; # terminal — fall through to phase handler
*)
# Call callback with "crashed" — let agent-specific code handle recovery
if type "${callback}" &>/dev/null; then
"$callback" "PHASE:crashed"
fi
# If callback didn't restart session, break
if ! tmux has-session -t "${_session}" 2>/dev/null; then
_MONITOR_LOOP_EXIT="crashed"
return 1
fi
# Session was restarted by the callback: start idle accounting afresh.
idle_elapsed=0
idle_pane_count=0
continue
;;
esac
fi
# Check phase-changed marker from PostToolUse hook — if present, the hook
# detected a phase file write so we reset last_mtime to force processing
# this cycle instead of waiting for the next mtime change.
local phase_marker="/tmp/phase-changed-${_session}.marker"
if [ -f "$phase_marker" ]; then
rm -f "$phase_marker"
last_mtime=0
fi
# Check phase file for changes
local phase_mtime
# A missing/unreadable phase file yields mtime 0, i.e. "no change".
phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null || echo 0)
local current_phase
current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)
if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$last_mtime" ]; then
# No phase change — check idle timeout
if [ "$idle_elapsed" -ge "$idle_timeout" ]; then
_MONITOR_LOOP_EXIT="idle_timeout"
agent_kill_session "${_session}"
return 0
fi
# Idle detection via Stop hook: the on-idle-stop.sh hook writes a marker
# file when Claude finishes a response. If the marker exists and no phase
# has been written, Claude returned to the prompt without following the
# phase protocol. 3 consecutive polls = confirmed idle (not mid-turn).
local idle_marker="/tmp/claude-idle-${_session}.ts"
if [ -z "$current_phase" ] && [ -f "$idle_marker" ]; then
idle_pane_count=$(( idle_pane_count + 1 ))
if [ "$idle_pane_count" -ge 3 ]; then
_MONITOR_LOOP_EXIT="idle_prompt"
# Session is killed before the callback is invoked.
# Callbacks that handle PHASE:failed must not assume the session is alive.
agent_kill_session "${_session}"
if type "${callback}" &>/dev/null; then
"$callback" "PHASE:failed"
fi
return 0
fi
else
# Marker absent or a phase is present: the idle streak is broken.
idle_pane_count=0
fi
continue
fi
# Phase changed
last_mtime="$phase_mtime"
# shellcheck disable=SC2034 # read by phase-handler.sh callback
LAST_PHASE_MTIME="$phase_mtime"
idle_elapsed=0
idle_pane_count=0
# Terminal phases
case "$current_phase" in
PHASE:done|PHASE:merged)
# Both success phases are reported as exit reason "done".
_MONITOR_LOOP_EXIT="done"
if type "${callback}" &>/dev/null; then
"$callback" "$current_phase"
fi
return 0
;;
PHASE:failed|PHASE:escalate)
# The exit reason is the phase string itself.
_MONITOR_LOOP_EXIT="$current_phase"
if type "${callback}" &>/dev/null; then
"$callback" "$current_phase"
fi
return 0
;;
esac
# Non-terminal phase — call callback
if type "${callback}" &>/dev/null; then
"$callback" "$current_phase"
fi
done
}
# Store CONTENT (plus a trailing newline) in the context file that belongs to
# PHASE_FILE: the same path with a trailing ".phase" replaced by ".context".
# The SessionStart compact hook installed by create_agent_session is pointed
# at this file to re-inject the text after context compaction.
# Args: phase_file content
write_compact_context() {
  local phase_path="$1"
  local body="$2"
  printf '%s\n' "$body" > "${phase_path%.phase}.context"
}
# Tear down a tmux session and remove its marker files under /tmp
# (idle, phase-changed, exited, nudge counter). A missing session is not an
# error; with an empty name no tmux call is made.
# Args: session
agent_kill_session() {
  local session="${1:-}"
  local marker
  if [ -n "$session" ]; then
    tmux kill-session -t "$session" 2>/dev/null || true
  fi
  for marker in \
    "/tmp/claude-idle-${session}.ts" \
    "/tmp/phase-changed-${session}.marker" \
    "/tmp/claude-exited-${session}.ts" \
    "/tmp/claude-nudge-${session}.count"; do
    rm -f "$marker"
  done
}
# Print the first line of a phase file with every whitespace character
# removed. A missing or unreadable file produces empty output, not an error.
# Usage: read_phase [file] — defaults to $PHASE_FILE
read_phase() {
  local phase_path="${1:-${PHASE_FILE:-}}"
  {
    head -1 "$phase_path" 2>/dev/null || true
  } | tr -d '[:space:]'
}

View file

@ -1,591 +0,0 @@
#!/usr/bin/env bash
# branch-protection.sh — Helper for setting up branch protection on repos
#
# Source after lib/env.sh:
# source "$(dirname "$0")/../lib/env.sh"
# source "$(dirname "$0")/lib/branch-protection.sh"
#
# Required globals: FORGE_TOKEN, FORGE_URL, FORGE_OPS_REPO
#
# Functions:
# setup_vault_branch_protection — Set up admin-only branch protection for main
# verify_branch_protection — Verify protection is configured correctly
# setup_profile_branch_protection — Set up admin-only branch protection for .profile repos
# remove_branch_protection — Remove branch protection (for cleanup/testing)
#
# Branch protection settings:
# - Require 1 approval before merge
# - Restrict merge to admin role (not regular collaborators or bots)
# - Block direct pushes to main (all changes must go through PR)
set -euo pipefail
# Internal log helper: hands the message, prefixed with "branch-protection: ",
# to log() when such a function is defined; otherwise prints it to stderr
# with a UTC timestamp.
_bp_log() {
  if ! declare -f log >/dev/null 2>&1; then
    printf '[%s] branch-protection: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2
    return
  fi
  log "branch-protection: $*"
}
# Print the REST base URL of the ops repo (no trailing newline), built from
# FORGE_URL and FORGE_OPS_REPO.
_ops_api() {
  printf '%s/api/v1/repos/%s' "${FORGE_URL}" "${FORGE_OPS_REPO}"
}
# -----------------------------------------------------------------------------
# setup_vault_branch_protection — protect a branch of the ops repo
#
# Creates the protection rule (POST) or, when one already exists, updates it
# (PUT). The rule sent:
# - direct pushes and force pushes disabled
# - 1 required approval
# - admin_enforced: true
#
# Globals: FORGE_TOKEN, FORGE_OPS_REPO (read); base URL from _ops_api
# Args: $1 - Branch to protect (default: main)
# Returns: 0 on success, 1 on failure
# -----------------------------------------------------------------------------
setup_vault_branch_protection() {
  local branch="${1:-main}"
  local api_url
  api_url="$(_ops_api)"

  _bp_log "Setting up branch protection for ${branch} on ${FORGE_OPS_REPO}"

  # The branch may not be visible right after the initial push, so probe for
  # it up to max_attempts times, pausing 2s between probes.
  local branch_exists="0"
  local max_attempts=3
  local attempt
  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
    if [ "$branch_exists" = "200" ]; then
      _bp_log "Branch ${branch} exists on ${FORGE_OPS_REPO}"
      break
    fi
    if [ "$attempt" -lt "$max_attempts" ]; then
      _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..."
      sleep 2
    fi
  done
  if [ "$branch_exists" != "200" ]; then
    _bp_log "ERROR: Branch ${branch} does not exist on ${FORGE_OPS_REPO} after ${max_attempts} attempts"
    return 1
  fi

  # An existing rule is updated in place, otherwise a new one is created.
  local protection_exists
  protection_exists=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0")
  local method="POST"
  if [ "$protection_exists" = "200" ]; then
    _bp_log "Branch protection already exists for ${branch}"
    _bp_log "Updating existing protection rules"
    method="PUT"
  fi

  # Request body for the protection rule.
  local protection_json='{
"enable_push": false,
"enable_force_push": false,
"enable_merge_commit": true,
"enable_rebase": true,
"enable_rebase_merge": true,
"required_approvals": 1,
"required_signatures": false,
"admin_enforced": true,
"required_status_checks": false,
"required_linear_history": false
}'

  local http_code
  http_code=$(curl -s -o /dev/null -w "%{http_code}" \
    -X "$method" \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${api_url}/branches/${branch}/protection" \
    -d "$protection_json" || echo "0")
  case "$http_code" in
    200|201) ;;
    *)
      _bp_log "ERROR: Failed to set up branch protection (HTTP ${http_code})"
      return 1
      ;;
  esac

  _bp_log "Branch protection configured successfully for ${branch}"
  _bp_log " - Pushes blocked: true"
  _bp_log " - Force pushes blocked: true"
  _bp_log " - Required approvals: 1"
  _bp_log " - Admin enforced: true"
  return 0
}
# -----------------------------------------------------------------------------
# verify_branch_protection — Verify protection is configured correctly
#
# Fetches the protection rule for a branch of the ops repo and checks that:
# - enable_push is false (a missing field counts as true, i.e. unprotected)
# - enable_merge_commit is true
# - required_approvals is a number >= 1
# - admin_enforced is true
# Every check is logged through _bp_log as OK or ERROR.
#
# Globals: FORGE_TOKEN (read); base URL from _ops_api
# Args: $1 - Branch to verify (default: main)
# Returns: 0 if protection is configured correctly, 1 otherwise
# -----------------------------------------------------------------------------
verify_branch_protection() {
  local branch="${1:-main}"
  local api_url
  api_url="$(_ops_api)"

  _bp_log "Verifying branch protection for ${branch}"

  # Get current protection settings (empty on any HTTP/transport failure)
  local protection_json
  protection_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${api_url}/branches/${branch}/protection" 2>/dev/null || true)
  if [ -z "$protection_json" ] || [ "$protection_json" = "null" ]; then
    _bp_log "ERROR: No branch protection found for ${branch}"
    return 1
  fi

  # Extract and validate settings
  local enable_push enable_merge_commit required_approvals admin_enforced
  # jq's `//` falls back on false as well as null, so `.enable_push // true`
  # would turn an explicit false into true. Only default when the field is
  # actually absent/null.
  enable_push=$(printf '%s' "$protection_json" \
    | jq -r 'if .enable_push == null then true else .enable_push end')
  enable_merge_commit=$(printf '%s' "$protection_json" | jq -r '.enable_merge_commit // false')
  required_approvals=$(printf '%s' "$protection_json" | jq -r '.required_approvals // 0')
  admin_enforced=$(printf '%s' "$protection_json" | jq -r '.admin_enforced // false')

  # A non-numeric value would make the integer test below error out and be
  # mistaken for success; treat it as "no approvals required".
  case "$required_approvals" in
    '' | *[!0-9]*) required_approvals=0 ;;
  esac

  local errors=0

  # Check push is disabled
  if [ "$enable_push" = "true" ]; then
    _bp_log "ERROR: enable_push should be false"
    errors=$((errors + 1))
  else
    _bp_log "OK: Pushes are blocked"
  fi

  # Check merge commit is enabled
  if [ "$enable_merge_commit" != "true" ]; then
    _bp_log "ERROR: enable_merge_commit should be true"
    errors=$((errors + 1))
  else
    _bp_log "OK: Merge commits are allowed"
  fi

  # Check required approvals
  if [ "$required_approvals" -lt 1 ]; then
    _bp_log "ERROR: required_approvals should be at least 1"
    errors=$((errors + 1))
  else
    _bp_log "OK: Required approvals: ${required_approvals}"
  fi

  # Check admin enforced
  if [ "$admin_enforced" != "true" ]; then
    _bp_log "ERROR: admin_enforced should be true"
    errors=$((errors + 1))
  else
    _bp_log "OK: Admin enforcement enabled"
  fi

  if [ "$errors" -gt 0 ]; then
    _bp_log "Verification failed with ${errors} error(s)"
    return 1
  fi

  _bp_log "Branch protection verified successfully"
  return 0
}
# -----------------------------------------------------------------------------
# setup_profile_branch_protection — Set up admin-only branch protection for .profile repos
#
# Creates (POST) or updates (PUT) a protection rule that:
# - blocks direct pushes and force pushes
# - requires 1 approval before merge
# - sets admin_enforced: true
#
# Afterwards makes a best-effort attempt to create a 'journal' branch (from
# the protected branch's head commit) for direct agent journal pushes. A
# failure to create the journal branch is logged but does not fail the call.
#
# NOTE(review): the endpoint paths used here (/git/branches, /branches/<b>/protection,
# /git/refs) are kept as found — confirm them against the Forgejo API version in use.
#
# Globals: FORGE_TOKEN, FORGE_URL (read)
# Args:
# $1 - Repo path in format 'owner/repo' (e.g., 'dev-bot/.profile')
# $2 - Branch to protect (default: main)
#
# Returns: 0 on success, 1 on failure
# -----------------------------------------------------------------------------
setup_profile_branch_protection() {
  local repo="${1:-}"
  local branch="${2:-main}"

  if [ -z "$repo" ]; then
    _bp_log "ERROR: repo path required (format: owner/repo)"
    return 1
  fi

  _bp_log "Setting up branch protection for ${branch} on ${repo}"

  local api_url
  api_url="${FORGE_URL}/api/v1/repos/${repo}"

  # Check if branch exists with retry loop (handles race condition after initial push)
  local branch_exists="0"
  local max_attempts=3
  local attempt=1
  while [ "$attempt" -le "$max_attempts" ]; do
    branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
    if [ "$branch_exists" = "200" ]; then
      _bp_log "Branch ${branch} exists on ${repo}"
      break
    fi
    if [ "$attempt" -lt "$max_attempts" ]; then
      _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..."
      sleep 2
    fi
    attempt=$((attempt + 1))
  done
  if [ "$branch_exists" != "200" ]; then
    _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts"
    return 1
  fi

  # Check if protection already exists
  local protection_exists
  protection_exists=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0")
  if [ "$protection_exists" = "200" ]; then
    _bp_log "Branch protection already exists for ${branch}"
    _bp_log "Updating existing protection rules"
  fi

  # Create/update branch protection
  local protection_json
  protection_json=$(cat <<EOF
{
"enable_push": false,
"enable_force_push": false,
"enable_merge_commit": true,
"enable_rebase": true,
"enable_rebase_merge": true,
"required_approvals": 1,
"required_signatures": false,
"admin_enforced": true,
"required_status_checks": false,
"required_linear_history": false
}
EOF
)

  local http_code
  if [ "$protection_exists" = "200" ]; then
    # Update existing protection
    http_code=$(curl -s -o /dev/null -w "%{http_code}" \
      -X PUT \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${api_url}/branches/${branch}/protection" \
      -d "$protection_json" || echo "0")
  else
    # Create new protection
    http_code=$(curl -s -o /dev/null -w "%{http_code}" \
      -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${api_url}/branches/${branch}/protection" \
      -d "$protection_json" || echo "0")
  fi
  if [ "$http_code" != "200" ] && [ "$http_code" != "201" ]; then
    _bp_log "ERROR: Failed to set up branch protection (HTTP ${http_code})"
    return 1
  fi

  _bp_log "Branch protection configured successfully for ${branch}"
  _bp_log " - Pushes blocked: true"
  _bp_log " - Force pushes blocked: true"
  _bp_log " - Required approvals: 1"
  _bp_log " - Admin enforced: true"

  # Create journal branch for direct agent journal pushes
  _bp_log "Creating 'journal' branch for direct agent journal pushes"
  local journal_branch="journal"
  local journal_ready=1
  local journal_exists
  journal_exists=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${api_url}/git/branches/${journal_branch}" 2>/dev/null || echo "0")
  if [ "$journal_exists" != "200" ]; then
    # Resolve the head commit of the protected branch. `// empty` keeps a
    # missing sha from being rendered as the literal string "null", which
    # would otherwise be posted as the new ref's sha.
    local main_commit
    main_commit=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
      "${api_url}/git/refs/heads/${branch}" 2>/dev/null \
      | jq -r '.[0].object.sha // empty' 2>/dev/null || true)
    if [ -n "$main_commit" ]; then
      curl -sf -X POST \
        -H "Authorization: token ${FORGE_TOKEN}" \
        -H "Content-Type: application/json" \
        "${api_url}/git/refs" \
        -d "{\"ref\":\"refs/heads/${journal_branch}\",\"sha\":\"${main_commit}\"}" >/dev/null 2>&1 || {
        _bp_log "Warning: failed to create journal branch (may already exist)"
        journal_ready=0
      }
    else
      _bp_log "Warning: could not resolve head commit of ${branch}; journal branch not created"
      journal_ready=0
    fi
  fi
  # Only announce readiness when the branch exists or was just created.
  if [ "$journal_ready" -eq 1 ]; then
    _bp_log "Journal branch '${journal_branch}' ready for direct pushes"
  fi
  return 0
}
# -----------------------------------------------------------------------------
# remove_branch_protection — drop the protection rule of an ops-repo branch
# (for cleanup/testing)
#
# Globals: FORGE_TOKEN (read); base URL from _ops_api
# Args: $1 - Branch (default: main)
# Returns: 0 when the rule was deleted or none existed, 1 when the DELETE
# request did not answer 204
# -----------------------------------------------------------------------------
remove_branch_protection() {
  local branch="${1:-main}"
  local endpoint
  endpoint="$(_ops_api)/branches/${branch}/protection"

  _bp_log "Removing branch protection for ${branch}"

  # Nothing to do unless a rule is currently present.
  local probe_code
  probe_code=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${endpoint}" 2>/dev/null || echo "0")
  if [ "$probe_code" != "200" ]; then
    _bp_log "No branch protection found for ${branch}"
    return 0
  fi

  local delete_code
  delete_code=$(curl -s -o /dev/null -w "%{http_code}" \
    -X DELETE \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${endpoint}" 2>/dev/null || echo "0")
  if [ "$delete_code" = "204" ]; then
    _bp_log "Branch protection removed successfully for ${branch}"
    return 0
  fi

  _bp_log "ERROR: Failed to remove branch protection (HTTP ${delete_code})"
  return 1
}
# -----------------------------------------------------------------------------
# setup_project_branch_protection — protect a branch of a project repo
#
# Creates the protection rule (POST) or, when one already exists, updates it
# (PUT). The rule sent:
# - direct pushes and force pushes disabled
# - 1 required approval
# - merge whitelist enabled with "dev-bot" as its only member
#
# Globals: FORGE_TOKEN, FORGE_URL (read)
# Args:
# $1 - Repo path in format 'owner/repo' (e.g., 'disinto-admin/disinto')
# $2 - Branch to protect (default: main)
#
# Returns: 0 on success, 1 on failure
# -----------------------------------------------------------------------------
setup_project_branch_protection() {
  local repo="${1:-}"
  local branch="${2:-main}"

  if [ -z "$repo" ]; then
    _bp_log "ERROR: repo path required (format: owner/repo)"
    return 1
  fi

  _bp_log "Setting up branch protection for ${branch} on ${repo}"

  local api_url="${FORGE_URL}/api/v1/repos/${repo}"

  # The branch may not be visible right after the initial push, so probe for
  # it up to max_attempts times, pausing 2s between probes.
  local branch_exists="0"
  local max_attempts=3
  local attempt
  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
    if [ "$branch_exists" = "200" ]; then
      _bp_log "Branch ${branch} exists on ${repo}"
      break
    fi
    if [ "$attempt" -lt "$max_attempts" ]; then
      _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..."
      sleep 2
    fi
  done
  if [ "$branch_exists" != "200" ]; then
    _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts"
    return 1
  fi

  # An existing rule is updated in place, otherwise a new one is created.
  local protection_exists
  protection_exists=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0")
  local method="POST"
  if [ "$protection_exists" = "200" ]; then
    _bp_log "Branch protection already exists for ${branch}"
    _bp_log "Updating existing protection rules"
    method="PUT"
  fi

  # Request body (factory mode): pushes blocked, merging limited to the
  # whitelisted dev-bot account, one approval required.
  local protection_json='{
"enable_push": false,
"enable_force_push": false,
"enable_merge_commit": true,
"enable_rebase": true,
"enable_rebase_merge": true,
"required_approvals": 1,
"required_signatures": false,
"enable_merge_whitelist": true,
"merge_whitelist_usernames": ["dev-bot"],
"required_status_checks": false,
"required_linear_history": false
}'

  local http_code
  http_code=$(curl -s -o /dev/null -w "%{http_code}" \
    -X "$method" \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${api_url}/branches/${branch}/protection" \
    -d "$protection_json" || echo "0")
  case "$http_code" in
    200|201) ;;
    *)
      _bp_log "ERROR: Failed to set up branch protection (HTTP ${http_code})"
      return 1
      ;;
  esac

  _bp_log "Branch protection configured successfully for ${branch}"
  _bp_log " - Pushes blocked: true"
  _bp_log " - Force pushes blocked: true"
  _bp_log " - Required approvals: 1"
  _bp_log " - Merge whitelist: dev-bot only"
  _bp_log " - review-bot can approve: yes"
  return 0
}
# -----------------------------------------------------------------------------
# Test mode — command-line dispatch, active only when this file is executed
# directly (not when it is sourced)
# -----------------------------------------------------------------------------
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  # All three settings must be present before any command (including help).
  for _bp_required in FORGE_TOKEN FORGE_URL FORGE_OPS_REPO; do
    if [ -z "${!_bp_required:-}" ]; then
      echo "ERROR: ${_bp_required} is required" >&2
      exit 1
    fi
  done

  # First argument selects the command; anything unrecognised prints usage.
  case "${1:-help}" in
    setup)
      setup_vault_branch_protection "${2:-main}"
      ;;
    setup-profile|setup-project)
      if [ -z "${2:-}" ]; then
        echo "ERROR: repo path required (format: owner/repo)" >&2
        exit 1
      fi
      if [ "$1" = "setup-profile" ]; then
        setup_profile_branch_protection "${2}" "${3:-main}"
      else
        setup_project_branch_protection "${2}" "${3:-main}"
      fi
      ;;
    verify)
      verify_branch_protection "${2:-main}"
      ;;
    remove)
      remove_branch_protection "${2:-main}"
      ;;
    help|*)
      printf '%s\n' \
        "Usage: $0 {setup|setup-profile|setup-project|verify|remove} [args...]" \
        "" \
        "Commands:" \
        " setup [branch] Set up branch protection on ops repo (default: main)" \
        " setup-profile <repo> [branch] Set up branch protection on .profile repo" \
        " setup-project <repo> [branch] Set up branch protection on project repo" \
        " verify [branch] Verify branch protection is configured correctly" \
        " remove [branch] Remove branch protection (for cleanup/testing)" \
        "" \
        "Required environment variables:" \
        " FORGE_TOKEN Forgejo API token (admin user recommended)" \
        " FORGE_URL Forgejo instance URL (e.g., https://codeberg.org)" \
        " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., disinto-admin/disinto-ops)"
      exit 0
      ;;
  esac
fi

View file

@ -17,11 +17,6 @@ REPO="${FORGE_REPO}"
API="${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}" API="${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}"
api() { api() {
# Validate API URL to prevent URL injection
if ! validate_url "$API"; then
echo "ERROR: API URL validation failed - possible URL injection attempt" >&2
return 1
fi
curl -sf -H "Authorization: Bearer ${WOODPECKER_TOKEN}" "${API}/$1" curl -sf -H "Authorization: Bearer ${WOODPECKER_TOKEN}" "${API}/$1"
} }

View file

@ -7,6 +7,27 @@ set -euo pipefail
# ci_commit_status() / ci_pipeline_number() require: woodpecker_api(), forge_api() (from env.sh) # ci_commit_status() / ci_pipeline_number() require: woodpecker_api(), forge_api() (from env.sh)
# classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh) # classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh)
# ensure_blocked_label_id — print the ID of the "blocked" label on stdout,
# creating the label when the lookup finds none.
# The ID is memoised in the global _BLOCKED_LABEL_ID so later calls skip the API.
# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api()
# Output may be empty when both the lookup and the creation fail; the function
# still returns the status of the final printf.
ensure_blocked_label_id() {
  # Only hit the API when nothing has been cached yet.
  if [ -z "${_BLOCKED_LABEL_ID:-}" ]; then
    # First try: find an existing label by name.
    _BLOCKED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
      | jq -r '.[] | select(.name == "blocked") | .id' 2>/dev/null || true)

    # Second try: create it and take the ID from the response.
    if [ -z "$_BLOCKED_LABEL_ID" ]; then
      _BLOCKED_LABEL_ID=$(curl -sf -X POST \
        -H "Authorization: token ${FORGE_TOKEN}" \
        -H "Content-Type: application/json" \
        "${FORGE_API}/labels" \
        -d '{"name":"blocked","color":"#e11d48"}' 2>/dev/null \
        | jq -r '.id // empty' 2>/dev/null || true)
    fi
  fi

  printf '%s' "$_BLOCKED_LABEL_ID"
}
# ensure_priority_label — look up (or create) the "priority" label, print its ID. # ensure_priority_label — look up (or create) the "priority" label, print its ID.
# Caches the result in _PRIORITY_LABEL_ID to avoid repeated API calls. # Caches the result in _PRIORITY_LABEL_ID to avoid repeated API calls.
# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api() # Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api()
@ -246,42 +267,3 @@ ci_promote() {
echo "$new_num" echo "$new_num"
} }
# ci_get_logs <pipeline_number> [--step <step_name>]
# Reads CI logs by delegating to lib/ci-log-reader.py (run with python3).
# Requires: WOODPECKER_DATA_DIR env var or mounted volume at /woodpecker-data
# Returns: 0 on success, 1 on failure (missing/invalid arguments, reader script
#          not found); otherwise the reader's own exit status. Log text goes to
#          stdout, diagnostics to stderr.
#
# Usage:
# ci_get_logs 346 # Get all failed step logs
# ci_get_logs 346 --step smoke-init # Get logs for specific step
ci_get_logs() {
  # Tolerate a missing argument under `set -u` and report it properly.
  local pipeline_number="${1:-}"
  if [ -z "$pipeline_number" ]; then
    echo "Usage: ci_get_logs <pipeline_number> [--step <step_name>]" >&2
    return 1
  fi
  shift

  local step_name=""
  while [ $# -gt 0 ]; do
    case "$1" in
      --step|-s)
        # Without this guard `shift 2` fails and leaves "$1" in place, so the
        # loop would never terminate (or abort on unbound "$2" under set -u).
        if [ $# -lt 2 ]; then
          echo "ERROR: $1 requires a step name" >&2
          return 1
        fi
        step_name="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        return 1
        ;;
    esac
  done

  local log_reader="${FACTORY_ROOT:-/home/agent/disinto}/lib/ci-log-reader.py"
  if [ ! -f "$log_reader" ]; then
    echo "ERROR: ci-log-reader.py not found at $log_reader" >&2
    return 1
  fi

  # Build the argument list once; an empty step name means "all failed steps".
  local -a reader_args=("$pipeline_number")
  if [ -n "$step_name" ]; then
    reader_args+=(--step "$step_name")
  fi
  python3 "$log_reader" "${reader_args[@]}"
}

View file

@ -1,125 +0,0 @@
#!/usr/bin/env python3
"""
ci-log-reader.py — Read CI logs from the Woodpecker SQLite database.
Usage:
ci-log-reader.py <pipeline_number> [--step <step_name>]
Reads log entries from the Woodpecker SQLite database and outputs them to stdout.
If --step is specified, filters to that step only. Otherwise returns logs from
all failed steps, truncated to the last 200 lines to avoid context bloat.
Environment:
WOODPECKER_DATA_DIR - Path to Woodpecker data directory (default: /woodpecker-data)
The SQLite database is located at: $WOODPECKER_DATA_DIR/woodpecker.sqlite
"""
import argparse
import sqlite3
import sys
import os
DEFAULT_DB_PATH = "/woodpecker-data/woodpecker.sqlite"
DEFAULT_WOODPECKER_DATA_DIR = "/woodpecker-data"
MAX_OUTPUT_LINES = 200
def get_db_path():
    """Return the path of the Woodpecker SQLite database file.

    The directory is taken from the WOODPECKER_DATA_DIR environment variable,
    falling back to DEFAULT_WOODPECKER_DATA_DIR when it is not set; the file
    name is always ``woodpecker.sqlite``.
    """
    data_dir = os.environ.get("WOODPECKER_DATA_DIR")
    if data_dir is None:
        data_dir = DEFAULT_WOODPECKER_DATA_DIR
    return os.path.join(data_dir, "woodpecker.sqlite")
def query_logs(pipeline_number: int, step_name: str | None = None) -> list[str]:
    """
    Query log entries from the Woodpecker database.

    Args:
        pipeline_number: The pipeline number to query
        step_name: Optional step name to filter by. When omitted (or empty),
            logs of all steps in state 'failure', 'error' or 'killed' are
            returned instead.

    Returns:
        List of log data strings, ordered by log entry id.

    Exits the process with status 1 (after printing to stderr) when the
    database file does not exist.
    """
    db_path = get_db_path()

    if not os.path.exists(db_path):
        print(f"ERROR: Woodpecker database not found at {db_path}", file=sys.stderr)
        print(f"Set WOODPECKER_DATA_DIR or mount volume to {DEFAULT_WOODPECKER_DATA_DIR}", file=sys.stderr)
        sys.exit(1)

    # NOTE(review): pipelines are matched by number only; if the database holds
    # several repositories with overlapping pipeline numbers this may mix them
    # — confirm against the Woodpecker schema.
    if step_name:
        # Query logs for a specific step
        query = """
            SELECT le.data
            FROM log_entries le
            JOIN steps s ON le.step_id = s.id
            JOIN pipelines p ON s.pipeline_id = p.id
            WHERE p.number = ? AND s.name = ?
            ORDER BY le.id
        """
        params = (pipeline_number, step_name)
    else:
        # Query logs for all failed steps in the pipeline
        query = """
            SELECT le.data
            FROM log_entries le
            JOIN steps s ON le.step_id = s.id
            JOIN pipelines p ON s.pipeline_id = p.id
            WHERE p.number = ? AND s.state IN ('failure', 'error', 'killed')
            ORDER BY le.id
        """
        params = (pipeline_number,)

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(query, params).fetchall()
    finally:
        # Close even when the query raises (e.g. missing table), so the
        # connection is never leaked.
        conn.close()

    logs = []
    for row in rows:
        data = row["data"]
        # sqlite3 hands BLOB columns back as bytes; decode them so the result
        # really is list[str] and can be joined by the caller.
        # NOTE(review): presumably Woodpecker stores log data as a BLOB —
        # confirm against the actual schema.
        if isinstance(data, bytes):
            data = data.decode("utf-8", errors="replace")
        logs.append(data)
    return logs
def main():
    """Command-line entry point.

    Parses ``<pipeline_number> [--step/-s <step_name>]``, fetches the matching
    log entries via query_logs() and prints at most the last MAX_OUTPUT_LINES
    lines to stdout. When nothing matches, a note is written to stderr and the
    process exits with status 0.
    """
    parser = argparse.ArgumentParser(description="Read CI logs from Woodpecker SQLite database")
    parser.add_argument("pipeline_number", type=int, help="Pipeline number to query")
    parser.add_argument("--step", "-s", dest="step_name", default=None, help="Filter to a specific step name")
    opts = parser.parse_args()

    entries = query_logs(opts.pipeline_number, opts.step_name)

    if not entries:
        if opts.step_name:
            message = f"No logs found for pipeline #{opts.pipeline_number}, step '{opts.step_name}'"
        else:
            message = f"No failed steps found in pipeline #{opts.pipeline_number}"
        print(message, file=sys.stderr)
        sys.exit(0)

    # An entry may itself span several lines, so re-split after joining and
    # count real lines. Slicing from the end is a no-op when the output is
    # already short enough, which keeps the tail and avoids context bloat.
    all_lines = "\n".join(entries).split("\n")
    print("\n".join(all_lines[-MAX_OUTPUT_LINES:]))


if __name__ == "__main__":
    main()

View file

@ -1,456 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# ci-setup.sh — CI setup functions for Woodpecker and scheduling configuration
#
# Internal functions (called via _load_ci_context + _*_impl):
# _install_cron_impl() - Install crontab entries (bare-metal only; compose uses polling loop)
# _create_woodpecker_oauth_impl() - Create OAuth2 app on Forgejo for Woodpecker
# _generate_woodpecker_token_impl() - Auto-generate WOODPECKER_TOKEN via OAuth2 flow
# _activate_woodpecker_repo_impl() - Activate repo in Woodpecker
#
# Globals expected (asserted by _load_ci_context):
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
# FORGE_TOKEN - Forge API token
# FACTORY_ROOT - Root of the disinto factory
#
# Usage:
# source "${FACTORY_ROOT}/lib/ci-setup.sh"
# =============================================================================
set -euo pipefail
# Guard for this module: verify that FORGE_URL, FORGE_TOKEN and FACTORY_ROOT
# are all set and non-empty. On failure the missing names are listed on stderr
# and the shell exits with status 1.
_load_ci_context() {
  local required_var
  local missing=()

  # Indirect expansion lets one loop check every required global in order.
  for required_var in FORGE_URL FORGE_TOKEN FACTORY_ROOT; do
    if [ -z "${!required_var:-}" ]; then
      missing+=("$required_var")
    fi
  done

  if [ "${#missing[@]}" -eq 0 ]; then
    return 0
  fi

  echo "Error: ci-setup.sh requires these globals to be set: ${missing[*]}" >&2
  exit 1
}
# Generate and optionally install cron entries for bare-metal deployments.
# In compose mode, the agents container uses a polling loop (entrypoint.sh) instead.
# Usage: _install_cron_impl <name> <toml_path> <auto_yes> [bare]
#   name      project name, used in the "# disinto: <name>" marker line
#   toml_path project TOML; converted to an absolute path for the cron entries
#   auto_yes  "false" asks for confirmation when stdin is a terminal
#   bare      "false" (default) skips installation entirely (compose mode)
# Returns 0 when entries were installed or deliberately skipped, 1 when piping
# the new crontab into `crontab -` fails; exits 1 when crontab is not installed.
_install_cron_impl() {
  local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}"

  # In compose mode, skip host cron — the agents container uses a polling loop
  if [ "$bare" = false ]; then
    echo ""
    echo "Cron: skipped (agents container handles scheduling in compose mode)"
    return
  fi

  # Bare mode: crontab is required on the host
  if ! command -v crontab &>/dev/null; then
    echo "Error: crontab not found (required for bare-metal mode)" >&2
    echo " Install: apt install cron / brew install cron" >&2
    exit 1
  fi

  # Use absolute path for the TOML in cron entries
  local abs_toml
  abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")"

  # The marker line identifies this project's block in the crontab.
  local marker="# disinto: ${name}"
  local cron_block
  cron_block="${marker}
2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1
4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1
0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1"

  echo ""
  echo "Cron entries to install:"
  echo "$cron_block"
  echo ""

  # Check if cron entries already exist. Match the marker as a fixed string on
  # a whole line: a substring/regex match would treat project "app" as already
  # installed when only "app2" is present. The here-string also avoids the
  # SIGPIPE/pipefail hazard of `echo | grep -q`.
  local current_crontab
  current_crontab=$(crontab -l 2>/dev/null || true)
  if grep -Fxq -- "$marker" <<<"$current_crontab"; then
    echo "Cron: skipped (entries for ${name} already installed)"
    return
  fi

  # Only prompt when running interactively and not pre-approved.
  if [ "$auto_yes" = false ] && [ -t 0 ]; then
    local confirm
    read -rp "Install these cron entries? [y/N] " confirm
    if [[ ! "$confirm" =~ ^[Yy] ]]; then
      echo "Skipped cron install. Add manually with: crontab -e"
      return
    fi
  fi

  # Append to existing crontab
  if { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab -; then
    echo "Cron entries installed for ${name}"
  else
    echo "Error: failed to install cron entries" >&2
    return 1
  fi
}
# Register Woodpecker CI as an OAuth2 application on Forgejo and record the
# resulting forge settings in ${FACTORY_ROOT}/.env.
# Usage: _create_woodpecker_oauth_impl <forge_url> <repo_slug>
#   forge_url  base URL of the Forgejo instance
#   repo_slug  accepted for signature compatibility only; not used
# An existing "woodpecker-ci" app is reused (only its client_id is known then).
# If creation fails, a warning is printed and the function returns without
# touching .env.
_create_woodpecker_oauth_impl() {
  local forge_url="$1"
  local _repo_slug="$2" # unused but required for signature compatibility

  echo ""
  echo "── Woodpecker OAuth2 setup ────────────────────────────"

  local app_name="woodpecker-ci"
  local callback_url="http://localhost:8000/authorize"
  local apps_endpoint="${forge_url}/api/v1/user/applications/oauth2"
  local client_id="" client_secret=""

  # Look for an application of the same name before creating a new one.
  local found_id
  found_id=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "$apps_endpoint" 2>/dev/null \
    | jq -r --arg name "$app_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true

  if [ -z "$found_id" ]; then
    local create_resp
    create_resp=$(curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "$apps_endpoint" \
      -d "{\"name\":\"${app_name}\",\"redirect_uris\":[\"${callback_url}\"],\"confidential_client\":true}" \
      2>/dev/null) || create_resp=""

    if [ -z "$create_resp" ]; then
      echo "Warning: failed to create OAuth2 app on Forgejo" >&2
      return
    fi

    client_id=$(printf '%s' "$create_resp" | jq -r '.client_id // empty')
    client_secret=$(printf '%s' "$create_resp" | jq -r '.client_secret // empty')

    if [ -z "$client_id" ]; then
      echo "Warning: OAuth2 app creation returned no client_id" >&2
      return
    fi
    echo "OAuth2: ${app_name} created (client_id=${client_id})"
  else
    echo "OAuth2: ${app_name} (already exists, client_id=${found_id})"
    client_id="$found_id"
  fi

  # Settings to persist. WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml
  # variable references; WOODPECKER_HOST must be the host-accessible URL so it
  # agrees with the OAuth2 redirect_uri.
  local env_file="${FACTORY_ROOT}/.env"
  local settings=(
    "WOODPECKER_FORGEJO=true"
    "WOODPECKER_FORGEJO_URL=${forge_url}"
    "WOODPECKER_HOST=http://localhost:8000"
  )
  [ -z "$client_id" ] || settings+=("WP_FORGEJO_CLIENT=${client_id}")
  [ -z "$client_secret" ] || settings+=("WP_FORGEJO_SECRET=${client_secret}")

  # Upsert each KEY=value: rewrite the line in place if the key is already
  # present, otherwise append it.
  local entry key
  for entry in "${settings[@]}"; do
    key="${entry%%=*}"
    if grep -q "^${key}=" "$env_file" 2>/dev/null; then
      sed -i "s|^${key}=.*|${entry}|" "$env_file"
    else
      printf '%s\n' "$entry" >> "$env_file"
    fi
  done

  echo "Config: Woodpecker forge vars written to .env"
}
# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow.
# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created).
# Called after compose stack is up, before activate_woodpecker_repo.
# Usage: generate_woodpecker_token <forge_url>
#
# Arguments: $1 - Forgejo base URL used for the web login and OAuth grant.
# Globals read: WOODPECKER_SERVER (default http://localhost:8000), FACTORY_ROOT,
#               _FORGE_ADMIN_PASS.
# Side effects: writes WOODPECKER_TOKEN to ${FACTORY_ROOT}/.env and exports it;
#               creates two temp files under /tmp that are removed on every
#               return path below.
# Returns: 0 when a WOODPECKER_TOKEN line already exists in .env or a token was
#          saved; 1 (after a warning on stderr) when any step of the flow fails.
_generate_woodpecker_token_impl() {
local forge_url="$1"
local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}"
local env_file="${FACTORY_ROOT}/.env"
local admin_user="disinto-admin"
local admin_pass="${_FORGE_ADMIN_PASS:-}"
# Skip if already set
# NOTE(review): this matches any line starting with "WOODPECKER_TOKEN=",
# including one with an empty value — confirm that is intended.
if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then
echo "Config: WOODPECKER_TOKEN already set in .env"
return 0
fi
echo ""
echo "── Woodpecker token generation ────────────────────────"
if [ -z "$admin_pass" ]; then
echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2
echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2
return 1
fi
# Wait for Woodpecker to become ready
# Polls /api/version every 2 seconds and gives up after more than 30 failures.
echo -n "Waiting for Woodpecker"
local retries=0
while ! curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do
retries=$((retries + 1))
if [ "$retries" -gt 30 ]; then
echo ""
echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2
return 1
fi
echo -n "."
sleep 2
done
echo " ready"
# Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token
# cookie_jar carries the session cookies between requests; auth_body_file
# receives the HTML of the authorize response (parsed in step 3).
local cookie_jar auth_body_file
cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX)
auth_body_file=$(mktemp /tmp/wp-body-XXXXXX)
# Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent)
# The CSRF token is scraped from the login page: first tag carrying
# name="_csrf", then its content= or value= attribute.
local csrf
csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \
| grep -o 'name="_csrf"[^>]*' | head -1 \
| grep -oE '(content|value)="[^"]*"' | head -1 \
| cut -d'"' -f2) || csrf=""
if [ -z "$csrf" ]; then
echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2
rm -f "$cookie_jar" "$auth_body_file"
return 1
fi
# Login result is not checked here (|| true); a failed login shows up later
# as a missing authorization code.
curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \
-o /dev/null \
"${forge_url}/user/login" \
--data-urlencode "_csrf=${csrf}" \
--data-urlencode "user_name=${admin_user}" \
--data-urlencode "password=${admin_pass}" \
2>/dev/null || true
# Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param)
local wp_redir
wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \
"${wp_server}/authorize" 2>/dev/null) || wp_redir=""
if [ -z "$wp_redir" ]; then
echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2
rm -f "$cookie_jar" "$auth_body_file"
return 1
fi
# Rewrite internal Docker network URLs to host-accessible URLs.
# Handle both plain and URL-encoded forms of the internal hostnames.
# Only ":" and "/" are percent-encoded below, which is enough for the
# scheme://host:port prefixes being substituted.
local forge_url_enc wp_server_enc
forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g')
wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g')
wp_redir=$(printf '%s' "$wp_redir" \
| sed "s|http://forgejo:3000|${forge_url}|g" \
| sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \
| sed "s|http://woodpecker:8000|${wp_server}|g" \
| sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g")
# Step 3: Hit Forgejo OAuth authorize endpoint with session
# First time: shows consent page. Already approved: redirects with code.
# Response headers go to stdout (-D -) and are captured; the body is saved to
# auth_body_file for the consent-form scraping below.
local auth_headers redirect_loc auth_code
auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \
-D - -o "$auth_body_file" \
"$wp_redir" 2>/dev/null) || auth_headers=""
redirect_loc=$(printf '%s' "$auth_headers" \
| grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}')
if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then
# Auto-approved: extract code from redirect
auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/')
else
# Consent page: extract CSRF and all form fields, POST grant approval
local consent_csrf form_client_id form_state form_redirect_uri
consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \
| head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \
| cut -d'"' -f2) || consent_csrf=""
form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \
| grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id=""
form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \
| grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state=""
form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \
| grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri=""
# Without a consent CSRF token no grant is attempted; auth_code stays unset
# and the check after this block reports the failure.
if [ -n "$consent_csrf" ]; then
local grant_headers
grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \
-D - -o /dev/null -X POST \
"${forge_url}/login/oauth/grant" \
--data-urlencode "_csrf=${consent_csrf}" \
--data-urlencode "client_id=${form_client_id}" \
--data-urlencode "state=${form_state}" \
--data-urlencode "scope=" \
--data-urlencode "nonce=" \
--data-urlencode "redirect_uri=${form_redirect_uri}" \
--data-urlencode "granted=true" \
2>/dev/null) || grant_headers=""
redirect_loc=$(printf '%s' "$grant_headers" \
| grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}')
if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then
auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/')
fi
fi
fi
rm -f "$auth_body_file"
if [ -z "${auth_code:-}" ]; then
echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2
rm -f "$cookie_jar"
return 1
fi
# Step 4: Complete Woodpecker OAuth callback (exchanges code for session)
# The state value is taken from the authorize URL obtained in step 2.
local state
state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p')
local wp_headers wp_token
wp_headers=$(curl -sf -c "$cookie_jar" \
-D - -o /dev/null \
"${wp_server}/authorize?code=${auth_code}&state=${state:-}" \
2>/dev/null) || wp_headers=""
# Extract token from redirect URL (Woodpecker returns ?access_token=...)
redirect_loc=$(printf '%s' "$wp_headers" \
| grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}')
wp_token=""
if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then
wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/')
fi
# Fallback: check for user_sess cookie
# Takes the last field of the cookie-jar line mentioning user_sess.
if [ -z "$wp_token" ]; then
wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token=""
fi
rm -f "$cookie_jar"
if [ -z "$wp_token" ]; then
echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2
return 1
fi
# Step 5: Create persistent personal access token via Woodpecker API
# WP v3 requires CSRF header for POST operations with session tokens.
local wp_csrf
wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \
"${wp_server}/web-config.js" 2>/dev/null \
| sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf=""
# The ${wp_csrf:+...} expansion adds the X-CSRF-Token header only when a
# CSRF value was found; otherwise it expands to nothing.
local pat_resp final_token
pat_resp=$(curl -sf -X POST \
-b "user_sess=${wp_token}" \
${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \
"${wp_server}/api/user/token" \
2>/dev/null) || pat_resp=""
final_token=""
if [ -n "$pat_resp" ]; then
final_token=$(printf '%s' "$pat_resp" \
| jq -r 'if .token then .token elif .access_token then .access_token else empty end' \
2>/dev/null) || final_token=""
fi
# Use persistent token if available, otherwise use session token
final_token="${final_token:-$wp_token}"
# Save to .env
# Rewrite an existing WOODPECKER_TOKEN line in place, otherwise append one.
if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then
sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file"
else
printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file"
fi
export WOODPECKER_TOKEN="$final_token"
echo "Config: WOODPECKER_TOKEN generated and saved to .env"
}
# Make sure a forge repository is activated in Woodpecker CI.
# Usage: _activate_woodpecker_repo_impl <forge_repo>
#   forge_repo  repository slug as used in the forge/Woodpecker API paths
# Globals read: WOODPECKER_SERVER (default http://localhost:8000),
#               WOODPECKER_TOKEN, FORGE_TOKEN, FORGE_URL.
# Globals written: _WP_REPO_ID is set to the Woodpecker repo ID when one is known.
# Every failure path only prints a warning and returns; nothing here aborts.
_activate_woodpecker_repo_impl() {
  local forge_repo="$1"
  local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}"
  local version_url="${wp_server}/api/version"

  # Give Woodpecker up to ten probes (2s apart) to come up after stack start.
  local attempt
  for (( attempt = 0; attempt < 10; attempt++ )); do
    if curl -sf --max-time 3 "$version_url" >/dev/null 2>&1; then
      break
    fi
    sleep 2
  done

  # Final probe decides whether to continue at all.
  if ! curl -sf --max-time 5 "$version_url" >/dev/null 2>&1; then
    echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2
    return
  fi

  echo ""
  echo "── Woodpecker repo activation ─────────────────────────"

  local api_token="${WOODPECKER_TOKEN:-}"
  if [ -z "$api_token" ]; then
    echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2
    echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2
    return
  fi

  # Is the repo already known to Woodpecker?
  local repo_id
  repo_id=$(curl -sf \
    -H "Authorization: Bearer ${api_token}" \
    "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \
    | jq -r '.id // empty' 2>/dev/null) || true

  if [ -z "$repo_id" ] || [ "$repo_id" = "0" ]; then
    # Not active yet: activation is keyed by Forgejo's numeric repo ID.
    local remote_id
    remote_id=$(curl -sf \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \
      | jq -r '.id // empty' 2>/dev/null) || remote_id=""

    local activation
    activation=$(curl -sf -X POST \
      -H "Authorization: Bearer ${api_token}" \
      "${wp_server}/api/repos?forge_remote_id=${remote_id:-0}" \
      2>/dev/null) || activation=""

    repo_id=$(printf '%s' "$activation" | jq -r '.id // empty' 2>/dev/null) || true

    if [ -z "$repo_id" ] || [ "$repo_id" = "0" ]; then
      echo "Warning: could not activate repo in Woodpecker" >&2
      echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2
    else
      echo "Repo: ${forge_repo} activated in Woodpecker (id=${repo_id})"
      # Set pipeline timeout to 5 minutes (default is 60)
      if curl -sf -X PATCH \
        -H "Authorization: Bearer ${api_token}" \
        -H "Content-Type: application/json" \
        "${wp_server}/api/repos/${repo_id}" \
        -d '{"timeout": 5}' >/dev/null 2>&1; then
        echo "Config: pipeline timeout set to 5 minutes"
      fi
    fi
  else
    echo "Repo: ${forge_repo} already active in Woodpecker (id=${repo_id})"
  fi

  # Store repo ID for later TOML generation
  if [ -n "$repo_id" ] && [ "$repo_id" != "0" ]; then
    _WP_REPO_ID="$repo_id"
  fi
}

View file

@ -13,43 +13,23 @@ FACTORY_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
if [ "${DISINTO_CONTAINER:-}" = "1" ]; then if [ "${DISINTO_CONTAINER:-}" = "1" ]; then
DISINTO_DATA_DIR="${HOME}/data" DISINTO_DATA_DIR="${HOME}/data"
DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs" DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs"
mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher} mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics}
else else
DISINTO_LOG_DIR="${FACTORY_ROOT}" DISINTO_LOG_DIR="${FACTORY_ROOT}"
fi fi
export DISINTO_LOG_DIR export DISINTO_LOG_DIR
# Load secrets: prefer .env.enc (SOPS-encrypted), fall back to plaintext .env. # Load secrets: prefer .env.enc (SOPS-encrypted), fall back to plaintext .env.
# Inside containers (DISINTO_CONTAINER=1), compose environment is the source of truth. # Always source .env — cron jobs inside the container do NOT inherit compose
# On bare metal, .env/.env.enc is sourced to provide default values. # env vars (FORGE_TOKEN, etc.). Compose-injected vars (like FORGE_URL) are
if [ "${DISINTO_CONTAINER:-}" != "1" ]; then # already set and won't be clobbered since env.sh uses ${VAR:-default} patterns
# for derived values. FORGE_URL from .env (localhost:3000) is overridden below
# by the compose-injected value when running via docker exec.
if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then
set -a set -a
_saved_forge_url="${FORGE_URL:-}" _saved_forge_url="${FORGE_URL:-}"
# Use temp file + validate dotenv format before sourcing (avoids eval injection) eval "$(sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" 2>/dev/null)" \
# SOPS -d automatically verifies MAC/GCM authentication tag during decryption || echo "Warning: failed to decrypt .env.enc — secrets not loaded" >&2
_tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; }
if ! sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" > "$_tmpenv" 2>/dev/null; then
echo "Error: failed to decrypt .env.enc — decryption failed, possible corruption" >&2
rm -f "$_tmpenv"
exit 1
fi
# Validate: non-empty, non-comment lines must match KEY=value pattern
# Filter out blank lines and comments before validation
_validated=$(grep -E '^[A-Za-z_][A-Za-z0-9_]*=' "$_tmpenv" 2>/dev/null || true)
if [ -n "$_validated" ]; then
# Write validated content to a second temp file and source it
_validated_env=$(mktemp)
printf '%s\n' "$_validated" > "$_validated_env"
# shellcheck source=/dev/null
source "$_validated_env"
rm -f "$_validated_env"
else
echo "Error: .env.enc decryption output failed format validation" >&2
rm -f "$_tmpenv"
exit 1
fi
rm -f "$_tmpenv"
set +a set +a
[ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url"
elif [ -f "$FACTORY_ROOT/.env" ]; then elif [ -f "$FACTORY_ROOT/.env" ]; then
@ -61,14 +41,6 @@ if [ "${DISINTO_CONTAINER:-}" != "1" ]; then
set +a set +a
[ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url"
fi fi
fi
# Allow per-container token override (#375): .env sets the default FORGE_TOKEN
# (dev-bot), then FORGE_TOKEN_OVERRIDE replaces it for containers that need a
# different Forgejo identity (e.g. dev-qwen).
if [ -n "${FORGE_TOKEN_OVERRIDE:-}" ]; then
export FORGE_TOKEN="$FORGE_TOKEN_OVERRIDE"
fi
# PATH: foundry, node, system # PATH: foundry, node, system
export PATH="${HOME}/.local/bin:${HOME}/.foundry/bin:${HOME}/.nvm/versions/node/v22.20.0/bin:/usr/local/bin:/usr/bin:/bin:${PATH}" export PATH="${HOME}/.local/bin:${HOME}/.foundry/bin:${HOME}/.nvm/versions/node/v22.20.0/bin:/usr/local/bin:/usr/bin:/bin:${PATH}"
@ -79,11 +51,16 @@ if [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then
source "${FACTORY_ROOT}/lib/load-project.sh" "$PROJECT_TOML" source "${FACTORY_ROOT}/lib/load-project.sh" "$PROJECT_TOML"
fi fi
# Forge token # Forge token: new FORGE_TOKEN > legacy CODEBERG_TOKEN
export FORGE_TOKEN="${FORGE_TOKEN:-}" if [ -z "${FORGE_TOKEN:-}" ]; then
FORGE_TOKEN="${CODEBERG_TOKEN:-}"
fi
export FORGE_TOKEN
export CODEBERG_TOKEN="${FORGE_TOKEN}" # backwards compat
# Review bot token # Review bot token: FORGE_REVIEW_TOKEN > legacy REVIEW_BOT_TOKEN
export FORGE_REVIEW_TOKEN="${FORGE_REVIEW_TOKEN:-${REVIEW_BOT_TOKEN:-}}" export FORGE_REVIEW_TOKEN="${FORGE_REVIEW_TOKEN:-${REVIEW_BOT_TOKEN:-}}"
export REVIEW_BOT_TOKEN="${FORGE_REVIEW_TOKEN}" # backwards compat
# Per-agent tokens (#747): each agent gets its own Forgejo identity. # Per-agent tokens (#747): each agent gets its own Forgejo identity.
# Falls back to FORGE_TOKEN for backwards compat with single-token setups. # Falls back to FORGE_TOKEN for backwards compat with single-token setups.
@ -92,16 +69,20 @@ export FORGE_GARDENER_TOKEN="${FORGE_GARDENER_TOKEN:-${FORGE_TOKEN}}"
export FORGE_VAULT_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" export FORGE_VAULT_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}"
export FORGE_SUPERVISOR_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}" export FORGE_SUPERVISOR_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}"
export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}" export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}"
export FORGE_ARCHITECT_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}" export FORGE_ACTION_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}"
# Bot usernames filter # Bot usernames filter: FORGE_BOT_USERNAMES > legacy CODEBERG_BOT_USERNAMES
export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}" export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-${CODEBERG_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,action-bot}}"
export CODEBERG_BOT_USERNAMES="${FORGE_BOT_USERNAMES}" # backwards compat
# Project config # Project config (FORGE_* preferred, CODEBERG_* fallback)
export FORGE_REPO="${FORGE_REPO:-}" export FORGE_REPO="${FORGE_REPO:-${CODEBERG_REPO:-}}"
export CODEBERG_REPO="${FORGE_REPO}" # backwards compat
export FORGE_URL="${FORGE_URL:-http://localhost:3000}" export FORGE_URL="${FORGE_URL:-http://localhost:3000}"
export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}" export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}"
export FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}" export FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}"
export CODEBERG_API="${FORGE_API}" # backwards compat
export CODEBERG_WEB="${FORGE_WEB}" # backwards compat
# tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo) # tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo)
if [ -z "${TEA_LOGIN:-}" ]; then if [ -z "${TEA_LOGIN:-}" ]; then
case "${FORGE_URL}" in case "${FORGE_URL}" in
@ -127,7 +108,7 @@ export CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"
# Vault-only token guard (#745): external-action tokens (GITHUB_TOKEN, CLAWHUB_TOKEN) # Vault-only token guard (#745): external-action tokens (GITHUB_TOKEN, CLAWHUB_TOKEN)
# must NEVER be available to agents. They live in .env.vault.enc and are injected # must NEVER be available to agents. They live in .env.vault.enc and are injected
# only into the ephemeral runner container at fire time. Unset them here so # only into the ephemeral vault-runner container at fire time. Unset them here so
# even an accidental .env inclusion cannot leak them into agent sessions. # even an accidental .env inclusion cannot leak them into agent sessions.
unset GITHUB_TOKEN 2>/dev/null || true unset GITHUB_TOKEN 2>/dev/null || true
unset CLAWHUB_TOKEN 2>/dev/null || true unset CLAWHUB_TOKEN 2>/dev/null || true
@ -137,75 +118,21 @@ unset CLAWHUB_TOKEN 2>/dev/null || true
export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1 export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1
# Shared log helper # Shared log helper
# Usage: log "message"
# Output: [2026-04-03T14:00:00Z] agent: message
# Where agent is set via LOG_AGENT variable (defaults to caller's context)
log() { log() {
local agent="${LOG_AGENT:-agent}" printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*"
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*"
} }
# ============================================================================= # Forge API helper — usage: forge_api GET /issues?state=open
# URL VALIDATION HELPER
# =============================================================================
# Validates that a URL variable matches expected patterns to prevent
# URL injection or redirection attacks (OWASP URL Redirection prevention).
# Usage: validate_url <url> [allowed_hosts]
#   url            must start with http:// or https://
#   allowed_hosts  optional whitespace-separated list of exact host names; when
#                  given, the URL's host must equal one of them
# Returns 0 if valid, 1 if invalid.
# =============================================================================
validate_url() {
  local url="${1:-}"
  local allowed_hosts="${2:-}"

  # Must start with http:// or https://
  if [[ ! "$url" =~ ^https?:// ]]; then
    return 1
  fi

  # The authority is everything between "://" and the first "/", "?" or "#".
  local rest="${url#*://}"
  local authority="${rest%%[/?#]*}"

  # Reject embedded credentials (user:pass@host). Only the authority is
  # inspected, so an "@" in the path or query string no longer causes a
  # false rejection.
  if [[ "$authority" == *@* ]]; then
    return 1
  fi

  # If allowed_hosts is specified, validate against it
  if [ -n "$allowed_hosts" ]; then
    local host="${authority%%:*}"
    local -a allowed_list=()
    local allowed
    # Split on whitespace without pathname expansion; read returns non-zero
    # at end of input, which is expected here.
    read -r -d '' -a allowed_list <<<"$allowed_hosts" || true
    for allowed in ${allowed_list[@]+"${allowed_list[@]}"}; do
      if [ "$host" = "$allowed" ]; then
        return 0
      fi
    done
    return 1
  fi

  return 0
}
# =============================================================================
# FORGE API HELPER
# =============================================================================
# Usage: forge_api GET /issues?state=open
# Validates FORGE_API before use to prevent URL injection attacks.
# =============================================================================
forge_api() { forge_api() {
local method="$1" path="$2" local method="$1" path="$2"
shift 2 shift 2
# Validate FORGE_API to prevent URL injection
if ! validate_url "$FORGE_API"; then
echo "ERROR: FORGE_API validation failed - possible URL injection attempt" >&2
return 1
fi
curl -sf -X "$method" \ curl -sf -X "$method" \
-H "Authorization: token ${FORGE_TOKEN}" \ -H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
"${FORGE_API}${path}" "$@" "${FORGE_API}${path}" "$@"
} }
# Backwards-compat alias
codeberg_api() { forge_api "$@"; }
# Paginate a Forge API GET endpoint and return all items as a merged JSON array. # Paginate a Forge API GET endpoint and return all items as a merged JSON array.
# Usage: forge_api_all /path (no existing query params) # Usage: forge_api_all /path (no existing query params)
@ -222,8 +149,7 @@ forge_api_all() {
page=1 page=1
while true; do while true; do
page_items=$(forge_api GET "${path_prefix}${sep}limit=50&page=${page}") page_items=$(forge_api GET "${path_prefix}${sep}limit=50&page=${page}")
count=$(printf '%s' "$page_items" | jq 'length' 2>/dev/null) || count=0 count=$(printf '%s' "$page_items" | jq 'length')
[ -z "$count" ] && count=0
[ "$count" -eq 0 ] && break [ "$count" -eq 0 ] && break
all_items=$(printf '%s\n%s' "$all_items" "$page_items" | jq -s 'add') all_items=$(printf '%s\n%s' "$all_items" "$page_items" | jq -s 'add')
[ "$count" -lt 50 ] && break [ "$count" -lt 50 ] && break
@ -231,31 +157,21 @@ forge_api_all() {
done done
printf '%s' "$all_items" printf '%s' "$all_items"
} }
# Backwards-compat alias
codeberg_api_all() {
  # Legacy name kept for older callers; passes every argument to
  # forge_api_all unchanged and returns its exit status.
  forge_api_all "$@"
}
# ============================================================================= # Woodpecker API helper
# WOODPECKER API HELPER
# =============================================================================
# Usage: woodpecker_api /repos/{id}/pipelines
# Validates WOODPECKER_SERVER before use to prevent URL injection attacks.
# =============================================================================
woodpecker_api() { woodpecker_api() {
local path="$1" local path="$1"
shift shift
# Validate WOODPECKER_SERVER to prevent URL injection
if ! validate_url "$WOODPECKER_SERVER"; then
echo "ERROR: WOODPECKER_SERVER validation failed - possible URL injection attempt" >&2
return 1
fi
curl -sfL \ curl -sfL \
-H "Authorization: Bearer ${WOODPECKER_TOKEN:-}" \ -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \
"${WOODPECKER_SERVER:-}/api${path}" "$@" "${WOODPECKER_SERVER}/api${path}" "$@"
} }
# Woodpecker DB query helper # Woodpecker DB query helper
wpdb() { wpdb() {
PGPASSWORD="${WOODPECKER_DB_PASSWORD:-}" psql \ PGPASSWORD="${WOODPECKER_DB_PASSWORD}" psql \
-U "${WOODPECKER_DB_USER:-woodpecker}" \ -U "${WOODPECKER_DB_USER:-woodpecker}" \
-h "${WOODPECKER_DB_HOST:-127.0.0.1}" \ -h "${WOODPECKER_DB_HOST:-127.0.0.1}" \
-d "${WOODPECKER_DB_NAME:-woodpecker}" \ -d "${WOODPECKER_DB_NAME:-woodpecker}" \

59
lib/file-action-issue.sh Normal file
View file

@ -0,0 +1,59 @@
#!/usr/bin/env bash
# file-action-issue.sh — File an action issue for a formula run
#
# Usage: source this file, then call file_action_issue.
# Requires: forge_api() from lib/env.sh, jq, lib/secret-scan.sh
#
# file_action_issue <formula_name> <title> <body>
# Sets FILED_ISSUE_NUM on success.
# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected
# Load secret scanner
# shellcheck source=secret-scan.sh
source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh"
file_action_issue() {
  # File an issue labelled "action" for a formula run, unless one is
  # already open.
  #
  # Arguments:
  #   $1 - formula name (matched literally against open issue titles)
  #   $2 - issue title
  #   $3 - issue body
  # Globals:
  #   FILED_ISSUE_NUM - reset to "" on entry, set to the new issue number
  #                     on success
  # Returns:
  #   0=created, 1=duplicate exists, 2=label not found, 3=API error,
  #   4=secrets detected
  local formula_name="$1" title="$2" body="$3"
  FILED_ISSUE_NUM=""

  # Secret scan: reject issue bodies containing embedded secrets
  if ! scan_for_secrets "$body"; then
    echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2
    return 4
  fi

  # Dedup: skip if an open action issue for this formula already exists.
  # The name is compared as a literal substring; treating it as a regex
  # would let metacharacters over-match or make jq fail, and a jq failure
  # is counted as "no duplicate".
  local open_actions existing
  open_actions=$(forge_api_all "/issues?state=open&type=issues&labels=action" 2>/dev/null || true)
  if [ -n "$open_actions" ] && [ "$open_actions" != "null" ]; then
    existing=$(printf '%s' "$open_actions" \
      | jq --arg f "$formula_name" \
        '[.[] | select((.title // "") | contains($f))] | length' 2>/dev/null) || existing=0
    [[ "$existing" =~ ^[0-9]+$ ]] || existing=0
    if [ "$existing" -gt 0 ]; then
      return 1
    fi
  fi

  # Fetch the 'action' label ID. first(...) keeps a single id even if
  # several labels share the name, so the payload below stays valid JSON.
  local action_label_id
  action_label_id=$(forge_api GET "/labels" 2>/dev/null \
    | jq -r 'first(.[] | select(.name == "action") | .id) // empty' 2>/dev/null || true)
  if [[ ! "$action_label_id" =~ ^[0-9]+$ ]]; then
    return 2
  fi

  # Create the issue
  local payload result
  payload=$(jq -nc \
    --arg title "$title" \
    --arg body "$body" \
    --argjson label_id "$action_label_id" \
    '{title: $title, body: $body, labels: [$label_id]}')
  result=$(forge_api POST "/issues" -d "$payload" 2>/dev/null || true)
  FILED_ISSUE_NUM=$(printf '%s' "$result" | jq -r '.number // empty' 2>/dev/null || true)
  if [ -z "$FILED_ISSUE_NUM" ]; then
    return 3
  fi
  return 0
}

View file

@ -1,101 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# forge-push.sh — push_to_forge() function
#
# Handles pushing a local clone to the Forgejo remote and verifying the push.
#
# Globals expected:
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
# FORGE_TOKEN - API token for Forge operations (used for API verification)
# FORGE_PASS - Bot password for git HTTP push (#361: tokens rejected by Forgejo 11.x)
# FACTORY_ROOT - Root of the disinto factory
# PRIMARY_BRANCH - Primary branch name (e.g. main)
#
# Usage:
# source "${FACTORY_ROOT}/lib/forge-push.sh"
# push_to_forge <repo_root> <forge_url> <repo_slug>
# =============================================================================
set -euo pipefail
# Assert required globals are set before using this module.
_assert_forge_push_globals() {
  # Abort the shell (exit 1) with a message on stderr when any global that
  # forge-push.sh relies on is unset or empty; otherwise return 0.
  local name
  local -a unset_names=()
  for name in FORGE_URL FORGE_PASS FORGE_TOKEN FACTORY_ROOT PRIMARY_BRANCH; do
    # ${!name} reads the variable whose name is stored in $name
    if [[ -z "${!name:-}" ]]; then
      unset_names+=("$name")
    fi
  done
  if (( ${#unset_names[@]} == 0 )); then
    return 0
  fi
  echo "Error: forge-push.sh requires these globals to be set: ${unset_names[*]}" >&2
  exit 1
}
# Push local clone to the Forgejo remote.
# Percent-encode a string for use in the userinfo part of a URL.
# Unreserved characters (RFC 3986) pass through; every other byte becomes %XX.
_forge_urlencode() {
  local LC_ALL=C # iterate byte-wise so multibyte characters encode per byte
  local raw="$1" out="" ch hex i
  for ((i = 0; i < ${#raw}; i++)); do
    ch="${raw:i:1}"
    case "$ch" in
      [a-zA-Z0-9._~-]) out+="$ch" ;;
      *)
        printf -v hex '%%%02X' "'$ch"
        out+="$hex"
        ;;
    esac
  done
  printf '%s' "$out"
}

# Push a local clone to the Forgejo remote and confirm the push via the API.
#
# Arguments:
#   $1 - path of the local repository
#   $2 - Forge base URL (e.g. http://localhost:3000)
#   $3 - repository slug (org/repo)
# Globals:
#   FORGE_PASS  - dev-bot password used for git HTTP push (#361)
#   FORGE_TOKEN - API token used for the post-push verification call
# Returns:
#   0 on success, or when there is nothing to push or the API is unreachable;
#   1 if FORGE_PASS is unset, a push fails, or the repo still reports empty.
push_to_forge() {
  local repo_root="$1" forge_url="$2" repo_slug="$3"

  # Build authenticated remote URL: http://dev-bot:<password>@host:port/org/repo.git
  # Forgejo 11.x rejects API tokens for git HTTP push (#361); password auth works.
  if [ -z "${FORGE_PASS:-}" ]; then
    echo "Error: FORGE_PASS not set — cannot push to Forgejo (see #361)" >&2
    return 1
  fi

  # Encode the password and splice it in with parameter expansion rather
  # than sed, so characters such as '|', '&', '@' or '/' cannot corrupt
  # the URL. Alphanumeric passwords produce the same URL as before.
  local encoded_pass
  encoded_pass=$(_forge_urlencode "$FORGE_PASS")
  local auth_url="$forge_url"
  if [[ "$forge_url" == *"://"* ]]; then
    auth_url="${forge_url%%://*}://dev-bot:${encoded_pass}@${forge_url#*://}"
  fi
  local remote_url="${auth_url}/${repo_slug}.git"
  # Display URL without the password
  local display_url="${forge_url}/${repo_slug}.git"

  # Always set the remote URL to ensure credentials are current
  if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then
    git -C "$repo_root" remote set-url forgejo "$remote_url"
  else
    git -C "$repo_root" remote add forgejo "$remote_url"
  fi
  echo "Remote: forgejo -> ${display_url}"

  # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo)
  if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then
    echo "Push: skipped (local repo has no commits)"
    return 0
  fi

  # Push all branches and tags
  echo "Pushing: branches to forgejo"
  if ! git -C "$repo_root" push forgejo --all 2>&1; then
    echo "Error: failed to push branches to Forgejo" >&2
    return 1
  fi
  echo "Pushing: tags to forgejo"
  if ! git -C "$repo_root" push forgejo --tags 2>&1; then
    echo "Error: failed to push tags to Forgejo" >&2
    return 1
  fi

  # Verify the repo is no longer empty (Forgejo may need a moment to index pushed refs)
  local is_empty="true"
  local repo_info verify_attempt
  for ((verify_attempt = 1; verify_attempt <= 5; verify_attempt++)); do
    repo_info=$(curl -sf --max-time 10 \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info=""
    if [ -z "$repo_info" ]; then
      is_empty="skipped"
      break # API unreachable, skip verification
    fi
    # jq's // also replaces a literal false, so a non-empty repo yields
    # "unknown" here; only the string "true" means "still empty".
    is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"')
    if [ "$is_empty" != "true" ]; then
      echo "Verify: repo is not empty (push confirmed)"
      break
    fi
    if [ "$verify_attempt" -lt 5 ]; then
      sleep 2
    fi
  done

  if [ "$is_empty" = "true" ]; then
    echo "Warning: Forgejo repo still reports empty after push" >&2
    return 1
  fi
}

View file

@ -1,683 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# forge-setup.sh — setup_forge() and helpers for Forgejo provisioning
#
# Handles admin user creation, bot user creation, token generation,
# password resets, repo creation, and collaborator setup.
#
# Globals expected (asserted by _load_init_context):
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
# FACTORY_ROOT - Root of the disinto factory
# PRIMARY_BRANCH - Primary branch name (e.g. main)
#
# Usage:
# source "${FACTORY_ROOT}/lib/forge-setup.sh"
# setup_forge <forge_url> <repo_slug>
# =============================================================================
set -euo pipefail
# Assert required globals are set before using this module.
_load_init_context() {
  # Guard for forge-setup.sh: exits the shell with status 1 and a message
  # on stderr when a required global is unset or empty; returns 0 otherwise.
  local var_name
  local -a absent=()
  for var_name in FORGE_URL FACTORY_ROOT PRIMARY_BRANCH; do
    # Indirect expansion: look up the variable named by $var_name
    [[ -n "${!var_name:-}" ]] && continue
    absent+=("$var_name")
  done
  if (( ${#absent[@]} > 0 )); then
    echo "Error: forge-setup.sh requires these globals to be set: ${absent[*]}" >&2
    exit 1
  fi
}
# Execute a command in the Forgejo container (for admin operations)
_forgejo_exec() {
  # Run the given command as user "git" inside the Forgejo container.
  # DISINTO_BARE=true targets the standalone "disinto-forgejo" container;
  # any other value goes through the compose service "forgejo".
  # The exit status is that of the docker invocation.
  case "${DISINTO_BARE:-false}" in
    true)
      docker exec -u git disinto-forgejo "$@"
      ;;
    *)
      docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@"
      ;;
  esac
}
# Provision or connect to a local Forgejo instance.
# Creates admin + bot users, generates API tokens, stores in .env.
# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose.
setup_forge() {
# Provision (or attach to) a Forgejo instance and prepare it for the factory.
#
# Arguments:
#   $1 - forge_url: base URL of the Forgejo instance
#   $2 - repo_slug: "<owner>/<repo>" to create on the forge
# Globals read:
#   DISINTO_BARE      - "true" selects standalone `docker run`, else compose
#   FACTORY_ROOT      - location of docker-compose.yml and the .env file
#   FORGEJO_DATA_DIR  - data volume path (bare mode only)
#   FORGE_TOKEN       - used for the repo-exists check (exported earlier in
#                       this function by the dev-bot iteration)
# Globals written:
#   _FORGE_ADMIN_PASS (plain assignment), and exported: HUMAN_TOKEN, every
#   per-bot token/password variable listed in the maps below, CODEBERG_TOKEN,
#   REVIEW_BOT_TOKEN.
# Files written:
#   ${FACTORY_ROOT}/.env - FORGE_ADMIN_PASS, HUMAN_TOKEN, bot tokens and
#   passwords, FORGE_URL.
# Exits the shell with status 1 on any unrecoverable provisioning error.
local forge_url="$1"
local repo_slug="$2"
local use_bare="${DISINTO_BARE:-false}"
echo ""
echo "── Forge setup ────────────────────────────────────────"
# Check if Forgejo is already running
if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then
echo "Forgejo: ${forge_url} (already running)"
else
echo "Forgejo not reachable at ${forge_url}"
echo "Starting Forgejo via Docker..."
if ! command -v docker &>/dev/null; then
echo "Error: docker not found — needed to provision Forgejo" >&2
echo " Install Docker or start Forgejo manually at ${forge_url}" >&2
exit 1
fi
# Extract port from forge_url
# NOTE(review): when the URL carries no ":<port>" the sed pattern does not
# match and the whole URL is kept, so the 3000 default below only applies
# to an empty string — confirm callers always pass an explicit port.
local forge_port
forge_port=$(printf '%s' "$forge_url" | sed -E 's|.*:([0-9]+)/?$|\1|')
forge_port="${forge_port:-3000}"
if [ "$use_bare" = true ]; then
# Bare-metal mode: standalone docker run
mkdir -p "${FORGEJO_DATA_DIR}"
# Reuse an existing container of the same name if there is one
if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then
docker start disinto-forgejo >/dev/null 2>&1 || true
else
docker run -d \
--name disinto-forgejo \
--restart unless-stopped \
-p "${forge_port}:3000" \
-p 2222:22 \
-v "${FORGEJO_DATA_DIR}:/data" \
-e "FORGEJO__database__DB_TYPE=sqlite3" \
-e "FORGEJO__server__ROOT_URL=${forge_url}/" \
-e "FORGEJO__server__HTTP_PORT=3000" \
-e "FORGEJO__service__DISABLE_REGISTRATION=true" \
codeberg.org/forgejo/forgejo:11.0
fi
else
# Compose mode: start Forgejo via docker compose
docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo
fi
# Wait for Forgejo to become healthy
# Up to 60 retries; each retry is one curl (max 3s) plus a 1s sleep, so the
# real upper bound can exceed the "60s" quoted in the error message.
echo -n "Waiting for Forgejo to start"
local retries=0
while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do
retries=$((retries + 1))
if [ "$retries" -gt 60 ]; then
echo ""
echo "Error: Forgejo did not become ready within 60s" >&2
exit 1
fi
echo -n "."
sleep 1
done
echo " ready"
fi
# Wait for Forgejo database to accept writes (API may be ready before DB is)
echo -n "Waiting for Forgejo database"
local db_ready=false
for _i in $(seq 1 30); do
if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then
db_ready=true
break
fi
echo -n "."
sleep 1
done
echo ""
if [ "$db_ready" != true ]; then
echo "Error: Forgejo database not ready after 30s" >&2
exit 1
fi
# Create admin user if it doesn't exist
local admin_user="disinto-admin"
local admin_pass
local env_file="${FACTORY_ROOT}/.env"
# Re-read persisted admin password if available (#158)
if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-)
fi
# Generate a fresh password only when none was persisted
if [ -z "${admin_pass:-}" ]; then
admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
fi
if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
echo "Creating admin user: ${admin_user}"
local create_output
if ! create_output=$(_forgejo_exec forgejo admin user create \
--admin \
--username "${admin_user}" \
--password "${admin_pass}" \
--email "admin@disinto.local" \
--must-change-password=false 2>&1); then
echo "Error: failed to create admin user '${admin_user}':" >&2
echo " ${create_output}" >&2
exit 1
fi
# Forgejo 11.x ignores --must-change-password=false on create;
# explicitly clear the flag so basic-auth token creation works.
_forgejo_exec forgejo admin user change-password \
--username "${admin_user}" \
--password "${admin_pass}" \
--must-change-password=false
# Verify admin user was actually created
if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
echo "Error: admin user '${admin_user}' not found after creation" >&2
exit 1
fi
# Persist admin password to .env for idempotent re-runs (#158)
if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
sed -i "s|^FORGE_ADMIN_PASS=.*|FORGE_ADMIN_PASS=${admin_pass}|" "$env_file"
else
printf 'FORGE_ADMIN_PASS=%s\n' "$admin_pass" >> "$env_file"
fi
else
echo "Admin user: ${admin_user} (already exists)"
# Only reset password if basic auth fails (#158, #267)
# Forgejo 11.x may ignore --must-change-password=false, blocking token creation
if ! curl -sf --max-time 5 -u "${admin_user}:${admin_pass}" \
"${forge_url}/api/v1/user" >/dev/null 2>&1; then
_forgejo_exec forgejo admin user change-password \
--username "${admin_user}" \
--password "${admin_pass}" \
--must-change-password=false
fi
fi
# Preserve password for Woodpecker OAuth2 token generation (#779)
# (assigned without `local`, so it remains visible after this function returns)
_FORGE_ADMIN_PASS="$admin_pass"
# Create human user (disinto-admin) as site admin if it doesn't exist
# NOTE(review): human_user has the same value as admin_user above, so by this
# point the account normally exists and the create branch is skipped.
# human_pass is freshly random and is never applied to the existing account,
# so the basic-auth calls further down that use human_user:human_pass look
# like they cannot authenticate — confirm whether a distinct user was intended.
local human_user="disinto-admin"
local human_pass
human_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
echo "Creating human user: ${human_user}"
local create_output
if ! create_output=$(_forgejo_exec forgejo admin user create \
--admin \
--username "${human_user}" \
--password "${human_pass}" \
--email "admin@disinto.local" \
--must-change-password=false 2>&1); then
echo "Error: failed to create human user '${human_user}':" >&2
echo " ${create_output}" >&2
exit 1
fi
# Forgejo 11.x ignores --must-change-password=false on create;
# explicitly clear the flag so basic-auth token creation works.
_forgejo_exec forgejo admin user change-password \
--username "${human_user}" \
--password "${human_pass}" \
--must-change-password=false
# Verify human user was actually created
if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
echo "Error: human user '${human_user}' not found after creation" >&2
exit 1
fi
echo " Human user '${human_user}' created as site admin"
else
echo "Human user: ${human_user} (already exists)"
fi
# Delete existing admin token if present (token sha1 is only returned at creation time)
local existing_token_id
existing_token_id=$(curl -sf \
-u "${admin_user}:${admin_pass}" \
"${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \
| jq -r '.[] | select(.name == "disinto-admin-token") | .id') || existing_token_id=""
if [ -n "$existing_token_id" ]; then
curl -sf -X DELETE \
-u "${admin_user}:${admin_pass}" \
"${forge_url}/api/v1/users/${admin_user}/tokens/${existing_token_id}" >/dev/null 2>&1 || true
fi
# Create admin token (fresh, so sha1 is returned)
local admin_token
admin_token=$(curl -sf -X POST \
-u "${admin_user}:${admin_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/users/${admin_user}/tokens" \
-d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \
| jq -r '.sha1 // empty') || admin_token=""
if [ -z "$admin_token" ]; then
echo "Error: failed to obtain admin API token" >&2
exit 1
fi
# Get or create human user token
# Failure here is non-fatal: when no token comes back, nothing is stored and
# no message is printed.
local human_token
if curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
# Delete existing human token if present (token sha1 is only returned at creation time)
local existing_human_token_id
existing_human_token_id=$(curl -sf \
-u "${human_user}:${human_pass}" \
"${forge_url}/api/v1/users/${human_user}/tokens" 2>/dev/null \
| jq -r '.[] | select(.name == "disinto-human-token") | .id') || existing_human_token_id=""
if [ -n "$existing_human_token_id" ]; then
curl -sf -X DELETE \
-u "${human_user}:${human_pass}" \
"${forge_url}/api/v1/users/${human_user}/tokens/${existing_human_token_id}" >/dev/null 2>&1 || true
fi
# Create human token (fresh, so sha1 is returned)
human_token=$(curl -sf -X POST \
-u "${human_user}:${human_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/users/${human_user}/tokens" \
-d '{"name":"disinto-human-token","scopes":["all"]}' 2>/dev/null \
| jq -r '.sha1 // empty') || human_token=""
if [ -n "$human_token" ]; then
# Store human token in .env
if grep -q '^HUMAN_TOKEN=' "$env_file" 2>/dev/null; then
sed -i "s|^HUMAN_TOKEN=.*|HUMAN_TOKEN=${human_token}|" "$env_file"
else
printf 'HUMAN_TOKEN=%s\n' "$human_token" >> "$env_file"
fi
export HUMAN_TOKEN="$human_token"
echo " Human token saved (HUMAN_TOKEN)"
fi
fi
# Create bot users and tokens
# Each agent gets its own Forgejo account for identity and audit trail (#747).
# Map: bot-username -> env-var-name for the token
local -A bot_token_vars=(
[dev-bot]="FORGE_TOKEN"
[review-bot]="FORGE_REVIEW_TOKEN"
[planner-bot]="FORGE_PLANNER_TOKEN"
[gardener-bot]="FORGE_GARDENER_TOKEN"
[vault-bot]="FORGE_VAULT_TOKEN"
[supervisor-bot]="FORGE_SUPERVISOR_TOKEN"
[predictor-bot]="FORGE_PREDICTOR_TOKEN"
[architect-bot]="FORGE_ARCHITECT_TOKEN"
)
# Map: bot-username -> env-var-name for the password
# Forgejo 11.x API tokens don't work for git HTTP push (#361).
# Store passwords so agents can use password auth for git operations.
local -A bot_pass_vars=(
[dev-bot]="FORGE_PASS"
[review-bot]="FORGE_REVIEW_PASS"
[planner-bot]="FORGE_PLANNER_PASS"
[gardener-bot]="FORGE_GARDENER_PASS"
[vault-bot]="FORGE_VAULT_PASS"
[supervisor-bot]="FORGE_SUPERVISOR_PASS"
[predictor-bot]="FORGE_PREDICTOR_PASS"
[architect-bot]="FORGE_ARCHITECT_PASS"
)
# Llama bot users (local-model agents) — separate from main agents
# Each llama agent gets its own Forgejo user, token, and password
local -A llama_token_vars=(
[dev-qwen]="FORGE_TOKEN_LLAMA"
[dev-qwen-nightly]="FORGE_TOKEN_LLAMA_NIGHTLY"
)
local -A llama_pass_vars=(
[dev-qwen]="FORGE_PASS_LLAMA"
[dev-qwen-nightly]="FORGE_PASS_LLAMA_NIGHTLY"
)
local bot_user bot_pass token token_var pass_var
# A new random password is generated on every run, for new and existing bots
# alike; existing bots get their password reset to it below.
for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot; do
bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
token_var="${bot_token_vars[$bot_user]}"
# Check if bot user exists
local user_exists=false
if curl -sf --max-time 5 \
-H "Authorization: token ${admin_token}" \
"${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then
user_exists=true
fi
if [ "$user_exists" = false ]; then
echo "Creating bot user: ${bot_user}"
local create_output
if ! create_output=$(_forgejo_exec forgejo admin user create \
--username "${bot_user}" \
--password "${bot_pass}" \
--email "${bot_user}@disinto.local" \
--must-change-password=false 2>&1); then
echo "Error: failed to create bot user '${bot_user}':" >&2
echo " ${create_output}" >&2
exit 1
fi
# Forgejo 11.x ignores --must-change-password=false on create;
# explicitly clear the flag so basic-auth token creation works.
_forgejo_exec forgejo admin user change-password \
--username "${bot_user}" \
--password "${bot_pass}" \
--must-change-password=false
# Verify bot user was actually created
if ! curl -sf --max-time 5 \
-H "Authorization: token ${admin_token}" \
"${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then
echo "Error: bot user '${bot_user}' not found after creation" >&2
exit 1
fi
echo " ${bot_user} user created"
else
echo " ${bot_user} user exists (resetting password for token generation)"
# User exists but may not have a known password.
# Reset it through the Forgejo admin CLI so we can generate a new token.
_forgejo_exec forgejo admin user change-password \
--username "${bot_user}" \
--password "${bot_pass}" \
--must-change-password=false || {
echo "Error: failed to reset password for existing bot user '${bot_user}'" >&2
exit 1
}
fi
# Generate token via API (basic auth as the bot user — Forgejo requires
# basic auth on POST /users/{username}/tokens, token auth is rejected)
# First, try to delete existing tokens to avoid name collision
# Use bot user's own Basic Auth (we just set the password above)
local existing_token_ids
existing_token_ids=$(curl -sf \
-u "${bot_user}:${bot_pass}" \
"${forge_url}/api/v1/users/${bot_user}/tokens" 2>/dev/null \
| jq -r '.[].id // empty' 2>/dev/null) || existing_token_ids=""
# Delete any existing tokens for this user
if [ -n "$existing_token_ids" ]; then
while IFS= read -r tid; do
[ -n "$tid" ] && curl -sf -X DELETE \
-u "${bot_user}:${bot_pass}" \
"${forge_url}/api/v1/users/${bot_user}/tokens/${tid}" >/dev/null 2>&1 || true
done <<< "$existing_token_ids"
fi
token=$(curl -sf -X POST \
-u "${bot_user}:${bot_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/users/${bot_user}/tokens" \
-d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \
| jq -r '.sha1 // empty') || token=""
if [ -z "$token" ]; then
echo "Error: failed to create API token for '${bot_user}'" >&2
exit 1
fi
# Store token in .env under the per-agent variable name
if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then
sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file"
else
printf '%s=%s\n' "$token_var" "$token" >> "$env_file"
fi
export "${token_var}=${token}"
echo " ${bot_user} token generated and saved (${token_var})"
# Store password in .env for git HTTP push (#361)
# Forgejo 11.x API tokens don't work for git push; password auth does.
pass_var="${bot_pass_vars[$bot_user]}"
if grep -q "^${pass_var}=" "$env_file" 2>/dev/null; then
sed -i "s|^${pass_var}=.*|${pass_var}=${bot_pass}|" "$env_file"
else
printf '%s=%s\n' "$pass_var" "$bot_pass" >> "$env_file"
fi
export "${pass_var}=${bot_pass}"
echo " ${bot_user} password saved (${pass_var})"
# Backwards-compat aliases for dev-bot and review-bot
if [ "$bot_user" = "dev-bot" ]; then
export CODEBERG_TOKEN="$token"
elif [ "$bot_user" = "review-bot" ]; then
export REVIEW_BOT_TOKEN="$token"
fi
done
# Create llama bot users and tokens (local-model agents)
# These are separate from the main agents and get their own credentials
echo ""
echo "── Setting up llama bot users ────────────────────────────"
local llama_user llama_pass llama_token llama_token_var llama_pass_var
for llama_user in "${!llama_token_vars[@]}"; do
llama_token_var="${llama_token_vars[$llama_user]}"
llama_pass_var="${llama_pass_vars[$llama_user]}"
# Check if llama bot user exists
local llama_user_exists=false
if curl -sf --max-time 5 \
-H "Authorization: token ${admin_token}" \
"${forge_url}/api/v1/users/${llama_user}" >/dev/null 2>&1; then
llama_user_exists=true
fi
if [ "$llama_user_exists" = false ]; then
echo "Creating llama bot user: ${llama_user}"
# Generate a unique password for this user
llama_pass="llama-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
local create_output
if ! create_output=$(_forgejo_exec forgejo admin user create \
--username "${llama_user}" \
--password "${llama_pass}" \
--email "${llama_user}@disinto.local" \
--must-change-password=false 2>&1); then
echo "Error: failed to create llama bot user '${llama_user}':" >&2
echo " ${create_output}" >&2
exit 1
fi
# Forgejo 11.x ignores --must-change-password=false on create;
# explicitly clear the flag so basic-auth token creation works.
_forgejo_exec forgejo admin user change-password \
--username "${llama_user}" \
--password "${llama_pass}" \
--must-change-password=false
# Verify llama bot user was actually created
if ! curl -sf --max-time 5 \
-H "Authorization: token ${admin_token}" \
"${forge_url}/api/v1/users/${llama_user}" >/dev/null 2>&1; then
echo "Error: llama bot user '${llama_user}' not found after creation" >&2
exit 1
fi
echo " ${llama_user} user created"
else
echo " ${llama_user} user exists (resetting password for token generation)"
# User exists but may not have a known password.
# Reset it through the Forgejo admin CLI so we can generate a new token.
llama_pass="llama-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
_forgejo_exec forgejo admin user change-password \
--username "${llama_user}" \
--password "${llama_pass}" \
--must-change-password=false || {
echo "Error: failed to reset password for existing llama bot user '${llama_user}'" >&2
exit 1
}
fi
# Generate token via API (basic auth as the llama user)
# First, delete any existing tokens to avoid name collision
local existing_llama_token_ids
existing_llama_token_ids=$(curl -sf \
-u "${llama_user}:${llama_pass}" \
"${forge_url}/api/v1/users/${llama_user}/tokens" 2>/dev/null \
| jq -r '.[].id // empty' 2>/dev/null) || existing_llama_token_ids=""
# Delete any existing tokens for this user
if [ -n "$existing_llama_token_ids" ]; then
while IFS= read -r tid; do
[ -n "$tid" ] && curl -sf -X DELETE \
-u "${llama_user}:${llama_pass}" \
"${forge_url}/api/v1/users/${llama_user}/tokens/${tid}" >/dev/null 2>&1 || true
done <<< "$existing_llama_token_ids"
fi
llama_token=$(curl -sf -X POST \
-u "${llama_user}:${llama_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/users/${llama_user}/tokens" \
-d "{\"name\":\"disinto-${llama_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \
| jq -r '.sha1 // empty') || llama_token=""
if [ -z "$llama_token" ]; then
echo "Error: failed to create API token for '${llama_user}'" >&2
exit 1
fi
# Store token in .env under the llama-specific variable name
if grep -q "^${llama_token_var}=" "$env_file" 2>/dev/null; then
sed -i "s|^${llama_token_var}=.*|${llama_token_var}=${llama_token}|" "$env_file"
else
printf '%s=%s\n' "$llama_token_var" "$llama_token" >> "$env_file"
fi
export "${llama_token_var}=${llama_token}"
echo " ${llama_user} token generated and saved (${llama_token_var})"
# Store password in .env for git HTTP push (#361)
# Forgejo 11.x API tokens don't work for git push; password auth does.
if grep -q "^${llama_pass_var}=" "$env_file" 2>/dev/null; then
sed -i "s|^${llama_pass_var}=.*|${llama_pass_var}=${llama_pass}|" "$env_file"
else
printf '%s=%s\n' "$llama_pass_var" "$llama_pass" >> "$env_file"
fi
export "${llama_pass_var}=${llama_pass}"
echo " ${llama_user} password saved (${llama_pass_var})"
done
# Create .profile repos for all bot users (if they don't already exist)
# This runs the same logic as hire-an-agent Step 2-3 for idempotent setup
echo ""
echo "── Setting up .profile repos ────────────────────────────"
local -a bot_users=(dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot)
# Add llama bot users to .profile repo creation
for llama_user in "${!llama_token_vars[@]}"; do
bot_users+=("$llama_user")
done
local bot_user
for bot_user in "${bot_users[@]}"; do
# Check if .profile repo already exists
# NOTE(review): this check sends no credentials while the repo is created
# with "private":true below, so it presumably cannot see an existing
# private repo — confirm, otherwise every re-run attempts creation again.
if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${bot_user}/.profile" >/dev/null 2>&1; then
echo " ${bot_user}/.profile already exists"
continue
fi
echo "Creating ${bot_user}/.profile repo..."
# Create the repo using the admin API to ensure it's created in the bot user's namespace
local create_output
create_output=$(curl -sf -X POST \
-u "${admin_user}:${admin_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/admin/users/${bot_user}/repos" \
-d "{\"name\":\".profile\",\"description\":\"${bot_user}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true
# NOTE(review): the second alternative of this pattern matches any digit, so
# any output containing a number is reported as success — confirm intent.
if echo "$create_output" | grep -q '"id":\|[0-9]'; then
echo " Created ${bot_user}/.profile (via admin API)"
else
echo " Warning: failed to create ${bot_user}/.profile: ${create_output}" >&2
fi
done
# Store FORGE_URL in .env if not already present
if ! grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then
printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file"
fi
# Create the repo on Forgejo if it doesn't exist
local org_name="${repo_slug%%/*}"
local repo_name="${repo_slug##*/}"
# Check if repo already exists
# Everything up to the matching `else` (org/repo creation and collaborator
# grants) runs only when this lookup fails, i.e. for a newly created repo.
if ! curl -sf --max-time 5 \
-H "Authorization: token ${FORGE_TOKEN}" \
"${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then
# Try creating org first (ignore if exists)
curl -sf -X POST \
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/orgs" \
-d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true
# Create repo under org
# NOTE(review): default_branch is the literal "main" in all three creation
# calls, not PRIMARY_BRANCH — confirm this is intended.
if ! curl -sf -X POST \
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/orgs/${org_name}/repos" \
-d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then
# Fallback: create under the human user namespace using admin endpoint
if [ -n "${admin_token:-}" ]; then
if ! curl -sf -X POST \
-H "Authorization: token ${admin_token}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/admin/users/${org_name}/repos" \
-d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then
echo "Error: failed to create repo '${repo_slug}' on Forgejo (admin endpoint)" >&2
exit 1
fi
elif [ -n "${HUMAN_TOKEN:-}" ]; then
if ! curl -sf -X POST \
-H "Authorization: token ${HUMAN_TOKEN}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/user/repos" \
-d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then
echo "Error: failed to create repo '${repo_slug}' on Forgejo (user endpoint)" >&2
exit 1
fi
else
echo "Error: failed to create repo '${repo_slug}' — no admin or human token available" >&2
exit 1
fi
fi
# Add all bot users as collaborators with appropriate permissions
# dev-bot: write (PR creation via lib/vault.sh)
# review-bot: read (PR review)
# planner-bot: write (prerequisites.md, memory)
# gardener-bot: write (backlog grooming)
# vault-bot: write (vault items)
# supervisor-bot: read (health monitoring)
# predictor-bot: read (pattern detection)
# architect-bot: write (sprint PRs)
# Each grant is best-effort: failures are discarded (`|| true`).
local bot_perm
declare -A bot_permissions=(
[dev-bot]="write"
[review-bot]="read"
[planner-bot]="write"
[gardener-bot]="write"
[vault-bot]="write"
[supervisor-bot]="read"
[predictor-bot]="read"
[architect-bot]="write"
)
for bot_user in "${!bot_permissions[@]}"; do
bot_perm="${bot_permissions[$bot_user]}"
curl -sf -X PUT \
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \
-d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true
done
# Add llama bot users as write collaborators for local-model agents
for llama_user in "${!llama_token_vars[@]}"; do
curl -sf -X PUT \
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/repos/${repo_slug}/collaborators/${llama_user}" \
-d '{"permission":"write"}' >/dev/null 2>&1 || true
done
# Add disinto-admin as admin collaborator
curl -sf -X PUT \
-H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/repos/${repo_slug}/collaborators/disinto-admin" \
-d '{"permission":"admin"}' >/dev/null 2>&1 || true
echo "Repo: ${repo_slug} created on Forgejo"
else
echo "Repo: ${repo_slug} (already exists on Forgejo)"
fi
echo "Forge: ${forge_url} (ready)"
}

View file

@ -1,479 +1,54 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# formula-session.sh — Shared helpers for formula-driven polling-loop agents # formula-session.sh — Shared helpers for formula-driven cron agents
# #
# Provides reusable utility functions for the common polling-loop wrapper pattern # Provides reusable functions for the common cron-wrapper + tmux-session
# used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. # pattern used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh.
# #
# Functions: # Functions:
# acquire_run_lock LOCK_FILE — PID lock with stale cleanup # acquire_cron_lock LOCK_FILE — PID lock with stale cleanup
# check_memory [MIN_MB] — skip if available RAM too low
# load_formula FORMULA_FILE — sets FORMULA_CONTENT # load_formula FORMULA_FILE — sets FORMULA_CONTENT
# build_context_block FILE [FILE ...] — sets CONTEXT_BLOCK # build_context_block FILE [FILE ...] — sets CONTEXT_BLOCK
# build_prompt_footer [EXTRA_API_LINES] — sets PROMPT_FOOTER (API ref + env) # start_formula_session SESSION WORKDIR PHASE_FILE — create tmux + claude
# build_sdk_prompt_footer [EXTRA_API] — omits phase protocol (SDK mode) # build_prompt_footer [EXTRA_API] — sets PROMPT_FOOTER (API ref + env + phase)
# formula_worktree_setup WORKTREE — isolated worktree for formula execution # run_formula_and_monitor AGENT [TIMEOUT] [CALLBACK] — session start, inject, monitor, log
# formula_prepare_profile_context — load lessons from .profile repo (pre-session) # formula_phase_callback PHASE — standard crash-recovery callback
# formula_lessons_block — return lessons block for prompt
# profile_write_journal ISSUE_NUM TITLE OUTCOME [FILES] — post-session journal
# profile_load_lessons — load lessons-learned.md into LESSONS_CONTEXT
# ensure_profile_repo [AGENT_IDENTITY] — clone/pull .profile repo
# _profile_has_repo — check if agent has .profile repo
# _count_undigested_journals — count journal entries to digest
# _profile_digest_journals — digest journals into lessons
# _profile_commit_and_push MESSAGE [FILES] — commit/push to .profile repo
# resolve_agent_identity — resolve agent user login from FORGE_TOKEN
# build_graph_section — run build-graph.py and set GRAPH_SECTION
# build_scratch_instruction SCRATCH_FILE — return context scratch instruction
# read_scratch_context SCRATCH_FILE — return scratch file content block
# ensure_ops_repo — clone/pull ops repo
# ops_commit_and_push MESSAGE [FILES] — commit/push to ops repo
# cleanup_stale_crashed_worktrees [HOURS] — thin wrapper around worktree_cleanup_stale
# #
# Requires: lib/env.sh, lib/worktree.sh sourced first for shared helpers. # Requires: lib/agent-session.sh sourced first (for create_agent_session,
# agent_kill_session, agent_inject_into_session).
# Globals used by formula_phase_callback: SESSION_NAME, PHASE_FILE,
# PROJECT_REPO_ROOT, PROMPT (set by the calling script).
# ── Run guards ─────────────────────────────────────────────────────────── # ── Cron guards ──────────────────────────────────────────────────────────
# acquire_run_lock LOCK_FILE # acquire_cron_lock LOCK_FILE
# Acquires a PID lock. Exits 0 if another instance is running. # Acquires a PID lock. Exits 0 if another instance is running.
# Sets an EXIT trap to clean up the lock file. # Sets an EXIT trap to clean up the lock file.
acquire_run_lock() { acquire_cron_lock() {
_RUN_LOCK_FILE="$1" _CRON_LOCK_FILE="$1"
if [ -f "$_RUN_LOCK_FILE" ]; then if [ -f "$_CRON_LOCK_FILE" ]; then
local lock_pid local lock_pid
lock_pid=$(cat "$_RUN_LOCK_FILE" 2>/dev/null || true) lock_pid=$(cat "$_CRON_LOCK_FILE" 2>/dev/null || true)
if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
log "run: already running (PID $lock_pid)" log "run: already running (PID $lock_pid)"
exit 0 exit 0
fi fi
rm -f "$_RUN_LOCK_FILE" rm -f "$_CRON_LOCK_FILE"
fi fi
echo $$ > "$_RUN_LOCK_FILE" echo $$ > "$_CRON_LOCK_FILE"
trap 'rm -f "$_RUN_LOCK_FILE"' EXIT trap 'rm -f "$_CRON_LOCK_FILE"' EXIT
} }
# ── Agent identity resolution ──────────────────────────────────────────── # check_memory [MIN_MB]
# Exits 0 (skip) if available memory is below MIN_MB (default 2000).
# resolve_agent_identity check_memory() {
# Resolves the agent identity (user login) from the FORGE_TOKEN. local min_mb="${1:-2000}"
# Exports AGENT_IDENTITY (user login string). local avail_mb
# Returns 0 on success, 1 on failure. avail_mb=$(free -m | awk '/Mem:/{print $7}')
resolve_agent_identity() { if [ "${avail_mb:-0}" -lt "$min_mb" ]; then
if [ -z "${FORGE_TOKEN:-}" ]; then log "run: skipping — only ${avail_mb}MB available (need ${min_mb})"
log "WARNING: FORGE_TOKEN not set, cannot resolve agent identity" exit 0
return 1
fi fi
local forge_url="${FORGE_URL:-http://localhost:3000}"
AGENT_IDENTITY=$(curl -sf --max-time 10 \
-H "Authorization: token ${FORGE_TOKEN}" \
"${forge_url}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null) || true
if [ -z "$AGENT_IDENTITY" ]; then
log "WARNING: failed to resolve agent identity from FORGE_TOKEN"
return 1
fi
log "Resolved agent identity: ${AGENT_IDENTITY}"
return 0
}
# ── Forge remote resolution ──────────────────────────────────────────────
# resolve_forge_remote
# Picks the git remote that points at the forge and publishes it as
# FORGE_REMOTE.
# The host part of FORGE_URL is compared against the URL column of
# `git remote -v`; the first remote with a matching push URL wins.
# When nothing matches, FORGE_REMOTE is "origin".
# Requires: FORGE_URL, a git repo with remotes in the current directory, log().
# Exports: FORGE_REMOTE (always set).
# Also leaves the extracted host in the global _forge_host.
resolve_forge_remote() {
  # Reduce FORGE_URL to a bare host: drop the scheme, then the path, then
  # the port (e.g. https://codeberg.org/user/repo -> codeberg.org).
  _forge_host=$(printf '%s' "$FORGE_URL" \
    | sed -e 's|https\?://||' -e 's|/.*||' -e 's|:.*||')

  # Only "(push)" rows are considered; $1 is the remote name, $2 its URL.
  FORGE_REMOTE=$(git remote -v \
    | awk -v host="$_forge_host" '/\(push\)/ && $2 ~ host {print $1; exit}')

  # No remote matched the forge host: default to origin.
  export FORGE_REMOTE="${FORGE_REMOTE:-origin}"
  log "forge remote: ${FORGE_REMOTE}"
}
# ── .profile repo management ──────────────────────────────────────────────
# ensure_profile_repo [AGENT_IDENTITY]
# Makes the agent's .profile repo available in a local cache directory:
# clones it on first use, otherwise fetches and fast-forwards the cache.
# The identity comes from $1, then AGENT_IDENTITY, then resolve_agent_identity.
# The clone URL embeds $(whoami) and FORGE_TOKEN as HTTP credentials.
# Requires: FORGE_TOKEN, FORGE_URL (defaults to http://localhost:3000), log().
# Sets: PROFILE_REPO_PATH (cache dir) and PROFILE_FORMULA_PATH (formula.toml
#       inside it; set only on success).
# Returns 0 when a cache exists (even if refreshing it failed), 1 when the
# identity cannot be resolved or the initial clone fails.
ensure_profile_repo() {
  local identity="${1:-${AGENT_IDENTITY:-}}"

  # No identity handed in: derive it from FORGE_TOKEN.
  if [ -z "$identity" ]; then
    if ! resolve_agent_identity; then
      log "WARNING: cannot resolve agent identity, skipping .profile repo"
      return 1
    fi
    identity="$AGENT_IDENTITY"
  fi

  # Cache location: <HOME or /home/agent>/data/.profile/<identity>
  PROFILE_REPO_PATH="${HOME:-/home/agent}/data/.profile/${identity}"

  # Credentialed clone URL: scheme://user:token@host/<identity>/.profile.git
  local base_url="${FORGE_URL:-http://localhost:3000}"
  local remote_url
  remote_url="$(printf '%s' "$base_url" | sed "s|://|://$(whoami):${FORGE_TOKEN}@|")/${identity}/.profile.git"

  if [ ! -d "${PROFILE_REPO_PATH}/.git" ]; then
    # First use: clone; without a clone there is nothing to fall back to.
    log "Cloning .profile repo: ${identity}/.profile -> ${PROFILE_REPO_PATH}"
    if ! git clone --quiet "$remote_url" "$PROFILE_REPO_PATH" 2>/dev/null; then
      log "WARNING: failed to clone .profile repo ${identity}/.profile — falling back to formulas/"
      return 1
    fi
    log ".profile repo cloned: ${PROFILE_REPO_PATH}"
  else
    # Cached: refresh it, trying main first and master second.
    log "Pulling .profile repo: ${identity}/.profile"
    if ! git -C "$PROFILE_REPO_PATH" fetch origin --quiet 2>/dev/null; then
      log "WARNING: failed to pull .profile repo, using cached version"
    else
      git -C "$PROFILE_REPO_PATH" checkout main --quiet 2>/dev/null \
        || git -C "$PROFILE_REPO_PATH" checkout master --quiet 2>/dev/null \
        || true
      git -C "$PROFILE_REPO_PATH" pull --ff-only origin main --quiet 2>/dev/null \
        || git -C "$PROFILE_REPO_PATH" pull --ff-only origin master --quiet 2>/dev/null \
        || true
      log ".profile repo pulled: ${PROFILE_REPO_PATH}"
    fi
  fi

  PROFILE_FORMULA_PATH="${PROFILE_REPO_PATH}/formula.toml"
  return 0
}
# _profile_has_repo
# Checks via the Forgejo API whether the agent owns a .profile repo.
# The identity is AGENT_IDENTITY, or is resolved with resolve_agent_identity
# when that is empty.
# Requires: FORGE_TOKEN, FORGE_URL (defaults to http://localhost:3000).
# Returns 0 if the repo endpoint answers successfully, 1 otherwise
# (identity unresolved, HTTP error such as 404, timeout, network failure).
_profile_has_repo() {
  local agent_identity="${AGENT_IDENTITY:-}"
  if [ -z "$agent_identity" ]; then
    if ! resolve_agent_identity; then
      return 1
    fi
    agent_identity="$AGENT_IDENTITY"
  fi

  local forge_url="${FORGE_URL:-http://localhost:3000}"
  local api_url="${forge_url}/api/v1/repos/${agent_identity}/.profile"

  # -f turns HTTP errors (e.g. 404) into a non-zero exit, so the exit status
  # alone answers the question. --max-time matches resolve_agent_identity so
  # an unresponsive forge cannot stall the caller indefinitely.
  if curl -sf --max-time 10 -o /dev/null \
    -H "Authorization: token ${FORGE_TOKEN:-}" \
    "$api_url" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}
# _count_undigested_journals
# Prints how many *.md journal entries sit directly in
# ${PROFILE_REPO_PATH}/journal; anything under an archive/ path is excluded.
# Prints 0 when the journal directory does not exist.
_count_undigested_journals() {
  if [ -d "${PROFILE_REPO_PATH:-}/journal" ]; then
    # -maxdepth 1 keeps the search to the top level of journal/.
    find "${PROFILE_REPO_PATH}/journal" -maxdepth 1 -name "*.md" -type f \
      ! -path "*/archive/*" 2>/dev/null | wc -l
  else
    echo "0"
  fi
}
# _profile_digest_journals
# Condenses the undigested journal entries in ${PROFILE_REPO_PATH}/journal
# into ${PROFILE_REPO_PATH}/knowledge/lessons-learned.md using a one-shot
# `claude -p` call, then moves the digested entries to journal/archive/.
# Requires: PROFILE_REPO_PATH, log(), claude and jq on PATH; AGENT_IDENTITY
#           (resolved via resolve_agent_identity when empty).
# Optional: CLAUDE_MODEL (default "opus"), LOGFILE (claude's stderr is
#           appended to it; /dev/null when unset).
# Returns 0 on success or when there is nothing to digest; 1 when the
# identity cannot be resolved or the digestion call fails. On failure the
# existing lessons file and the journal entries are left untouched.
_profile_digest_journals() {
  local model="${CLAUDE_MODEL:-opus}"

  if [ -z "${AGENT_IDENTITY:-}" ] && ! resolve_agent_identity; then
    return 1
  fi

  local journal_dir="${PROFILE_REPO_PATH}/journal"
  local knowledge_dir="${PROFILE_REPO_PATH}/knowledge"
  local lessons_file="${knowledge_dir}/lessons-learned.md"

  # Collect undigested journal entries (top-level *.md only; the glob never
  # descends into archive/, the pattern check is a safety net).
  local journal_entries=""
  local jf entry_name
  if [ -d "$journal_dir" ]; then
    for jf in "$journal_dir"/*.md; do
      [ -f "$jf" ] || continue
      [[ "$jf" == */archive/* ]] && continue
      entry_name="${jf##*/}"
      journal_entries="${journal_entries}
### ${entry_name}
$(cat "$jf")
"
    done
  fi

  if [ -z "$journal_entries" ]; then
    log "profile: no undigested journals to digest"
    return 0
  fi

  # Read existing lessons if available
  local existing_lessons=""
  if [ -f "$lessons_file" ]; then
    existing_lessons=$(cat "$lessons_file")
  fi

  # Build prompt for digestion
  local digest_prompt="You are digesting journal entries from a developer agent's work sessions.
## Task
Condense these journal entries into abstract, transferable lessons. Rewrite lessons-learned.md entirely.
## Constraints
- Hard cap: 2KB maximum
- Abstract: patterns and heuristics, not specific issues or file paths
- Transferable: must help with future unseen work, not just recall past work
- Drop the least transferable lessons if over limit
## Existing lessons-learned.md (if any)
${existing_lessons:-<none>}
## Journal entries to digest
${journal_entries}
## Output
Write the complete, rewritten lessons-learned.md content below. No preamble, no explanation — just the file content."

  # Run claude -p one-shot with the same model as the agent. A failed call
  # must abort here: substituting a placeholder result would overwrite the
  # lessons file with that placeholder and archive the journals.
  local output
  if ! output=$(claude -p "$digest_prompt" \
    --output-format json \
    --dangerously-skip-permissions \
    --model "$model" \
    2>>"${LOGFILE:-/dev/null}"); then
    log "profile: failed to digest journals"
    return 1
  fi

  # Extract content from JSON response
  local lessons_content
  lessons_content=$(printf '%s' "$output" | jq -r '.result // empty' 2>/dev/null || echo "")
  if [ -z "$lessons_content" ]; then
    log "profile: failed to digest journals"
    return 1
  fi

  # Write the lessons file (full rewrite)
  mkdir -p "$knowledge_dir"
  printf '%s\n' "$lessons_content" > "$lessons_file"
  log "profile: wrote lessons-learned.md (${#lessons_content} bytes)"

  # Move digested journals to archive
  if [ -d "$journal_dir" ]; then
    mkdir -p "${journal_dir}/archive"
    local archived=0
    for jf in "$journal_dir"/*.md; do
      [ -f "$jf" ] || continue
      [[ "$jf" == */archive/* ]] && continue
      if mv "$jf" "${journal_dir}/archive/${jf##*/}" 2>/dev/null; then
        archived=$((archived + 1))
      fi
    done
    if [ "$archived" -gt 0 ]; then
      log "profile: archived ${archived} journal entries"
    fi
  fi
  return 0
}
# _profile_commit_and_push MESSAGE [FILE ...]
# Stages the given files (or everything when none are given) in the cached
# .profile repo and, if anything ended up staged, commits as AGENT_IDENTITY
# and pushes to origin (main first, master as fallback).
# Commit and push failures are ignored.
# Returns 1 when ${PROFILE_REPO_PATH} is not a git checkout.
_profile_commit_and_push() {
  local commit_msg="$1"
  shift
  local paths=("$@")

  [ -d "${PROFILE_REPO_PATH:-}/.git" ] || return 1

  # Subshell so the directory change does not leak into the caller.
  (
    cd "$PROFILE_REPO_PATH" || exit 1

    if [ ${#paths[@]} -eq 0 ]; then
      git add -A
    else
      git add "${paths[@]}"
    fi

    # `diff --cached --quiet` succeeds when the index matches HEAD, i.e.
    # there is nothing to commit.
    if ! git diff --cached --quiet 2>/dev/null; then
      git config user.name "${AGENT_IDENTITY}" || true
      git config user.email "${AGENT_IDENTITY}@users.noreply.codeberg.org" || true
      git commit -m "$commit_msg" --no-verify 2>/dev/null || true
      git push origin main --quiet 2>/dev/null \
        || git push origin master --quiet 2>/dev/null \
        || true
    fi
  )
}
# profile_load_lessons
# Pre-session: loads knowledge/lessons-learned.md from the agent's .profile
# repo into LESSONS_CONTEXT for prompt injection.
# Lazy digestion: when more than 10 undigested journals exist they are
# digested first (a digestion failure is logged and otherwise ignored).
# Always returns 0. If the agent has no .profile repo or it cannot be
# fetched, the function is a silent no-op and LESSONS_CONTEXT is left as is.
# Requires: AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL, CLAUDE_MODEL, log().
# Sets: LESSONS_CONTEXT — a header line plus the first 2048 bytes of the
#       lessons file, or empty when the file is missing or empty.
profile_load_lessons() {
  # Agents without a reachable .profile repo simply get no lessons.
  _profile_has_repo || return 0
  ensure_profile_repo || return 0

  # Lazy digestion trigger
  local pending
  pending=$(_count_undigested_journals)
  if [ "${pending:-0}" -gt 10 ]; then
    log "profile: digesting ${pending} undigested journals"
    _profile_digest_journals || log "profile: warning — journal digestion failed"
  fi

  LESSONS_CONTEXT=""
  local lessons_path="${PROFILE_REPO_PATH}/knowledge/lessons-learned.md"
  [ -f "$lessons_path" ] || return 0

  # Hard cap at 2KB
  local body
  body=$(head -c 2048 "$lessons_path" 2>/dev/null) || body=""
  if [ -n "$body" ]; then
    # shellcheck disable=SC2034 # exported to caller for prompt injection
    LESSONS_CONTEXT="## Lessons learned (from .profile/knowledge/lessons-learned.md)
${body}"
    log "profile: loaded lessons-learned.md (${#body} bytes)"
  fi
  return 0
}
# formula_prepare_profile_context
# Pre-session convenience wrapper shared by the agent scripts: runs
# profile_load_lessons (ignoring its status) and copies whatever it left in
# LESSONS_CONTEXT into LESSONS_INJECTION.
# Requires: AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL (via profile_load_lessons).
# Sets: LESSONS_INJECTION (empty when no lessons are available).
formula_prepare_profile_context() {
  if ! profile_load_lessons; then
    : # lessons are optional; continue without them
  fi
  LESSONS_INJECTION="${LESSONS_CONTEXT:-}"
}
# formula_lessons_block
# Prints a lessons block for prompt injection: a blank line, the
# "## Lessons learned" header, then the lessons text.
# Usage: LESSONS_BLOCK=$(formula_lessons_block)
# Expects: LESSONS_INJECTION to be set by formula_prepare_profile_context.
# Prints nothing when LESSONS_INJECTION is empty or unset.
# If LESSONS_INJECTION already begins with the header line, the header is
# not emitted a second time.
formula_lessons_block() {
  local header='## Lessons learned (from .profile/knowledge/lessons-learned.md)'
  local body="${LESSONS_INJECTION:-}"
  [ -n "$body" ] || return 0

  # Drop a leading copy of the header so the prompt carries it only once.
  case "$body" in
    "$header"$'\n'*) body="${body#"$header"$'\n'}" ;;
  esac

  printf '\n%s\n%s' "$header" "$body"
}
# profile_write_journal ISSUE_NUM ISSUE_TITLE OUTCOME [FILES_CHANGED]
# Post-session: asks a one-shot `claude -p` call for a short reflection on
# the finished session, stores it as journal/issue-<num>-<UTC timestamp>.md
# in the agent's .profile repo, then commits and pushes that file.
# Requires: AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL, log(), claude and jq.
# Optional: CLAUDE_MODEL (passed as --model when non-empty), LOGFILE
#           (claude's stderr is appended to it; /dev/null when unset).
# Args:
#   $1 - ISSUE_NUM: The issue number worked on
#   $2 - ISSUE_TITLE: The issue title
#   $3 - OUTCOME: Session outcome (merged, blocked, failed, etc.)
#   $4 - FILES_CHANGED: Optional comma-separated list of files changed
# Returns 0 on success and when the agent has no usable .profile repo
# (silent no-op); 1 when the reflection call fails or yields no text, in
# which case no journal file is written.
profile_write_journal() {
  local issue_num="$1"
  local issue_title="$2"
  local outcome="$3"
  local files_changed="${4:-}"

  # Agents without a reachable .profile repo: silent no-op.
  if ! _profile_has_repo; then
    return 0
  fi
  if ! ensure_profile_repo; then
    return 0
  fi

  # Build session summary
  local session_summary=""
  if [ -n "$files_changed" ]; then
    session_summary="Files changed: ${files_changed}
"
  fi
  session_summary="${session_summary}Outcome: ${outcome}"

  # Build reflection prompt
  local reflection_prompt="You are reflecting on a development session. Write a concise journal entry about transferable lessons learned.
## Session context
- Issue: #${issue_num} — ${issue_title}
- Outcome: ${outcome}
${session_summary}
## Task
Write a journal entry focused on what you learned that would help you do similar work better next time.
## Constraints
- Be concise (100-200 words)
- Focus on transferable lessons, not a summary of what you did
- Abstract patterns and heuristics, not specific issue/file references
- One concise entry, not a list
## Output
Write the journal entry below. Use markdown format."

  # Only pass --model when CLAUDE_MODEL is set and non-empty.
  local -a model_args=()
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    model_args=(--model "$CLAUDE_MODEL")
  fi

  # Run claude -p one-shot. A failed call must abort here: substituting a
  # placeholder result would get that placeholder committed and pushed as
  # a journal entry.
  local output
  if ! output=$(claude -p "$reflection_prompt" \
    --output-format json \
    --dangerously-skip-permissions \
    ${model_args[@]+"${model_args[@]}"} \
    2>>"${LOGFILE:-/dev/null}"); then
    log "profile: failed to write journal entry"
    return 1
  fi

  # Extract content from JSON response
  local journal_content
  journal_content=$(printf '%s' "$output" | jq -r '.result // empty' 2>/dev/null || echo "")
  if [ -z "$journal_content" ]; then
    log "profile: failed to write journal entry"
    return 1
  fi

  # Ensure journal directory exists
  local journal_dir="${PROFILE_REPO_PATH}/journal"
  mkdir -p "$journal_dir"

  # Timestamped filename so entries for the same issue accumulate
  local ts
  ts=$(date -u +%Y%m%d-%H%M%S)
  local journal_file="${journal_dir}/issue-${issue_num}-${ts}.md"
  printf '%s\n' "$journal_content" >> "$journal_file"
  log "profile: wrote journal entry for issue #${issue_num} (${ts})"

  # Commit and push to .profile repo (path is relative to the repo root)
  _profile_commit_and_push "journal: issue #${issue_num} reflection (${ts})" "journal/issue-${issue_num}-${ts}.md"
  return 0
}
# ── Formula loading ────────────────────────────────────────────────────── # ── Formula loading ──────────────────────────────────────────────────────
@ -490,60 +65,6 @@ load_formula() {
FORMULA_CONTENT=$(cat "$formula_file") FORMULA_CONTENT=$(cat "$formula_file")
} }
# load_formula_or_profile [ROLE] [FORMULA_FILE]
# Loads the agent's formula, preferring formula.toml from its .profile repo
# and falling back to a file from the factory checkout.
# Lookup order:
#   1. ${PROFILE_FORMULA_PATH} — only when AGENT_IDENTITY is non-empty and
#      ensure_profile_repo succeeds.
#   2. FORMULA_FILE ($2) when given; a missing file is an error and ROLE is
#      not consulted.
#   3. ${FACTORY_ROOT}/formulas/<ROLE>.toml when only ROLE ($1) is given.
# Requires: ensure_profile_repo() and log() available; FACTORY_ROOT for
#           step 3. AGENT_IDENTITY may be unset (step 1 is then skipped).
# Sets: FORMULA_CONTENT, FORMULA_SOURCE (either ".profile" or "formulas/").
# Returns 0 on success, 1 when no formula could be loaded.
load_formula_or_profile() {
  local role="${1:-}"
  local fallback_formula="${2:-}"

  # Try to load from .profile repo. The :- defaults keep this safe when the
  # caller runs under `set -u` without an identity.
  if [ -n "${AGENT_IDENTITY:-}" ] && ensure_profile_repo "$AGENT_IDENTITY"; then
    if [ -f "${PROFILE_FORMULA_PATH:-}" ]; then
      log "formula source: .profile (${PROFILE_FORMULA_PATH})"
      # shellcheck disable=SC2034
      FORMULA_CONTENT="$(cat "$PROFILE_FORMULA_PATH")"
      # shellcheck disable=SC2034
      FORMULA_SOURCE=".profile"
      return 0
    fi
    log "WARNING: .profile repo exists but formula.toml not found at ${PROFILE_FORMULA_PATH:-}"
  fi

  # Explicit fallback file
  if [ -n "$fallback_formula" ]; then
    if [ ! -f "$fallback_formula" ]; then
      log "ERROR: formula not found in .profile and fallback file not found: $fallback_formula"
      return 1
    fi
    log "formula source: formulas/ (fallback) — ${fallback_formula}"
    # shellcheck disable=SC2034
    FORMULA_CONTENT="$(cat "$fallback_formula")"
    # shellcheck disable=SC2034
    FORMULA_SOURCE="formulas/"
    return 0
  fi

  # No fallback file given but a role is known — derive the path from it
  if [ -n "$role" ]; then
    fallback_formula="${FACTORY_ROOT}/formulas/${role}.toml"
    if [ -f "$fallback_formula" ]; then
      log "formula source: formulas/ (fallback) — ${fallback_formula}"
      # shellcheck disable=SC2034
      FORMULA_CONTENT="$(cat "$fallback_formula")"
      # shellcheck disable=SC2034
      FORMULA_SOURCE="formulas/"
      return 0
    fi
  fi

  log "ERROR: formula not found in .profile and no fallback specified"
  return 1
}
# build_context_block FILE [FILE ...] # build_context_block FILE [FILE ...]
# Reads each file from $PROJECT_REPO_ROOT and builds CONTEXT_BLOCK. # Reads each file from $PROJECT_REPO_ROOT and builds CONTEXT_BLOCK.
# Files prefixed with "ops:" are read from $OPS_REPO_ROOT instead. # Files prefixed with "ops:" are read from $OPS_REPO_ROOT instead.
@ -570,7 +91,7 @@ $(cat "$ctx_path")
done done
} }
# ── Ops repo helpers ──────────────────────────────────────────────────── # ── Ops repo helpers ─────────────────────────────────────────────────
# ensure_ops_repo # ensure_ops_repo
# Clones or pulls the ops repo so agents can read/write operational data. # Clones or pulls the ops repo so agents can read/write operational data.
@ -633,6 +154,90 @@ ops_commit_and_push() {
) )
} }
# ── Session management ───────────────────────────────────────────────────
# start_formula_session SESSION WORKDIR PHASE_FILE
# Prepares a fresh formula session:
#   1. kills any existing session named SESSION and deletes PHASE_FILE,
#   2. (re)creates a detached git worktree of WORKDIR's HEAD at
#      /tmp/disinto-<SESSION>,
#   3. starts the agent session in that worktree via create_agent_session.
# Sets _FORMULA_SESSION_WORKDIR to the worktree path, or to WORKDIR itself
# when the worktree could not be created. Callers clean up afterwards with
# remove_formula_worktree.
# Returns 0 on success, 1 if the session could not be created.
start_formula_session() {
  local sess="$1" repo_dir="$2" phase_path="$3"

  agent_kill_session "$sess"
  rm -f "$phase_path"

  # One worktree per session name gives every agent its own working
  # directory, so Claude Code treats them as separate projects and
  # sequential formula runs do not collide on resume.
  _FORMULA_SESSION_WORKDIR="/tmp/disinto-${sess}"

  # Drop whatever a previous run may have left at that path.
  git -C "$repo_dir" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true

  if ! git -C "$repo_dir" worktree add "$_FORMULA_SESSION_WORKDIR" HEAD --detach 2>/dev/null; then
    log "WARNING: worktree creation failed — falling back to ${repo_dir}"
    _FORMULA_SESSION_WORKDIR="$repo_dir"
  else
    log "Created worktree: ${_FORMULA_SESSION_WORKDIR}"
  fi

  log "Creating tmux session: ${sess}"
  create_agent_session "$sess" "$_FORMULA_SESSION_WORKDIR" "$phase_path" && return 0

  log "ERROR: failed to create tmux session ${sess}"
  return 1
}
# remove_formula_worktree
# Force-removes the worktree recorded in _FORMULA_SESSION_WORKDIR, unless
# that variable is empty or equals PROJECT_REPO_ROOT (i.e. no separate
# worktree was created). A failing `git worktree remove` is ignored, so
# repeated calls are harmless.
remove_formula_worktree() {
  # Nothing recorded: no worktree to remove.
  [ -n "${_FORMULA_SESSION_WORKDIR:-}" ] || return 0
  # The session ran in the main checkout: never remove that.
  [ "$_FORMULA_SESSION_WORKDIR" != "${PROJECT_REPO_ROOT:-}" ] || return 0

  git -C "$PROJECT_REPO_ROOT" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true
  log "Removed worktree: ${_FORMULA_SESSION_WORKDIR}"
}
# formula_phase_callback PHASE
# Default phase handler for formula sessions.
#   PHASE:crashed — restart the session once and re-inject PROMPT; a second
#                   crash is logged and not retried.
#   PHASE:done | PHASE:failed | PHASE:escalate | PHASE:merged — kill the
#                   session.
#   anything else — only logged.
# The session acted on is _MONITOR_SESSION when set, otherwise SESSION_NAME;
# a restarted session runs in _FORMULA_SESSION_WORKDIR when set, otherwise
# PROJECT_REPO_ROOT.
# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT.
# Uses _FORMULA_CRASH_COUNT (treated as 0 when unset) as the retry counter.
# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller
formula_phase_callback() {
  local phase="$1"
  local target
  log "phase: ${phase}"

  case "$phase" in
    PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged)
      target="${_MONITOR_SESSION:-$SESSION_NAME}"
      agent_kill_session "$target"
      ;;
    PHASE:crashed)
      # One recovery attempt per run; a repeat crash is final.
      if [ "${_FORMULA_CRASH_COUNT:-0}" -gt 0 ]; then
        log "ERROR: session crashed again after recovery — giving up"
        return 0
      fi
      _FORMULA_CRASH_COUNT=$(( ${_FORMULA_CRASH_COUNT:-0} + 1 ))
      log "WARNING: tmux session died unexpectedly — attempting recovery"

      target="${_MONITOR_SESSION:-$SESSION_NAME}"
      if ! create_agent_session "$target" "${_FORMULA_SESSION_WORKDIR:-$PROJECT_REPO_ROOT}" "$PHASE_FILE" 2>/dev/null; then
        log "ERROR: could not restart session after crash"
      else
        agent_inject_into_session "$target" "$PROMPT"
        log "Recovery session started"
      fi
      ;;
  esac
}
# ── Stale crashed worktree cleanup ─────────────────────────────────────────
# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS]
# Backwards-compatible alias for worktree_cleanup_stale() from
# lib/worktree.sh; existing callers still use this name.
# MAX_AGE_HOURS defaults to 24 when omitted or empty.
# Requires: lib/worktree.sh sourced.
cleanup_stale_crashed_worktrees() {
  local max_age_hours="${1:-24}"
  worktree_cleanup_stale "$max_age_hours"
}
# ── Scratch file helpers (compaction survival) ──────────────────────────── # ── Scratch file helpers (compaction survival) ────────────────────────────
# build_scratch_instruction SCRATCH_FILE # build_scratch_instruction SCRATCH_FILE
@ -678,14 +283,8 @@ build_graph_section() {
--project-root "$PROJECT_REPO_ROOT" \ --project-root "$PROJECT_REPO_ROOT" \
--output "$report" 2>>"$LOG_FILE"; then --output "$report" 2>>"$LOG_FILE"; then
# shellcheck disable=SC2034 # shellcheck disable=SC2034
local report_content GRAPH_SECTION=$(printf '\n## Structural analysis\n```json\n%s\n```\n' \
report_content="$(cat "$report")" "$(cat "$report")")
# shellcheck disable=SC2034
GRAPH_SECTION="
## Structural analysis
\`\`\`json
${report_content}
\`\`\`"
log "graph report generated: $(jq -r '.stats | "\(.nodes) nodes, \(.edges) edges"' "$report")" log "graph report generated: $(jq -r '.stats | "\(.nodes) nodes, \(.edges) edges"' "$report")"
else else
log "WARN: build-graph.py failed — continuing without structural analysis" log "WARN: build-graph.py failed — continuing without structural analysis"
@ -708,26 +307,25 @@ build_sdk_prompt_footer() {
# Creates an isolated worktree for synchronous formula execution. # Creates an isolated worktree for synchronous formula execution.
# Fetches primary branch, cleans stale worktree, creates new one, and # Fetches primary branch, cleans stale worktree, creates new one, and
# sets an EXIT trap for cleanup. # sets an EXIT trap for cleanup.
# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH, FORGE_REMOTE. # Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH.
# Ensure resolve_forge_remote() is called before this function.
formula_worktree_setup() { formula_worktree_setup() {
local worktree="$1" local worktree="$1"
cd "$PROJECT_REPO_ROOT" || return cd "$PROJECT_REPO_ROOT" || return
git fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true
worktree_cleanup "$worktree" worktree_cleanup "$worktree"
git worktree add "$worktree" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" --detach 2>/dev/null git worktree add "$worktree" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null
# shellcheck disable=SC2064 # expand worktree now, not at trap time # shellcheck disable=SC2064 # expand worktree now, not at trap time
trap "worktree_cleanup '$worktree'" EXIT trap "worktree_cleanup '$worktree'" EXIT
} }
# ── Prompt helpers ────────────────────────────────────────────────────── # ── Prompt + monitor helpers ──────────────────────────────────────────────
# build_prompt_footer [EXTRA_API_LINES] # build_prompt_footer [EXTRA_API_LINES]
# Assembles the common forge API reference + environment block for formula prompts. # Assembles the common forge API reference + environment + phase protocol
# Sets PROMPT_FOOTER. # block for formula prompts. Sets PROMPT_FOOTER.
# Pass additional API endpoint lines (pre-formatted, newline-prefixed) via $1. # Pass additional API endpoint lines (pre-formatted, newline-prefixed) via $1.
# Requires globals: FORGE_API, FACTORY_ROOT, PROJECT_REPO_ROOT, # Requires globals: FORGE_API, FACTORY_ROOT, PROJECT_REPO_ROOT,
# PRIMARY_BRANCH. # PRIMARY_BRANCH, PHASE_FILE.
build_prompt_footer() { build_prompt_footer() {
local extra_api="${1:-}" local extra_api="${1:-}"
# shellcheck disable=SC2034 # consumed by the calling script's PROMPT # shellcheck disable=SC2034 # consumed by the calling script's PROMPT
@ -743,15 +341,66 @@ NEVER echo or include the actual token value in output — always reference \${F
FACTORY_ROOT=${FACTORY_ROOT} FACTORY_ROOT=${FACTORY_ROOT}
PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT} PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT}
OPS_REPO_ROOT=${OPS_REPO_ROOT} OPS_REPO_ROOT=${OPS_REPO_ROOT}
PRIMARY_BRANCH=${PRIMARY_BRANCH}" PRIMARY_BRANCH=${PRIMARY_BRANCH}
PHASE_FILE=${PHASE_FILE}
## Phase protocol (REQUIRED)
When all work is done:
echo 'PHASE:done' > '${PHASE_FILE}'
On unrecoverable error:
printf 'PHASE:failed\nReason: %s\n' 'describe error' > '${PHASE_FILE}'"
} }
# ── Stale crashed worktree cleanup ──────────────────────────────────────── # run_formula_and_monitor AGENT_NAME [TIMEOUT]
# Starts the formula session, injects PROMPT, monitors phase, and logs result.
# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT,
# FORGE_REPO, CLAUDE_MODEL (exported).
# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller
run_formula_and_monitor() {
local agent_name="$1"
local timeout="${2:-7200}"
local callback="${3:-formula_phase_callback}"
# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS] if ! start_formula_session "$SESSION_NAME" "$PROJECT_REPO_ROOT" "$PHASE_FILE"; then
# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh. exit 1
# Kept for backwards compatibility with existing callers. fi
# Requires: lib/worktree.sh sourced.
cleanup_stale_crashed_worktrees() { # Write phase protocol to context file for compaction survival
worktree_cleanup_stale "${1:-24}" if [ -n "${PROMPT_FOOTER:-}" ]; then
write_compact_context "$PHASE_FILE" "$PROMPT_FOOTER"
fi
agent_inject_into_session "$SESSION_NAME" "$PROMPT"
log "Prompt sent to tmux session"
log "Monitoring phase file: ${PHASE_FILE}"
_FORMULA_CRASH_COUNT=0
monitor_phase_loop "$PHASE_FILE" "$timeout" "$callback"
FINAL_PHASE=$(read_phase "$PHASE_FILE")
log "Final phase: ${FINAL_PHASE:-none}"
if [ "$FINAL_PHASE" != "PHASE:done" ]; then
case "${_MONITOR_LOOP_EXIT:-}" in
idle_prompt)
log "${agent_name}: Claude returned to prompt without writing phase signal"
;;
idle_timeout)
log "${agent_name}: timed out with no phase signal"
;;
*)
log "${agent_name} finished without PHASE:done (phase: ${FINAL_PHASE:-none}, exit: ${_MONITOR_LOOP_EXIT:-})"
;;
esac
fi
# Preserve worktree on crash for debugging; clean up on success
if [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ]; then
worktree_preserve "${_FORMULA_SESSION_WORKDIR:-}" "crashed (agent=${agent_name})"
else
remove_formula_worktree
fi
log "--- ${agent_name^} run done ---"
} }

View file

@ -1,666 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# generators — template generation functions for disinto init
#
# Generates docker-compose.yml, Dockerfile, Caddyfile, staging index, and
# deployment pipeline configs.
#
# Globals expected (must be set before sourcing):
# FACTORY_ROOT - Root of the disinto factory
# PROJECT_NAME - Project name for the project repo (defaults to 'project')
# PRIMARY_BRANCH - Primary branch name (defaults to 'main')
#
# Usage:
# source "${FACTORY_ROOT}/lib/generators.sh"
# generate_compose "$forge_port"
# generate_caddyfile
# generate_staging_index
# generate_deploy_pipelines "$repo_root" "$project_name"
# =============================================================================
# Strict mode. This file is meant to be sourced (see Usage above), so these
# options take effect in the sourcing shell as well.
set -euo pipefail
# Assert required globals are set: aborts with the given message when
# FACTORY_ROOT is unset or empty.
: "${FACTORY_ROOT:?FACTORY_ROOT must be set}"
# PROJECT_NAME defaults to 'project' if not set (env.sh may have set it from FORGE_REPO)
PROJECT_NAME="${PROJECT_NAME:-project}"
# PRIMARY_BRANCH defaults to main (env.sh may have set it to 'master')
PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}"
# _get_woodpecker_repo_id TOML_FILE
# Prints the woodpecker_repo_id value from the [ci] table of a project TOML
# file. Prints "0" when the file does not exist, the key is absent, the
# file cannot be parsed, or python3 itself fails.
# Parsing uses Python's tomllib module.
_get_woodpecker_repo_id() {
  local toml_path="$1"

  if [ ! -f "$toml_path" ]; then
    echo "0"
    return
  fi

  # Python errors go to /dev/null; any non-zero exit falls back to "0".
  python3 -c "
import sys, tomllib
try:
    with open(sys.argv[1], 'rb') as f:
        cfg = tomllib.load(f)
    ci = cfg.get('ci', {})
    wp_id = ci.get('woodpecker_repo_id', '0')
    print(wp_id)
except Exception:
    print('0')
" "$toml_path" 2>/dev/null || echo "0"
}
# Scan ${FACTORY_ROOT}/projects/*.toml and print the numerically highest
# woodpecker_repo_id found (used for the main agents service, which has no
# per-project TOML of its own).
#
# Globals: FACTORY_ROOT (read)
# Outputs: the highest repo id on stdout, or "0" when no project declares one
_get_primary_woodpecker_repo_id() {
  local highest="0"
  local project_toml candidate

  for project_toml in "${FACTORY_ROOT}/projects"/*.toml; do
    # An unmatched glob stays literal; the -f test filters that case out.
    [ -f "$project_toml" ] || continue

    candidate=$(_get_woodpecker_repo_id "$project_toml")
    case "$candidate" in
      '' | 0) continue ;;   # nothing configured for this project
    esac

    # Non-numeric ids make the comparison fail; the error text is discarded
    # and the value is simply ignored.
    if [ "$candidate" -gt "$highest" ] 2>/dev/null; then
      highest="$candidate"
    fi
  done

  echo "$highest"
}
# Parse every project TOML for local-model agents ([agents.<name>] tables that
# define both base_url and model) and splice one compose service per agent
# into an existing compose file, in place. The service definitions are
# inserted before the top-level "volumes:" key and one named data volume per
# agent is added to that volumes section.
#
# Globals:   FACTORY_ROOT (read) - projects/*.toml are scanned
#            PROJECT_NAME (read) - baked into PROJECT_REPO_ROOT at generation
# Arguments: $1 - path to the docker-compose.yml to modify
# Outputs:   nothing on stdout; $1 is rewritten only when at least one
#            local-model agent is configured
_generate_local_model_services() {
  local compose_file="$1"
  local projects_dir="${FACTORY_ROOT}/projects"
  local temp_file
  temp_file=$(mktemp)
  local has_services=false
  local all_vols=""

  # Bash scoping is dynamic: without these declarations the per-agent fields
  # would overwrite same-named variables of whichever function called us
  # (and leak into the global scope). They are initialised so that the reads
  # below stay valid under `set -u`.
  local toml key value wp_repo_id vol_name
  local service_name="" base_url="" model="" roles="" api_key=""
  local forge_user="" compact_pct="" poll_interval_val=""

  # Find all project TOML files and extract [agents.*] sections
  for toml in "${projects_dir}"/*.toml; do
    [ -f "$toml" ] || continue

    # Get woodpecker_repo_id for this project
    wp_repo_id=$(_get_woodpecker_repo_id "$toml")

    # The Python helper prints KEY=value lines per agent, terminated by a
    # "---" record separator. IFS='=' splits on the first '=' only, so values
    # such as URLs containing '=' survive intact.
    while IFS='=' read -r key value; do
      case "$key" in
        NAME) service_name="$value" ;;
        BASE_URL) base_url="$value" ;;
        MODEL) model="$value" ;;
        ROLES) roles="$value" ;;
        API_KEY) api_key="$value" ;;
        FORGE_USER) forge_user="$value" ;;
        COMPACT_PCT) compact_pct="$value" ;;
        POLL_INTERVAL) poll_interval_val="$value" ;;
        ---)
          if [ -n "$service_name" ] && [ -n "$base_url" ]; then
            # Unquoted heredoc: ${...} is expanded now; \${...} is emitted
            # literally for Docker Compose to resolve at runtime.
            cat >> "$temp_file" <<EOF
  agents-${service_name}:
    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    container_name: disinto-agents-${service_name}
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
      - agents-${service_name}-data:/home/agent/data
      - project-repos:/home/agent/repos
      - \${HOME}/.claude:/home/agent/.claude
      - \${HOME}/.claude.json:/home/agent/.claude.json:ro
      - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro
      - \${HOME}/.ssh:/home/agent/.ssh:ro
    environment:
      FORGE_URL: http://forgejo:3000
      # Use llama-specific credentials if available, otherwise fall back to main FORGE_TOKEN
      FORGE_TOKEN: \${FORGE_TOKEN_LLAMA:-\${FORGE_TOKEN:-}}
      FORGE_PASS: \${FORGE_PASS_LLAMA:-\${FORGE_PASS:-}}
      FORGE_REVIEW_TOKEN: \${FORGE_REVIEW_TOKEN:-}
      FORGE_BOT_USERNAMES: \${FORGE_BOT_USERNAMES:-}
      AGENT_ROLES: "${roles}"
      CLAUDE_TIMEOUT: \${CLAUDE_TIMEOUT:-7200}
      ANTHROPIC_BASE_URL: "${base_url}"
      ANTHROPIC_API_KEY: "${api_key}"
      CLAUDE_MODEL: "${model}"
      CLAUDE_CONFIG_DIR: /home/agent/.claude-${service_name}
      CLAUDE_CREDENTIALS_DIR: /home/agent/.claude-${service_name}/credentials
      CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "${compact_pct}"
      CLAUDE_CODE_ATTRIBUTION_HEADER: "0"
      CLAUDE_CODE_ENABLE_TELEMETRY: "0"
      DISINTO_CONTAINER: "1"
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      WOODPECKER_DATA_DIR: /woodpecker-data
      WOODPECKER_REPO_ID: "${wp_repo_id}"
      FORGE_BOT_USER_${service_name^^}: "${forge_user}"
      POLL_INTERVAL: "${poll_interval_val}"
    depends_on:
      - forgejo
      - woodpecker
    networks:
      - disinto-net
    profiles: ["agents-${service_name}"]
EOF
            has_services=true

            # Remember the named volume so it can be declared at top level.
            vol_name="  agents-${service_name}-data:"
            all_vols="${all_vols:+${all_vols}
}${vol_name}"
          fi
          # Reset the record so fields never bleed into the next agent.
          service_name="" base_url="" model="" roles="" api_key=""
          forge_user="" compact_pct="" poll_interval_val=""
          ;;
      esac
    done < <(python3 -c '
import sys, tomllib, re

with open(sys.argv[1], "rb") as f:
    cfg = tomllib.load(f)

agents = cfg.get("agents", {})
for name, config in agents.items():
    if not isinstance(config, dict):
        continue
    base_url = config.get("base_url", "")
    model = config.get("model", "")
    if not base_url or not model:
        continue
    roles = config.get("roles", ["dev"])
    roles_str = " ".join(roles) if isinstance(roles, list) else roles
    api_key = config.get("api_key", "sk-no-key-required")
    forge_user = config.get("forge_user", f"{name}-bot")
    compact_pct = config.get("compact_pct", 60)
    poll_interval = config.get("poll_interval", 60)
    safe_name = name.lower()
    safe_name = re.sub(r"[^a-z0-9]", "-", safe_name)
    # Output as simple key=value lines
    print(f"NAME={safe_name}")
    print(f"BASE_URL={base_url}")
    print(f"MODEL={model}")
    print(f"ROLES={roles_str}")
    print(f"API_KEY={api_key}")
    print(f"FORGE_USER={forge_user}")
    print(f"COMPACT_PCT={compact_pct}")
    print(f"POLL_INTERVAL={poll_interval}")
    print("---")
' "$toml" 2>/dev/null)
  done

  if [ "$has_services" = true ]; then
    # Insert the services before the top-level volumes section
    local temp_compose
    temp_compose=$(mktemp)
    # Everything before "volumes:" (the matching line itself is dropped)
    sed -n '1,/^volumes:/p' "$compose_file" | sed '$d' > "$temp_compose"
    # The generated services
    cat "$temp_file" >> "$temp_compose"
    # The volumes section and everything after it
    sed -n '/^volumes:/,$p' "$compose_file" >> "$temp_compose"

    if [ -n "$all_vols" ]; then
      # A raw newline inside a sed replacement terminates the s command
      # ("unterminated `s' command"), which broke generation as soon as two
      # agents were configured. Encode the separators as \n instead.
      local vols_escaped="${all_vols//$'\n'/\\n}"
      # Append the new volume names after an existing (indented) entry of the
      # top-level volumes section.
      sed -i "/^volumes:/{n;:a;n;/^[a-z]/!{s/$/\n${vols_escaped}/;b};ba}" "$temp_compose"
    fi
    mv "$temp_compose" "$compose_file"
  fi

  rm -f "$temp_file"
}
# Generate docker-compose.yml in the factory root.
#
# Writes a fixed template to ${FACTORY_ROOT}/docker-compose.yml and then
# patches it in place, in this order: PROJECT_NAME, the Woodpecker repo id,
# the Forgejo host port mapping, local-model agent services, and finally the
# host path of the claude CLI binary.
#
# Globals:   FACTORY_ROOT (read), PROJECT_NAME (read)
# Arguments: $1 - host port to publish Forgejo's port 3000 on (default: 3000)
# Outputs:   a status line on stdout; a warning on stderr when the claude CLI
#            is not found in PATH
# Returns:   0 without touching anything when the compose file already exists
_generate_compose_impl() {
local forge_port="${1:-3000}"
local compose_file="${FACTORY_ROOT}/docker-compose.yml"
# Check if compose file already exists
if [ -f "$compose_file" ]; then
echo "Compose: ${compose_file} (already exists, skipping)"
return 0
fi
# Extract primary woodpecker_repo_id from project TOML files
local wp_repo_id
wp_repo_id=$(_get_primary_woodpecker_repo_id)
# Quoted heredoc delimiter: the template below is written verbatim. Every
# ${...} in it is left for Docker Compose to resolve, except for the
# placeholders that the sed patches after the heredoc replace.
cat > "$compose_file" <<'COMPOSEEOF'
# docker-compose.yml — generated by disinto init
# Brings up Forgejo, Woodpecker, and the agent runtime.
services:
forgejo:
image: codeberg.org/forgejo/forgejo:11.0
container_name: disinto-forgejo
restart: unless-stopped
security_opt:
- apparmor=unconfined
volumes:
- forgejo-data:/data
environment:
FORGEJO__database__DB_TYPE: sqlite3
FORGEJO__server__ROOT_URL: http://forgejo:3000/
FORGEJO__server__HTTP_PORT: "3000"
FORGEJO__security__INSTALL_LOCK: "true"
FORGEJO__service__DISABLE_REGISTRATION: "true"
FORGEJO__webhook__ALLOWED_HOST_LIST: "private"
networks:
- disinto-net
woodpecker:
image: woodpeckerci/woodpecker-server:v3
container_name: disinto-woodpecker
restart: unless-stopped
security_opt:
- apparmor=unconfined
ports:
- "8000:8000"
- "9000:9000"
volumes:
- woodpecker-data:/var/lib/woodpecker
environment:
WOODPECKER_FORGEJO: "true"
WOODPECKER_FORGEJO_URL: http://forgejo:3000
WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-}
WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-}
WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000}
WOODPECKER_OPEN: "true"
WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
WOODPECKER_DATABASE_DRIVER: sqlite3
WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite
WOODPECKER_ENVIRONMENT: "FORGE_TOKEN:${FORGE_TOKEN}"
depends_on:
- forgejo
networks:
- disinto-net
woodpecker-agent:
image: woodpeckerci/woodpecker-agent:v3
container_name: disinto-woodpecker-agent
restart: unless-stopped
network_mode: host
privileged: true
security_opt:
- apparmor=unconfined
volumes:
- /var/run/docker.sock:/var/run/docker.sock
environment:
WOODPECKER_SERVER: localhost:9000
WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
WOODPECKER_GRPC_SECURE: "false"
WOODPECKER_HEALTHCHECK_ADDR: ":3333"
WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net
WOODPECKER_MAX_WORKFLOWS: 1
depends_on:
- woodpecker
agents:
build:
context: .
dockerfile: docker/agents/Dockerfile
container_name: disinto-agents
restart: unless-stopped
security_opt:
- apparmor=unconfined
volumes:
- agent-data:/home/agent/data
- project-repos:/home/agent/repos
- ${HOME}/.claude:/home/agent/.claude
- ${HOME}/.claude.json:/home/agent/.claude.json:ro
- CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro
- ${HOME}/.ssh:/home/agent/.ssh:ro
- ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro
- woodpecker-data:/woodpecker-data:ro
environment:
FORGE_URL: http://forgejo:3000
FORGE_TOKEN: ${FORGE_TOKEN:-}
FORGE_REVIEW_TOKEN: ${FORGE_REVIEW_TOKEN:-}
FORGE_PLANNER_TOKEN: ${FORGE_PLANNER_TOKEN:-}
FORGE_GARDENER_TOKEN: ${FORGE_GARDENER_TOKEN:-}
FORGE_VAULT_TOKEN: ${FORGE_VAULT_TOKEN:-}
FORGE_SUPERVISOR_TOKEN: ${FORGE_SUPERVISOR_TOKEN:-}
FORGE_PREDICTOR_TOKEN: ${FORGE_PREDICTOR_TOKEN:-}
FORGE_ARCHITECT_TOKEN: ${FORGE_ARCHITECT_TOKEN:-}
FORGE_BOT_USERNAMES: ${FORGE_BOT_USERNAMES:-}
WOODPECKER_TOKEN: ${WOODPECKER_TOKEN:-}
CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200}
CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-}
DISINTO_CONTAINER: "1"
PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
WOODPECKER_DATA_DIR: /woodpecker-data
WOODPECKER_REPO_ID: "PLACEHOLDER_WP_REPO_ID"
# IMPORTANT: agents get explicit environment variables (forge tokens, CI tokens, config).
# Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in
# .env.vault.enc and are NEVER injected here — only the runner
# container receives them at fire time (AD-006, #745).
depends_on:
- forgejo
- woodpecker
networks:
- disinto-net
runner:
build:
context: .
dockerfile: docker/agents/Dockerfile
profiles: ["vault"]
security_opt:
- apparmor=unconfined
volumes:
- agent-data:/home/agent/data
environment:
FORGE_URL: http://forgejo:3000
DISINTO_CONTAINER: "1"
PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
# Vault redesign in progress (PR-based approval, see #73-#77)
# This container is being replaced — entrypoint will be updated in follow-up
networks:
- disinto-net
# Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging
# Serves on ports 80/443, routes based on path
edge:
build: ./docker/edge
container_name: disinto-edge
security_opt:
- apparmor=unconfined
ports:
- "80:80"
- "443:443"
environment:
- DISINTO_VERSION=${DISINTO_VERSION:-main}
- FORGE_URL=http://forgejo:3000
- FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
- FORGE_OPS_REPO=${FORGE_OPS_REPO:-disinto-admin/disinto-ops}
- FORGE_TOKEN=${FORGE_TOKEN:-}
- FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin}
- FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-}
- OPS_REPO_ROOT=/opt/disinto-ops
- PROJECT_REPO_ROOT=/opt/disinto
- PRIMARY_BRANCH=main
volumes:
- ./docker/Caddyfile:/etc/caddy/Caddyfile
- caddy_data:/data
- /var/run/docker.sock:/var/run/docker.sock
depends_on:
- forgejo
- woodpecker
- staging
networks:
- disinto-net
# Staging container — static file server for staging artifacts
# Edge proxy routes to this container for default requests
staging:
image: caddy:alpine
command: ["caddy", "file-server", "--root", "/srv/site"]
security_opt:
- apparmor=unconfined
volumes:
- ./docker:/srv/site:ro
networks:
- disinto-net
# Staging deployment slot — activated by Woodpecker staging pipeline (#755).
# Profile-gated: only starts when explicitly targeted by deploy commands.
# Customize image/ports/volumes for your project after init.
staging-deploy:
image: alpine:3
profiles: ["staging"]
security_opt:
- apparmor=unconfined
environment:
DEPLOY_ENV: staging
networks:
- disinto-net
command: ["echo", "staging slot — replace with project image"]
volumes:
forgejo-data:
woodpecker-data:
agent-data:
project-repos:
caddy_data:
networks:
disinto-net:
driver: bridge
COMPOSEEOF
# Patch PROJECT_REPO_ROOT — interpolate PROJECT_NAME at generation time
# (Docker Compose cannot resolve it; it's a shell variable, not a .env var)
# NOTE(review): PROJECT_NAME is spliced into the sed replacement unescaped; a
# value containing '|', '&' or '\' would corrupt the substitution — confirm
# that project names are restricted upstream.
sed -i "s|\${PROJECT_NAME:-project}|${PROJECT_NAME}|g" "$compose_file"
# Patch WOODPECKER_REPO_ID — interpolate at generation time
# (Docker Compose cannot resolve it; it's a shell variable, not a .env var)
if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then
sed -i "s|PLACEHOLDER_WP_REPO_ID|${wp_repo_id}|g" "$compose_file"
else
# Default to empty if no repo_id found (agents will handle gracefully)
sed -i "s|PLACEHOLDER_WP_REPO_ID||g" "$compose_file"
fi
# Patch the forgejo port mapping into the file: a "ports:" entry is appended
# right after the forgejo image line in both branches.
# NOTE(review): the two branches differ only in the host port; with
# forge_port=3000 the first sed would produce the same mapping as the second.
if [ "$forge_port" != "3000" ]; then
# Add port mapping to forgejo service so it's reachable from host during init
sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"${forge_port}:3000\"" "$compose_file"
else
sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"3000:3000\"" "$compose_file"
fi
# Append local-model agent services if any are configured
# (must run before CLAUDE_BIN_PLACEHOLDER substitution so the placeholder
# in local-model services is also resolved)
_generate_local_model_services "$compose_file"
# Patch the Claude CLI binary path — resolve from host PATH at init time.
local claude_bin
# "|| true" keeps a missing claude binary from aborting the script under
# `set -e`; the empty result is handled by the else branch below.
claude_bin="$(command -v claude 2>/dev/null || true)"
if [ -n "$claude_bin" ]; then
# Resolve symlinks to get the real binary path
claude_bin="$(readlink -f "$claude_bin")"
sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|g" "$compose_file"
else
echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2
# Fall back to the in-container path so the placeholder never survives
# into the generated file.
sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|g" "$compose_file"
fi
echo "Created: ${compose_file}"
}
# Make sure ${FACTORY_ROOT}/docker/agents exists and warn (on stderr) about
# each of the two files the agent image build expects to find in it.
# Nothing is generated: Dockerfile and entrypoint.sh are expected to ship
# with the repository.
#
# Globals: FACTORY_ROOT (read)
# Outputs: up to two warning lines on stderr
_generate_agent_docker_impl() {
  local agents_dir="${FACTORY_ROOT}/docker/agents"

  mkdir -p "$agents_dir"

  [ -f "${agents_dir}/Dockerfile" ] \
    || echo "Warning: docker/agents/Dockerfile not found — expected in repo" >&2

  [ -f "${agents_dir}/entrypoint.sh" ] \
    || echo "Warning: docker/agents/entrypoint.sh not found — expected in repo" >&2
}
# Write the edge proxy's Caddyfile template to ${FACTORY_ROOT}/docker/Caddyfile.
# An existing Caddyfile is never overwritten.
#
# Globals: FACTORY_ROOT (read)
# Outputs: one status line on stdout ("Created: ..." or "... skipping")
_generate_caddyfile_impl() {
  local target="${FACTORY_ROOT}/docker/Caddyfile"

  if [ ! -f "$target" ]; then
    # Quoted delimiter: the template is written verbatim, no expansion.
    cat > "$target" <<'CADDYFILEEOF'
# Caddyfile — edge proxy configuration
# IP-only binding at bootstrap; domain + TLS added later via vault resource request
:80 {
# Reverse proxy to Forgejo
handle /forgejo/* {
reverse_proxy forgejo:3000
}
# Reverse proxy to Woodpecker CI
handle /ci/* {
reverse_proxy woodpecker:8000
}
# Default: proxy to staging container
handle {
reverse_proxy staging:80
}
}
CADDYFILEEOF
    echo "Created: ${target}"
  else
    echo "Caddyfile: ${target} (already exists, skipping)"
  fi
}
# Write the default staging landing page to ${FACTORY_ROOT}/docker/index.html.
# An existing index.html is never overwritten.
#
# Globals: FACTORY_ROOT (read)
# Outputs: one status line on stdout ("Created: ..." or "... skipping")
_generate_staging_index_impl() {
  local page="${FACTORY_ROOT}/docker/index.html"

  if [ ! -f "$page" ]; then
    # Quoted delimiter: the page is written verbatim, no expansion.
    cat > "$page" <<'INDEXEOF'
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Nothing shipped yet</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
display: flex;
align-items: center;
justify-content: center;
min-height: 100vh;
margin: 0;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}
.container {
text-align: center;
padding: 2rem;
}
h1 {
font-size: 3rem;
margin: 0 0 1rem 0;
}
p {
font-size: 1.25rem;
opacity: 0.9;
}
</style>
</head>
<body>
<div class="container">
<h1>Nothing shipped yet</h1>
<p>CI pipelines will update this page with your staging artifacts.</p>
</div>
</body>
</html>
INDEXEOF
    echo "Created: ${page}"
  else
    echo "Staging: ${page} (already exists, skipping)"
  fi
}
# Drop template deployment pipelines (staging.yml, production.yml) into a
# project repo's .woodpecker/ directory. Each file is written only when it is
# missing, so pipelines that already exist are never overwritten. Both
# templates are gated on Woodpecker's "deployment" event plus an environment
# filter.
#
# Arguments: $1 - root of the project repository
#            $2 - project name (accepted, but not used by the templates)
# Outputs:   one "Created: ..." line per file written, or a single
#            "already exist" line when both files are present
_generate_deploy_pipelines_impl() {
  local repo_root="$1"
  local project_name="$2"
  # No-op expansion: its only purpose is to reference project_name so that
  # ShellCheck does not flag the variable as unused (SC2034).
  : "${project_name// /}"

  local pipelines_dir="${repo_root}/.woodpecker"
  local staging_yml="${pipelines_dir}/staging.yml"
  local production_yml="${pipelines_dir}/production.yml"

  mkdir -p "$pipelines_dir"

  # Nothing to do when both pipelines are already in place
  if [ -f "$staging_yml" ] && [ -f "$production_yml" ]; then
    echo "Deploy: .woodpecker/{staging,production}.yml (already exist)"
    return
  fi

  [ -f "$staging_yml" ] || {
    cat > "$staging_yml" <<'STAGINGEOF'
# .woodpecker/staging.yml — Staging deployment pipeline
# Triggered by runner via Woodpecker promote API.
# Human approves promotion in vault → runner calls promote → this runs.
when:
event: deployment
environment: staging
steps:
- name: deploy-staging
image: docker:27
commands:
- echo "Deploying to staging environment..."
- echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}"
# Pull the image built by CI and deploy to staging
# Customize these commands for your project:
# - docker compose -f docker-compose.yml --profile staging up -d
- echo "Staging deployment complete"
- name: verify-staging
image: alpine:3
commands:
- echo "Verifying staging deployment..."
# Add health checks, smoke tests, or integration tests here:
# - curl -sf http://staging:8080/health || exit 1
- echo "Staging verification complete"
STAGINGEOF
    echo "Created: ${staging_yml}"
  }

  [ -f "$production_yml" ] || {
    cat > "$production_yml" <<'PRODUCTIONEOF'
# .woodpecker/production.yml — Production deployment pipeline
# Triggered by runner via Woodpecker promote API.
# Human approves promotion in vault → runner calls promote → this runs.
when:
event: deployment
environment: production
steps:
- name: deploy-production
image: docker:27
commands:
- echo "Deploying to production environment..."
- echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging"
# Pull the verified image and deploy to production
# Customize these commands for your project:
# - docker compose -f docker-compose.yml up -d
- echo "Production deployment complete"
- name: verify-production
image: alpine:3
commands:
- echo "Verifying production deployment..."
# Add production health checks here:
# - curl -sf http://production:8080/health || exit 1
- echo "Production verification complete"
PRODUCTIONEOF
    echo "Created: ${production_yml}"
  }
}

View file

@ -1,5 +1,5 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# guard.sh — Active-state guard for polling-loop entry points # guard.sh — Active-state guard for cron entry points
# #
# Each agent checks for a state file before running. If the file # Each agent checks for a state file before running. If the file
# doesn't exist, the agent logs a skip and exits cleanly. # doesn't exist, the agent logs a skip and exits cleanly.

View file

@ -1,503 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# hire-agent — disinto_hire_an_agent() function
#
# Handles user creation, .profile repo setup, formula copying, branch protection,
# and state marker creation for hiring a new agent.
#
# Globals expected:
# FORGE_URL - Forge instance URL
# FORGE_TOKEN - Admin token for Forge operations
# FACTORY_ROOT - Root of the disinto factory
# PROJECT_NAME - Project name for email/domain generation
#
# Usage:
# source "${FACTORY_ROOT}/lib/hire-agent.sh"
# disinto_hire_an_agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--model <name>] [--poll-interval <seconds>]
# =============================================================================
set -euo pipefail
disinto_hire_an_agent() {
local agent_name="${1:-}"
local role="${2:-}"
local formula_path=""
local local_model=""
local model_name=""
local poll_interval=""
if [ -z "$agent_name" ] || [ -z "$role" ]; then
echo "Error: agent-name and role required" >&2
echo "Usage: disinto hire-an-agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--model <name>] [--poll-interval <seconds>]" >&2
exit 1
fi
shift 2
# Parse flags
while [ $# -gt 0 ]; do
case "$1" in
--formula)
formula_path="$2"
shift 2
;;
--local-model)
local_model="$2"
shift 2
;;
--model)
model_name="$2"
shift 2
;;
--poll-interval)
poll_interval="$2"
shift 2
;;
*)
echo "Unknown option: $1" >&2
exit 1
;;
esac
done
# Default formula path — try both naming conventions
if [ -z "$formula_path" ]; then
formula_path="${FACTORY_ROOT}/formulas/${role}.toml"
if [ ! -f "$formula_path" ]; then
formula_path="${FACTORY_ROOT}/formulas/run-${role}.toml"
fi
fi
# Validate formula exists
if [ ! -f "$formula_path" ]; then
echo "Error: formula not found at ${formula_path}" >&2
exit 1
fi
echo "── Hiring agent: ${agent_name} (${role}) ───────────────────────"
echo "Formula: ${formula_path}"
if [ -n "$local_model" ]; then
echo "Local model: ${local_model}"
echo "Model name: ${model_name:-local-model}"
echo "Poll interval: ${poll_interval:-60}s"
fi
# Ensure FORGE_TOKEN is set
if [ -z "${FORGE_TOKEN:-}" ]; then
echo "Error: FORGE_TOKEN not set" >&2
exit 1
fi
# Get Forge URL
local forge_url="${FORGE_URL:-http://localhost:3000}"
echo "Forge: ${forge_url}"
# Step 1: Create user via API (skip if exists)
echo ""
echo "Step 1: Creating user '${agent_name}' (if not exists)..."
local user_pass=""
local admin_pass=""
# Read admin password from .env for standalone runs (#184)
local env_file="${FACTORY_ROOT}/.env"
if [ -f "$env_file" ] && grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-)
fi
# Get admin token early (needed for both user creation and password reset)
local admin_user="disinto-admin"
admin_pass="${admin_pass:-admin}"
local admin_token=""
local admin_token_name
admin_token_name="temp-token-$(date +%s)"
admin_token=$(curl -sf -X POST \
-u "${admin_user}:${admin_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/users/${admin_user}/tokens" \
-d "{\"name\":\"${admin_token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \
| jq -r '.sha1 // empty') || admin_token=""
if [ -z "$admin_token" ]; then
# Token might already exist — try listing
admin_token=$(curl -sf \
-u "${admin_user}:${admin_pass}" \
"${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \
| jq -r '.[0].sha1 // empty') || admin_token=""
fi
if [ -z "$admin_token" ]; then
echo "Error: failed to obtain admin API token" >&2
echo " Cannot proceed without admin privileges" >&2
exit 1
fi
if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then
echo " User '${agent_name}' already exists"
# Reset user password so we can get a token (#184)
user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
# Use Forgejo CLI to reset password (API PATCH ignores must_change_password in Forgejo 11.x)
if _forgejo_exec forgejo admin user change-password \
--username "${agent_name}" \
--password "${user_pass}" \
--must-change-password=false >/dev/null 2>&1; then
echo " Reset password for existing user '${agent_name}'"
else
echo " Warning: could not reset password for existing user" >&2
fi
else
# Create user using basic auth (admin token fallback would poison subsequent calls)
# Create the user
user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
if curl -sf -X POST \
-u "${admin_user}:${admin_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/admin/users" \
-d "{\"username\":\"${agent_name}\",\"password\":\"${user_pass}\",\"email\":\"${agent_name}@${PROJECT_NAME:-disinto}.local\",\"full_name\":\"${agent_name}\",\"active\":true,\"admin\":false,\"must_change_password\":false}" >/dev/null 2>&1; then
echo " Created user '${agent_name}'"
else
echo " Warning: failed to create user via admin API" >&2
# Try alternative: user might already exist
if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then
echo " User '${agent_name}' exists (confirmed)"
else
echo " Error: failed to create user '${agent_name}'" >&2
exit 1
fi
fi
fi
# Step 1.5: Generate Forge token for the new/existing user
echo ""
echo "Step 1.5: Generating Forge token for '${agent_name}'..."
# Convert role to uppercase token variable name (e.g., architect -> FORGE_ARCHITECT_TOKEN)
local role_upper
role_upper=$(echo "$role" | tr '[:lower:]' '[:upper:]')
local token_var="FORGE_${role_upper}_TOKEN"
# Generate token using the user's password (basic auth)
local agent_token=""
agent_token=$(curl -sf -X POST \
-u "${agent_name}:${user_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/users/${agent_name}/tokens" \
-d "{\"name\":\"disinto-${agent_name}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \
| jq -r '.sha1 // empty') || agent_token=""
if [ -z "$agent_token" ]; then
# Token name collision — create with timestamp suffix
agent_token=$(curl -sf -X POST \
-u "${agent_name}:${user_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/users/${agent_name}/tokens" \
-d "{\"name\":\"disinto-${agent_name}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \
| jq -r '.sha1 // empty') || agent_token=""
fi
if [ -z "$agent_token" ]; then
echo " Warning: failed to create API token for '${agent_name}'" >&2
else
# Store token in .env under the role-specific variable name
if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then
# Use sed with alternative delimiter and proper escaping for special chars in token
local escaped_token
escaped_token=$(printf '%s\n' "$agent_token" | sed 's/[&/\]/\\&/g')
sed -i "s|^${token_var}=.*|${token_var}=${escaped_token}|" "$env_file"
echo " ${agent_name} token updated (${token_var})"
else
printf '%s=%s\n' "$token_var" "$agent_token" >> "$env_file"
echo " ${agent_name} token saved (${token_var})"
fi
export "${token_var}=${agent_token}"
fi
# Step 2: Create .profile repo on Forgejo
echo ""
echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..."
if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${agent_name}/.profile" >/dev/null 2>&1; then
echo " Repo '${agent_name}/.profile' already exists"
else
# Create the repo using the admin API to ensure it's created in the agent's namespace.
# Using POST /api/v1/user/repos with a user token would create the repo under the
# authenticated user, which could be wrong if the token belongs to a different user.
# The admin API POST /api/v1/admin/users/{username}/repos explicitly creates in the
# specified user's namespace.
local create_output
create_output=$(curl -sf -X POST \
-u "${admin_user}:${admin_pass}" \
-H "Content-Type: application/json" \
"${forge_url}/api/v1/admin/users/${agent_name}/repos" \
-d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true
if echo "$create_output" | grep -q '"id":\|[0-9]'; then
echo " Created repo '${agent_name}/.profile' (via admin API)"
else
echo " Error: failed to create repo '${agent_name}/.profile'" >&2
echo " Response: ${create_output}" >&2
exit 1
fi
fi
# Step 3: Clone repo and create initial commit
echo ""
echo "Step 3: Cloning repo and creating initial commit..."
local clone_dir="/tmp/.profile-clone-${agent_name}"
rm -rf "$clone_dir"
mkdir -p "$clone_dir"
# Build authenticated clone URL using basic auth (user_pass is always set in Step 1)
if [ -z "${user_pass:-}" ]; then
echo " Error: no user password available for cloning" >&2
exit 1
fi
local auth_url
auth_url=$(printf '%s' "$forge_url" | sed "s|://|://${agent_name}:${user_pass}@|")
auth_url="${auth_url}/${agent_name}/.profile.git"
# Display unauthenticated URL (auth token only in actual git clone command)
echo " Cloning: ${forge_url}/${agent_name}/.profile.git"
# Try authenticated clone first (required for private repos)
if ! git clone --quiet "$auth_url" "$clone_dir" 2>/dev/null; then
echo " Error: failed to clone repo with authentication" >&2
echo " Note: Ensure the user has a valid API token with repository access" >&2
rm -rf "$clone_dir"
exit 1
fi
# Configure git
git -C "$clone_dir" config user.name "disinto-admin"
git -C "$clone_dir" config user.email "disinto-admin@localhost"
# Create directory structure
echo " Creating directory structure..."
mkdir -p "${clone_dir}/journal"
mkdir -p "${clone_dir}/knowledge"
touch "${clone_dir}/journal/.gitkeep"
touch "${clone_dir}/knowledge/.gitkeep"
# Copy formula
echo " Copying formula..."
cp "$formula_path" "${clone_dir}/formula.toml"
# Create README
if [ ! -f "${clone_dir}/README.md" ]; then
cat > "${clone_dir}/README.md" <<EOF
# ${agent_name}'s .profile
Agent profile repository for ${agent_name}.
## Structure
\`\`\`
${agent_name}/.profile/
├── formula.toml # Agent's role formula
├── journal/ # Issue-by-issue log files (journal branch)
│ └── .gitkeep
├── knowledge/ # Shared knowledge and best practices
│ └── .gitkeep
└── README.md
\`\`\`
## Branches
- \`main\` — Admin-only merge for formula changes (requires 1 approval)
- \`journal\` — Agent branch for direct journal entries
- Agent can push directly to this branch
- Formula changes must go through PR to \`main\`
## Branch protection
- \`main\`: Protected — requires 1 admin approval for merges
- \`journal\`: Unprotected — agent can push directly
EOF
fi
# Commit and push
echo " Committing and pushing..."
git -C "$clone_dir" add -A
if ! git -C "$clone_dir" diff --cached --quiet 2>/dev/null; then
git -C "$clone_dir" commit -m "chore: initial .profile setup" -q
git -C "$clone_dir" push origin main >/dev/null 2>&1 || \
git -C "$clone_dir" push origin master >/dev/null 2>&1 || true
echo " Committed: initial .profile setup"
else
echo " No changes to commit"
fi
rm -rf "$clone_dir"
# Step 4: Set up branch protection
echo ""
echo "Step 4: Setting up branch protection..."
# Source branch-protection.sh helper
local bp_script="${FACTORY_ROOT}/lib/branch-protection.sh"
if [ -f "$bp_script" ]; then
# Source required environment
if [ -f "${FACTORY_ROOT}/lib/env.sh" ]; then
source "${FACTORY_ROOT}/lib/env.sh"
fi
# Set up branch protection for .profile repo
if source "$bp_script" 2>/dev/null && setup_profile_branch_protection "${agent_name}/.profile" "main"; then
echo " Branch protection configured for main branch"
echo " - Requires 1 approval before merge"
echo " - Admin-only merge enforcement"
echo " - Journal branch created for direct agent pushes"
else
echo " Warning: could not configure branch protection (Forgejo API may not be available)"
echo " Note: Branch protection can be set up manually later"
fi
else
echo " Warning: branch-protection.sh not found at ${bp_script}"
fi
# Step 5: Create state marker
echo ""
echo "Step 5: Creating state marker..."
local state_dir="${FACTORY_ROOT}/state"
mkdir -p "$state_dir"
local state_file="${state_dir}/.${role}-active"
if [ ! -f "$state_file" ]; then
touch "$state_file"
echo " Created: ${state_file}"
else
echo " State marker already exists: ${state_file}"
fi
# Step 6: Set up local model agent (if --local-model specified)
if [ -n "$local_model" ]; then
echo ""
echo "Step 6: Configuring local model agent..."
# Validate model endpoint is reachable
echo " Validating model endpoint: ${local_model}"
if ! curl -sf --max-time 10 "${local_model}/health" >/dev/null 2>&1; then
# Try /v1/chat/completions as fallback endpoint check
if ! curl -sf --max-time 10 "${local_model}/v1/chat/completions" >/dev/null 2>&1; then
echo " Warning: model endpoint may not be reachable at ${local_model}"
echo " Continuing with configuration..."
fi
else
echo " Model endpoint is reachable"
fi
# Find project TOML
local project_name="${PROJECT_NAME:-}"
local toml_file=""
if [ -n "$project_name" ]; then
toml_file="${FACTORY_ROOT}/projects/${project_name}.toml"
fi
# Fallback: find the first .toml in projects/
if [ -z "$toml_file" ] || [ ! -f "$toml_file" ]; then
for f in "${FACTORY_ROOT}/projects/"*.toml; do
if [ -f "$f" ]; then
toml_file="$f"
break
fi
done
fi
if [ -z "$toml_file" ] || [ ! -f "$toml_file" ]; then
echo " Error: no project TOML found in ${FACTORY_ROOT}/projects/" >&2
echo " Run 'disinto init' first to create a project config" >&2
exit 1
fi
echo " Project TOML: ${toml_file}"
# Derive a safe section name from the agent name (lowercase, alphanumeric+hyphens)
local section_name
section_name=$(echo "$agent_name" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]/-/g')
# Default model name if not provided
local model="${model_name:-local-model}"
# Write [agents.<name>] section to the project TOML
local interval="${poll_interval:-60}"
echo " Writing [agents.${section_name}] to ${toml_file}..."
python3 -c '
import sys, re, pathlib
toml_path = sys.argv[1]
section_name = sys.argv[2]
base_url = sys.argv[3]
model = sys.argv[4]
agent_name = sys.argv[5]
role = sys.argv[6]
poll_interval = sys.argv[7]
p = pathlib.Path(toml_path)
text = p.read_text()
# Build the new section
new_section = f"""
[agents.{section_name}]
base_url = "{base_url}"
model = "{model}"
api_key = "sk-no-key-required"
roles = ["{role}"]
forge_user = "{agent_name}"
compact_pct = 60
poll_interval = {poll_interval}
"""
# Check if section already exists and replace it
pattern = rf"\[agents\.{re.escape(section_name)}\][^\[]*"
if re.search(pattern, text):
text = re.sub(pattern, new_section.strip() + "\n", text)
else:
# Remove commented-out example [agents.llama] block if present
text = re.sub(
r"\n# Local-model agents \(optional\).*?(?=\n# \[mirrors\]|\n\[mirrors\]|\Z)",
"",
text,
flags=re.DOTALL,
)
# Append before [mirrors] if it exists, otherwise at end
mirrors_match = re.search(r"\n(# )?\[mirrors\]", text)
if mirrors_match:
text = text[:mirrors_match.start()] + "\n" + new_section + text[mirrors_match.start():]
else:
text = text.rstrip() + "\n" + new_section
p.write_text(text)
' "$toml_file" "$section_name" "$local_model" "$model" "$agent_name" "$role" "$interval"
echo " Agent config written to TOML"
# Regenerate docker-compose.yml to include the new agent container
local compose_file="${FACTORY_ROOT}/docker-compose.yml"
if [ -f "$compose_file" ]; then
echo " Regenerating docker-compose.yml..."
rm -f "$compose_file"
# generate_compose is defined in the calling script (bin/disinto) via generators.sh
# Use _generate_compose_impl directly since generators.sh is already sourced
local forge_port="3000"
if [ -n "${FORGE_URL:-}" ]; then
forge_port=$(printf '%s' "$FORGE_URL" | sed -E 's|.*:([0-9]+)/?$|\1|')
forge_port="${forge_port:-3000}"
fi
_generate_compose_impl "$forge_port"
echo " Compose regenerated with agents-${section_name} service"
fi
local service_name="agents-${section_name}"
echo ""
echo " Service name: ${service_name}"
echo " Model endpoint: ${local_model}"
echo " Model: ${model}"
echo ""
echo " To start the agent, run:"
echo " docker compose --profile ${service_name} up -d ${service_name}"
fi
echo ""
echo "Done! Agent '${agent_name}' hired for role '${role}'."
echo " User: ${forge_url}/${agent_name}"
echo " Repo: ${forge_url}/${agent_name}/.profile"
echo " Formula: ${role}.toml"
}

View file

@ -43,17 +43,18 @@ _ilc_log() {
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Label ID caching — lookup once per name, cache in globals. # Label ID caching — lookup once per name, cache in globals.
# Pattern follows ci-helpers.sh (ensure_blocked_label_id).
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
declare -A _ILC_LABEL_IDS _ILC_BACKLOG_ID=""
_ILC_LABEL_IDS["backlog"]="" _ILC_IN_PROGRESS_ID=""
_ILC_LABEL_IDS["in-progress"]="" _ILC_BLOCKED_ID=""
_ILC_LABEL_IDS["blocked"]=""
# _ilc_ensure_label_id LABEL_NAME [COLOR] # _ilc_ensure_label_id VARNAME LABEL_NAME [COLOR]
# Looks up label by name, creates if missing, caches in associative array. # Generic: looks up label by name, creates if missing, caches in the named var.
_ilc_ensure_label_id() { _ilc_ensure_label_id() {
local name="$1" color="${2:-#e0e0e0}" local varname="$1" name="$2" color="${3:-#e0e0e0}"
local current="${_ILC_LABEL_IDS[$name]:-}" local current
eval "current=\"\${${varname}:-}\""
if [ -n "$current" ]; then if [ -n "$current" ]; then
printf '%s' "$current" printf '%s' "$current"
return 0 return 0
@ -70,47 +71,21 @@ _ilc_ensure_label_id() {
| jq -r '.id // empty' 2>/dev/null || true) | jq -r '.id // empty' 2>/dev/null || true)
fi fi
if [ -n "$label_id" ]; then if [ -n "$label_id" ]; then
_ILC_LABEL_IDS["$name"]="$label_id" eval "${varname}=\"${label_id}\""
fi fi
printf '%s' "$label_id" printf '%s' "$label_id"
} }
_ilc_backlog_id() { _ilc_ensure_label_id "backlog" "#0075ca"; } _ilc_backlog_id() { _ilc_ensure_label_id _ILC_BACKLOG_ID "backlog" "#0075ca"; }
_ilc_in_progress_id() { _ilc_ensure_label_id "in-progress" "#1d76db"; } _ilc_in_progress_id() { _ilc_ensure_label_id _ILC_IN_PROGRESS_ID "in-progress" "#1d76db"; }
_ilc_blocked_id() { _ilc_ensure_label_id "blocked" "#e11d48"; } _ilc_blocked_id() { _ilc_ensure_label_id _ILC_BLOCKED_ID "blocked" "#e11d48"; }
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# issue_claim — assign issue to bot, add "in-progress" label, remove "backlog". # issue_claim — add "in-progress" label, remove "backlog" label.
# Args: issue_number # Args: issue_number
# Returns: 0 on success, 1 if already assigned to another agent
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
issue_claim() { issue_claim() {
local issue="$1" local issue="$1"
# Get current bot identity
local me
me=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_URL}/api/v1/user" | jq -r '.login') || return 1
# Check current assignee
local current
current=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${issue}" | jq -r '.assignee.login // ""') || return 1
if [ -n "$current" ] && [ "$current" != "$me" ]; then
_ilc_log "issue #${issue} already assigned to ${current} — skipping"
return 1
fi
# Assign to self BEFORE adding in-progress label (issue #471).
# This ordering ensures the assignee is set by the time other pollers
# see the in-progress label, reducing the stale-detection race window.
curl -sf -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_API}/issues/${issue}" \
-d "{\"assignees\":[\"${me}\"]}" >/dev/null 2>&1 || return 1
local ip_id bl_id local ip_id bl_id
ip_id=$(_ilc_in_progress_id) ip_id=$(_ilc_in_progress_id)
bl_id=$(_ilc_backlog_id) bl_id=$(_ilc_backlog_id)
@ -127,23 +102,14 @@ issue_claim() {
"${FORGE_API}/issues/${issue}/labels/${bl_id}" >/dev/null 2>&1 || true "${FORGE_API}/issues/${issue}/labels/${bl_id}" >/dev/null 2>&1 || true
fi fi
_ilc_log "claimed issue #${issue}" _ilc_log "claimed issue #${issue}"
return 0
} }
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# issue_release — remove "in-progress" label, add "backlog" label, clear assignee. # issue_release — remove "in-progress" label, add "backlog" label.
# Args: issue_number # Args: issue_number
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
issue_release() { issue_release() {
local issue="$1" local issue="$1"
# Clear assignee
curl -sf -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_API}/issues/${issue}" \
-d '{"assignees":[]}' >/dev/null 2>&1 || true
local ip_id bl_id local ip_id bl_id
ip_id=$(_ilc_in_progress_id) ip_id=$(_ilc_in_progress_id)
bl_id=$(_ilc_backlog_id) bl_id=$(_ilc_backlog_id)
@ -162,27 +128,6 @@ issue_release() {
_ilc_log "released issue #${issue}" _ilc_log "released issue #${issue}"
} }
# ---------------------------------------------------------------------------
# _ilc_post_comment — Post a comment to an issue (internal helper)
# Args: issue_number body_text
# Uses a temp file to avoid large inline strings.
# ---------------------------------------------------------------------------
_ilc_post_comment() {
local issue="$1" body="$2"
local tmpfile tmpjson
tmpfile=$(mktemp /tmp/ilc-comment-XXXXXX.md)
tmpjson="${tmpfile}.json"
printf '%s' "$body" > "$tmpfile"
jq -Rs '{body:.}' < "$tmpfile" > "$tmpjson"
curl -sf -o /dev/null -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_API}/issues/${issue}/comments" \
--data-binary @"$tmpjson" 2>/dev/null || true
rm -f "$tmpfile" "$tmpjson"
}
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# issue_block — add "blocked" label, post diagnostic comment, remove in-progress. # issue_block — add "blocked" label, post diagnostic comment, remove in-progress.
# Args: issue_number reason [result_text] # Args: issue_number reason [result_text]
@ -209,9 +154,14 @@ issue_block() {
fi fi
} > "$tmpfile" } > "$tmpfile"
# Post comment using shared helper # Post comment
_ilc_post_comment "$issue" "$(cat "$tmpfile")" jq -Rs '{body:.}' < "$tmpfile" > "${tmpfile}.json"
rm -f "$tmpfile" curl -sf -o /dev/null -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_API}/issues/${issue}/comments" \
--data-binary @"${tmpfile}.json" 2>/dev/null || true
rm -f "$tmpfile" "${tmpfile}.json"
# Remove in-progress, add blocked # Remove in-progress, add blocked
local ip_id bk_id local ip_id bk_id
@ -234,19 +184,11 @@ issue_block() {
} }
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# issue_close — clear assignee, PATCH state to closed. # issue_close — PATCH state to closed.
# Args: issue_number # Args: issue_number
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
issue_close() { issue_close() {
local issue="$1" local issue="$1"
# Clear assignee before closing
curl -sf -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_API}/issues/${issue}" \
-d '{"assignees":[]}' >/dev/null 2>&1 || true
curl -sf -X PATCH \ curl -sf -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \ -H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \

View file

@ -10,6 +10,7 @@
# PROJECT_CONTAINERS, CHECK_PRS, CHECK_DEV_AGENT, # PROJECT_CONTAINERS, CHECK_PRS, CHECK_DEV_AGENT,
# CHECK_PIPELINE_STALL, CI_STALE_MINUTES, # CHECK_PIPELINE_STALL, CI_STALE_MINUTES,
# MIRROR_NAMES, MIRROR_URLS, MIRROR_<NAME> (per configured mirror) # MIRROR_NAMES, MIRROR_URLS, MIRROR_<NAME> (per configured mirror)
# (plus backwards-compat aliases: CODEBERG_REPO, CODEBERG_API, CODEBERG_WEB)
# #
# If no argument given, does nothing (allows poll scripts to work with # If no argument given, does nothing (allows poll scripts to work with
# plain .env fallback for backwards compatibility). # plain .env fallback for backwards compatibility).
@ -82,7 +83,7 @@ if mirrors:
# Export parsed variables. # Export parsed variables.
# Inside the agents container (DISINTO_CONTAINER=1), compose already sets the # Inside the agents container (DISINTO_CONTAINER=1), compose already sets the
# correct FORGE_URL (http://forgejo:3000) and path vars for the container # correct FORGE_URL (http://forgejo:3000) and path vars for the container
# environment. The TOML carries host-perspective values (localhost, /home/admin/…) # environment. The TOML carries host-perspective values (localhost, /home/johba/…)
# that would break container API calls and path resolution. Skip overriding # that would break container API calls and path resolution. Skip overriding
# any env var that is already set when running inside the container. # any env var that is already set when running inside the container.
while IFS='=' read -r _key _val; do while IFS='=' read -r _key _val; do
@ -99,9 +100,11 @@ export FORGE_URL="${FORGE_URL:-http://localhost:3000}"
if [ -n "$FORGE_REPO" ]; then if [ -n "$FORGE_REPO" ]; then
export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}" export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}" export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}"
# Extract repo owner (first path segment of owner/repo)
export FORGE_REPO_OWNER="${FORGE_REPO%%/*}"
fi fi
# Backwards-compat aliases
export CODEBERG_REPO="${FORGE_REPO}"
export CODEBERG_API="${FORGE_API:-}"
export CODEBERG_WEB="${FORGE_WEB:-}"
# Derive PROJECT_REPO_ROOT if not explicitly set # Derive PROJECT_REPO_ROOT if not explicitly set
if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
@ -113,55 +116,9 @@ if [ -z "${OPS_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
export OPS_REPO_ROOT="/home/${USER}/${PROJECT_NAME}-ops" export OPS_REPO_ROOT="/home/${USER}/${PROJECT_NAME}-ops"
fi fi
# Inside the container, always derive repo paths from PROJECT_NAME — the TOML
# carries host-perspective paths that do not exist in the container filesystem.
if [ "${DISINTO_CONTAINER:-}" = "1" ] && [ -n "${PROJECT_NAME:-}" ]; then
export PROJECT_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}"
export OPS_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}-ops"
fi
# Derive FORGE_OPS_REPO if not explicitly set # Derive FORGE_OPS_REPO if not explicitly set
if [ -z "${FORGE_OPS_REPO:-}" ] && [ -n "${FORGE_REPO:-}" ]; then if [ -z "${FORGE_OPS_REPO:-}" ] && [ -n "${FORGE_REPO:-}" ]; then
export FORGE_OPS_REPO="${FORGE_REPO}-ops" export FORGE_OPS_REPO="${FORGE_REPO}-ops"
fi fi
# Parse [agents.*] sections for local-model agents
# Exports AGENT_<NAME>_BASE_URL, AGENT_<NAME>_MODEL, AGENT_<NAME>_API_KEY,
# AGENT_<NAME>_ROLES, AGENT_<NAME>_FORGE_USER, AGENT_<NAME>_COMPACT_PCT
if command -v python3 &>/dev/null; then
_AGENT_VARS=$(python3 -c "
import sys, tomllib
with open(sys.argv[1], 'rb') as f:
cfg = tomllib.load(f)
agents = cfg.get('agents', {})
for name, config in agents.items():
if not isinstance(config, dict):
continue
# Emit variables in uppercase with the agent name
if 'base_url' in config:
print(f'AGENT_{name.upper()}_BASE_URL={config[\"base_url\"]}')
if 'model' in config:
print(f'AGENT_{name.upper()}_MODEL={config[\"model\"]}')
if 'api_key' in config:
print(f'AGENT_{name.upper()}_API_KEY={config[\"api_key\"]}')
if 'roles' in config:
roles = ' '.join(config['roles']) if isinstance(config['roles'], list) else config['roles']
print(f'AGENT_{name.upper()}_ROLES={roles}')
if 'forge_user' in config:
print(f'AGENT_{name.upper()}_FORGE_USER={config[\"forge_user\"]}')
if 'compact_pct' in config:
print(f'AGENT_{name.upper()}_COMPACT_PCT={config[\"compact_pct\"]}')
" "$_PROJECT_TOML" 2>/dev/null) || true
if [ -n "$_AGENT_VARS" ]; then
while IFS='=' read -r _key _val; do
[ -z "$_key" ] && continue
export "$_key=$_val"
done <<< "$_AGENT_VARS"
fi
unset _AGENT_VARS
fi
unset _PROJECT_TOML _PROJECT_VARS _key _val unset _PROJECT_TOML _PROJECT_VARS _key _val

View file

@ -13,16 +13,7 @@ mirror_push() {
local name url local name url
for name in $MIRROR_NAMES; do for name in $MIRROR_NAMES; do
# Convert name to uppercase env var name safely (only alphanumeric allowed) url=$(eval "echo \"\$MIRROR_$(echo "$name" | tr '[:lower:]' '[:upper:]')\"") || true
local upper_name
upper_name=$(printf '%s' "$name" | tr '[:lower:]' '[:upper:]')
# Validate: only allow alphanumeric + underscore in var name
if [[ ! "$upper_name" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
continue
fi
# Use indirect expansion safely (no eval) — MIRROR_ prefix required
local varname="MIRROR_${upper_name}"
url="${!varname:-}"
[ -z "$url" ] && continue [ -z "$url" ] && continue
# Ensure remote exists with correct URL # Ensure remote exists with correct URL

View file

@ -1,368 +0,0 @@
#!/usr/bin/env bash
# ops-setup.sh — Setup ops repository (disinto-ops)
#
# Source from bin/disinto:
# source "$(dirname "$0")/../lib/ops-setup.sh"
#
# Required globals: FORGE_URL, FORGE_TOKEN, FACTORY_ROOT
# Optional: admin_token (falls back to FORGE_TOKEN for admin operations)
#
# Functions:
# setup_ops_repo <forge_url> <ops_slug> <ops_root> [primary_branch]
# - Create ops repo on Forgejo if it doesn't exist
# - Configure bot collaborators with appropriate permissions
# - Clone or initialize ops repo locally
# - Seed directory structure (vault, knowledge, evidence, sprints) and stub files
# - Export _ACTUAL_OPS_SLUG for caller to use
# migrate_ops_repo <ops_root> [primary_branch]
# - Seed missing directories/files on existing ops repos (idempotent)
# - Creates .gitkeep files and template content for canonical structure
#
# Globals modified:
# _ACTUAL_OPS_SLUG - resolved ops repo slug after setup_ops_repo completes
set -euo pipefail
# ---------------------------------------------------------------------------
# setup_ops_repo — locate or create the ops repo on the forge, grant the bot
# accounts access, make sure a local checkout exists, and seed its layout.
# Args: forge_url ops_slug ops_root [primary_branch]
#   forge_url       base URL of the forge
#   ops_slug        configured "<namespace>/<repo>" slug of the ops repo
#   ops_root        local path of the ops checkout
#   primary_branch  branch to create/push (default: main)
# Globals read:    FORGE_TOKEN, admin_token (optional), FACTORY_ROOT (optional)
# Globals written: _ACTUAL_OPS_SLUG — slug the repo was found or created under
# Returns: 0 on success, 1 if the repo does not exist and cannot be created
# ---------------------------------------------------------------------------
setup_ops_repo() {
  local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}"
  local org_name="${ops_slug%%/*}"
  local ops_name="${ops_slug##*/}"
  # Admin operations prefer admin_token and fall back to the default token.
  local api_token="${admin_token:-${FORGE_TOKEN}}"

  echo ""
  echo "── Ops repo setup ─────────────────────────────────────"

  # Determine the actual ops repo location by searching across possible
  # namespaces. This handles repos that were created under a different
  # namespace due to past bugs (e.g. dev-bot/disinto-ops instead of
  # disinto-admin/disinto-ops).
  local actual_ops_slug=""
  local -a possible_namespaces=( "$org_name" "dev-bot" "disinto-admin" )
  local http_code
  # Loop variables are local so they do not leak into the sourcing script.
  local ns slug
  for ns in "${possible_namespaces[@]}"; do
    slug="${ns}/${ops_name}"
    if curl -sf --max-time 5 \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${forge_url}/api/v1/repos/${slug}" >/dev/null 2>&1; then
      actual_ops_slug="$slug"
      echo "Ops repo: ${slug} (found at ${slug})"
      break
    fi
  done

  # If not found, try to create it in the configured namespace
  if [ -z "$actual_ops_slug" ]; then
    echo "Creating ops repo in namespace: ${org_name}"
    local create_payload
    create_payload="{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}"

    # Create org if it doesn't exist (best effort — it usually already does)
    curl -sf -X POST \
      -H "Authorization: token ${api_token}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs" \
      -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true

    if curl -sf -X POST \
      -H "Authorization: token ${api_token}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs/${org_name}/repos" \
      -d "$create_payload" >/dev/null 2>&1; then
      actual_ops_slug="${org_name}/${ops_name}"
      echo "Ops repo: ${actual_ops_slug} created on Forgejo"
    else
      # Fallback: use admin API to create repo under the target namespace.
      # curl prints the status code itself (000 when no response arrived), so
      # only substitute a placeholder when it produced no output at all.
      http_code=$(curl -s -o /dev/null -w "%{http_code}" \
        -X POST \
        -H "Authorization: token ${api_token}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/admin/users/${org_name}/repos" \
        -d "$create_payload" 2>/dev/null) || true
      http_code="${http_code:-000}"
      if [ "$http_code" = "201" ]; then
        actual_ops_slug="${org_name}/${ops_name}"
        echo "Ops repo: ${actual_ops_slug} created on Forgejo (via admin API)"
      else
        echo "Error: failed to create ops repo '${org_name}/${ops_name}' (HTTP ${http_code})" >&2
        return 1
      fi
    fi
  fi

  # Configure collaborators on the ops repo
  local bot_user bot_perm
  declare -A bot_permissions=(
    [dev-bot]="write"
    [review-bot]="read"
    [planner-bot]="write"
    [gardener-bot]="write"
    [vault-bot]="write"
    [supervisor-bot]="read"
    [predictor-bot]="read"
    [architect-bot]="write"
  )

  # Add all bot users as collaborators with appropriate permissions
  # vault branch protection (#77) requires:
  # - Admin-only merge to main (enforced by admin_enforced: true)
  # - Bots can push branches and create PRs, but cannot merge
  for bot_user in "${!bot_permissions[@]}"; do
    bot_perm="${bot_permissions[$bot_user]}"
    if curl -sf -X PUT \
      -H "Authorization: token ${api_token}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/${bot_user}" \
      -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1; then
      echo " + ${bot_user} = ${bot_perm} collaborator"
    else
      echo " ! ${bot_user} = ${bot_perm} (already set or failed)"
    fi
  done

  # Add disinto-admin as admin collaborator
  if curl -sf -X PUT \
    -H "Authorization: token ${api_token}" \
    -H "Content-Type: application/json" \
    "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/disinto-admin" \
    -d '{"permission":"admin"}' >/dev/null 2>&1; then
    echo " + disinto-admin = admin collaborator"
  else
    echo " ! disinto-admin = admin (already set or failed)"
  fi

  # Clone ops repo locally if not present
  if [ ! -d "${ops_root}/.git" ]; then
    local auth_url
    auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|")
    local clone_url="${auth_url}/${actual_ops_slug}.git"
    echo "Cloning: ops repo -> ${ops_root}"
    if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then
      echo "Ops repo: ${actual_ops_slug} cloned successfully"
    else
      # Clone failed: start an empty local repo pointed at the forge.
      echo "Initializing: ops repo at ${ops_root}"
      mkdir -p "$ops_root"
      git -C "$ops_root" init --initial-branch="${primary_branch}" -q
      # Set remote to the actual ops repo location
      git -C "$ops_root" remote add origin "${forge_url}/${actual_ops_slug}.git"
      echo "Ops repo: ${actual_ops_slug} initialized locally"
    fi
  else
    echo "Ops repo: ${ops_root} (already exists locally)"
    # Verify remote is correct
    local current_remote
    current_remote=$(git -C "$ops_root" remote get-url origin 2>/dev/null || true)
    local expected_remote="${forge_url}/${actual_ops_slug}.git"
    if [ -n "$current_remote" ] && [ "$current_remote" != "$expected_remote" ]; then
      echo " Fixing: remote URL from ${current_remote} to ${expected_remote}"
      git -C "$ops_root" remote set-url origin "$expected_remote"
    fi
  fi

  # Seed directory structure
  local seeded=false
  local rel
  for rel in \
    vault/pending vault/approved vault/fired vault/rejected \
    knowledge \
    evidence/engagement evidence/red-team evidence/holdout \
    evidence/evolution evidence/user-test \
    sprints; do
    mkdir -p "${ops_root}/${rel}"
  done

  # Only these directories get a .gitkeep marker here.
  for rel in sprints evidence/red-team evidence/holdout evidence/evolution evidence/user-test; do
    if [ ! -f "${ops_root}/${rel}/.gitkeep" ]; then
      touch "${ops_root}/${rel}/.gitkeep"
      seeded=true
    fi
  done

  if [ ! -f "${ops_root}/README.md" ]; then
    cat > "${ops_root}/README.md" <<OPSEOF
# ${ops_name}
Operational data for the ${ops_name%-ops} project.
## Structure
\`\`\`
${ops_name}/
├── vault/
│ ├── pending/ # vault items awaiting approval
│ ├── approved/ # approved vault items
│ ├── fired/ # executed vault items
│ └── rejected/ # rejected vault items
├── sprints/ # sprint specs written by architect agent
├── knowledge/ # shared agent knowledge and best practices
├── evidence/ # engagement data, experiment results
├── portfolio.md # addressables + observables
├── prerequisites.md # dependency graph
└── RESOURCES.md # accounts, tokens (refs), infra inventory
\`\`\`
> **Note:** Journal directories (journal/planner/ and journal/supervisor/) have been removed from the ops repo. Agent journals are now stored in each agent's .profile repo on Forgejo.
## Branch protection
- \`main\`: 2 reviewers required for vault items
- Journal/evidence commits may use lighter rules
OPSEOF
    seeded=true
  fi

  # Copy vault policy.toml template if not already present
  if [ ! -f "${ops_root}/vault/policy.toml" ]; then
    local policy_src="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/vault/policy.toml"
    if [ -f "$policy_src" ]; then
      cp "$policy_src" "${ops_root}/vault/policy.toml"
      echo " + Copied vault/policy.toml template"
      seeded=true
    fi
  fi

  # Create stub files if they don't exist
  if [ ! -f "${ops_root}/portfolio.md" ]; then
    echo "# Portfolio" > "${ops_root}/portfolio.md"
    seeded=true
  fi
  if [ ! -f "${ops_root}/prerequisites.md" ]; then
    echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"
    seeded=true
  fi
  if [ ! -f "${ops_root}/RESOURCES.md" ]; then
    echo "# Resources" > "${ops_root}/RESOURCES.md"
    seeded=true
  fi

  # Commit and push seed content
  if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then
    # Auto-configure repo-local git identity if missing (#778)
    if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then
      git -C "$ops_root" config user.name "disinto-admin"
    fi
    if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then
      git -C "$ops_root" config user.email "disinto-admin@localhost"
    fi
    git -C "$ops_root" add -A
    # Only commit when staging actually produced a change.
    if ! git -C "$ops_root" diff --cached --quiet 2>/dev/null; then
      git -C "$ops_root" commit -m "chore: seed ops repo structure" -q
      # Push if remote exists
      if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then
        if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then
          echo "Seeded: ops repo with initial structure"
        else
          echo "Warning: failed to push seed content to ops repo" >&2
        fi
      fi
    fi
  fi

  # Export resolved slug for the caller to write back to the project TOML
  _ACTUAL_OPS_SLUG="${actual_ops_slug}"
}
# migrate_ops_repo — Seed missing ops repo directories and files on existing deployments
#
# This function is idempotent — safe to run on every container start.
# It checks for missing directories/files and creates them with .gitkeep files
# or template content as appropriate, then commits and pushes if anything
# was added.
#
# Called from entrypoint.sh after setup_ops_repo() to bring pre-#407 deployments
# up to date with the canonical ops repo structure.
#
# Args: [ops_root] [primary_branch]
#   ops_root        local ops checkout; when empty, falls back to OPS_REPO_ROOT,
#                   then to sourcing load-project.sh with PROJECT_TOML
#   primary_branch  branch to push (default: main)
# Returns: 0 when it skips (no root known, or root is not a git repo)
migrate_ops_repo() {
  local ops_root="${1:-}"
  local primary_branch="${2:-main}"

  # Validate ops_root argument
  if [ -z "$ops_root" ]; then
    # Try to determine ops_root from environment or project config
    if [ -n "${OPS_REPO_ROOT:-}" ]; then
      ops_root="${OPS_REPO_ROOT}"
    elif [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then
      # This file is sourced, so $0 is the caller's path. Look for the loader
      # next to this library first and keep the $0-relative location as a
      # fallback for existing callers.
      local loader="" candidate
      for candidate in "$(dirname "${BASH_SOURCE[0]}")" "$(dirname "$0")"; do
        if [ -f "${candidate}/load-project.sh" ]; then
          loader="${candidate}/load-project.sh"
          break
        fi
      done
      if [ -n "$loader" ]; then
        # shellcheck source=/dev/null
        source "$loader" "$PROJECT_TOML"
        ops_root="${OPS_REPO_ROOT:-}"
      else
        echo "migrate_ops_repo: load-project.sh not found — cannot read ${PROJECT_TOML}" >&2
      fi
    fi
  fi

  # Skip if we still don't have an ops root
  if [ -z "$ops_root" ]; then
    echo "migrate_ops_repo: skipping — no ops repo root determined"
    return 0
  fi

  # Verify it's a git repo
  if [ ! -d "${ops_root}/.git" ]; then
    echo "migrate_ops_repo: skipping — ${ops_root} is not a git repo"
    return 0
  fi

  echo ""
  echo "── Ops repo migration ───────────────────────────────────"
  echo "Checking ${ops_root} for missing directories and files..."

  local migrated=false

  # Canonical ops repo structure (post #407)
  # Directories to ensure exist with .gitkeep files
  local -a dir_keepfiles=(
    "${ops_root}/vault/pending/.gitkeep"
    "${ops_root}/vault/approved/.gitkeep"
    "${ops_root}/vault/fired/.gitkeep"
    "${ops_root}/vault/rejected/.gitkeep"
    "${ops_root}/knowledge/.gitkeep"
    "${ops_root}/evidence/engagement/.gitkeep"
    "${ops_root}/evidence/red-team/.gitkeep"
    "${ops_root}/evidence/holdout/.gitkeep"
    "${ops_root}/evidence/evolution/.gitkeep"
    "${ops_root}/evidence/user-test/.gitkeep"
    "${ops_root}/sprints/.gitkeep"
  )

  # Create missing directories and .gitkeep files
  local keepfile dir
  for keepfile in "${dir_keepfiles[@]}"; do
    if [ ! -f "$keepfile" ]; then
      dir=$(dirname "$keepfile")
      mkdir -p "$dir"
      touch "$keepfile"
      echo " + Created: ${keepfile}"
      migrated=true
    fi
  done

  # Template files to create if missing (starter content)
  local -a template_files=(
    "${ops_root}/portfolio.md"
    "${ops_root}/prerequisites.md"
    "${ops_root}/RESOURCES.md"
  )

  local tfile title
  for tfile in "${template_files[@]}"; do
    if [ ! -f "$tfile" ]; then
      # Heading = file name without .md, underscores as spaces, words capitalised
      title=$(basename "$tfile" | sed 's/\.md$//; s/_/ /g' | sed 's/\b\(.\)/\u\1/g')
      {
        echo "# ${title}"
        echo ""
        echo "## Overview"
        echo ""
        echo "<!-- Add content here -->"
      } > "$tfile"
      echo " + Created: ${tfile}"
      migrated=true
    fi
  done

  # Commit and push changes if any were made
  if [ "$migrated" = true ]; then
    # Auto-configure repo-local git identity if missing
    if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then
      git -C "$ops_root" config user.name "disinto-admin"
    fi
    if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then
      git -C "$ops_root" config user.email "disinto-admin@localhost"
    fi
    git -C "$ops_root" add -A
    # Only commit when staging actually produced a change.
    if ! git -C "$ops_root" diff --cached --quiet 2>/dev/null; then
      git -C "$ops_root" commit -m "chore: migrate ops repo structure to canonical layout" -q
      # Push if remote exists
      if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then
        if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then
          echo "Migrated: ops repo structure updated and pushed"
        else
          echo "Warning: failed to push migration to ops repo" >&2
        fi
      fi
    fi
  else
    echo " (all directories and files already present)"
  fi
}

View file

@ -61,15 +61,13 @@ _prl_log() {
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# pr_create — Create a PR via forge API. # pr_create — Create a PR via forge API.
# Args: branch title body [base_branch] [api_url] # Args: branch title body [base_branch]
# Stdout: PR number # Stdout: PR number
# Returns: 0=created (or found existing), 1=failed # Returns: 0=created (or found existing), 1=failed
# api_url defaults to FORGE_API if not provided
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
pr_create() { pr_create() {
local branch="$1" title="$2" body="$3" local branch="$1" title="$2" body="$3"
local base="${4:-${PRIMARY_BRANCH:-main}}" local base="${4:-${PRIMARY_BRANCH:-main}}"
local api_url="${5:-${FORGE_API}}"
local tmpfile resp http_code resp_body pr_num local tmpfile resp http_code resp_body pr_num
tmpfile=$(mktemp /tmp/prl-create-XXXXXX.json) tmpfile=$(mktemp /tmp/prl-create-XXXXXX.json)
@ -79,7 +77,7 @@ pr_create() {
resp=$(curl -s -w "\n%{http_code}" -X POST \ resp=$(curl -s -w "\n%{http_code}" -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \ -H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
"${api_url}/pulls" \ "${FORGE_API}/pulls" \
--data-binary @"$tmpfile") || true --data-binary @"$tmpfile") || true
rm -f "$tmpfile" rm -f "$tmpfile"
@ -94,7 +92,7 @@ pr_create() {
return 0 return 0
;; ;;
409) 409)
pr_num=$(pr_find_by_branch "$branch" "$api_url") || true pr_num=$(pr_find_by_branch "$branch") || true
if [ -n "$pr_num" ]; then if [ -n "$pr_num" ]; then
_prl_log "PR already exists: #${pr_num}" _prl_log "PR already exists: #${pr_num}"
printf '%s' "$pr_num" printf '%s' "$pr_num"
@ -112,17 +110,15 @@ pr_create() {
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# pr_find_by_branch — Find an open PR by head branch name. # pr_find_by_branch — Find an open PR by head branch name.
# Args: branch [api_url] # Args: branch
# Stdout: PR number # Stdout: PR number
# Returns: 0=found, 1=not found # Returns: 0=found, 1=not found
# api_url defaults to FORGE_API if not provided
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
pr_find_by_branch() { pr_find_by_branch() {
local branch="$1" local branch="$1"
local api_url="${2:-${FORGE_API}}"
local pr_num local pr_num
pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${api_url}/pulls?state=open&limit=20" | \ "${FORGE_API}/pulls?state=open&limit=20" | \
jq -r --arg b "$branch" '.[] | select(.head.ref == $b) | .number' \ jq -r --arg b "$branch" '.[] | select(.head.ref == $b) | .number' \
| head -1) || true | head -1) || true
if [ -n "$pr_num" ]; then if [ -n "$pr_num" ]; then
@ -348,29 +344,6 @@ pr_is_merged() {
[ "$merged" = "true" ] [ "$merged" = "true" ]
} }
# ---------------------------------------------------------------------------
# pr_close — Close a PR via forge API.
# Args: pr_number
# Returns: 0=closed, 1=error
# Globals read: FORGE_TOKEN, FORGE_API
# ---------------------------------------------------------------------------
pr_close() {
  local pr_num="$1"
  local reply status
  _prl_log "closing PR #${pr_num}"
  # -w appends the HTTP status code as the final line of the captured output;
  # a curl failure is tolerated here and judged by that status line instead.
  reply=$(curl -sf -w "\n%{http_code}" -X PATCH \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API}/pulls/${pr_num}" \
    -d '{"state":"closed"}' 2>/dev/null) || true
  # Keep only the last line of the reply (the status code).
  status="${reply##*$'\n'}"
  case "$status" in
    200|204)
      ;;
    *)
      _prl_log "pr_close FAILED: HTTP ${status} for PR #${pr_num}"
      return 1
      ;;
  esac
  _prl_log "PR #${pr_num} closed"
}
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# pr_walk_to_merge — Walk a PR through CI, review, and merge. # pr_walk_to_merge — Walk a PR through CI, review, and merge.
# #
@ -405,18 +378,11 @@ pr_walk_to_merge() {
if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then
ci_retry_count=$((ci_retry_count + 1)) ci_retry_count=$((ci_retry_count + 1))
_prl_log "infra failure — retriggering CI (retry ${ci_retry_count})" _prl_log "infra failure — retriggering CI (retry ${ci_retry_count})"
local rebase_output rebase_rc
( cd "$worktree" && \ ( cd "$worktree" && \
git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \ git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \
git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \ git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \
git rebase "${remote}/${PRIMARY_BRANCH}" && \ git rebase "${remote}/${PRIMARY_BRANCH}" && \
git push --force-with-lease "$remote" HEAD ) > /tmp/rebase-output-$$ 2>&1 git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true
rebase_rc=$?
rebase_output=$(cat /tmp/rebase-output-$$)
rm -f /tmp/rebase-output-$$
if [ "$rebase_rc" -ne 0 ]; then
_prl_log "rebase/push failed (exit code $rebase_rc): $(echo "$rebase_output" | tail -5)"
fi
continue continue
fi fi
@ -428,23 +394,6 @@ pr_walk_to_merge() {
fi fi
_prl_log "CI failed — invoking agent (attempt ${ci_fix_count}/${max_ci_fixes})" _prl_log "CI failed — invoking agent (attempt ${ci_fix_count}/${max_ci_fixes})"
# Get CI logs from SQLite database if available
local ci_logs=""
if [ -n "$_PR_CI_PIPELINE" ] && [ -n "${FACTORY_ROOT:-}" ]; then
ci_logs=$(ci_get_logs "$_PR_CI_PIPELINE" 2>/dev/null | tail -50) || ci_logs=""
fi
local logs_section=""
if [ -n "$ci_logs" ]; then
logs_section="
CI Log Output (last 50 lines):
\`\`\`
${ci_logs}
\`\`\`
"
fi
agent_run --resume "$session_id" --worktree "$worktree" \ agent_run --resume "$session_id" --worktree "$worktree" \
"CI failed on PR #${pr_num} (attempt ${ci_fix_count}/${max_ci_fixes}). "CI failed on PR #${pr_num} (attempt ${ci_fix_count}/${max_ci_fixes}).
@ -452,7 +401,7 @@ Pipeline: #${_PR_CI_PIPELINE:-?}
Failure type: ${_PR_CI_FAILURE_TYPE:-unknown} Failure type: ${_PR_CI_FAILURE_TYPE:-unknown}
Error log: Error log:
${_PR_CI_ERROR_LOG:-No logs available.}${logs_section} ${_PR_CI_ERROR_LOG:-No logs available.}
Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push: Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push:
git fetch ${remote} ${PRIMARY_BRANCH} && git rebase ${remote}/${PRIMARY_BRANCH} git fetch ${remote} ${PRIMARY_BRANCH} && git rebase ${remote}/${PRIMARY_BRANCH}
@ -488,7 +437,11 @@ Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push:
_PR_WALK_EXIT_REASON="merged" _PR_WALK_EXIT_REASON="merged"
return 0 return 0
fi fi
# Merge failed (conflict or HTTP 405) — ask agent to rebase if [ "$rc" -eq 2 ]; then
_PR_WALK_EXIT_REASON="merge_blocked"
return 1
fi
# Merge failed (conflict) — ask agent to rebase
_prl_log "merge failed — invoking agent to rebase" _prl_log "merge failed — invoking agent to rebase"
agent_run --resume "$session_id" --worktree "$worktree" \ agent_run --resume "$session_id" --worktree "$worktree" \
"PR #${pr_num} approved but merge failed: ${_PR_MERGE_ERROR:-unknown} "PR #${pr_num} approved but merge failed: ${_PR_MERGE_ERROR:-unknown}
@ -534,7 +487,8 @@ Commit, rebase on ${PRIMARY_BRANCH}, and push:
# build_phase_protocol_prompt — Generate push/commit instructions for Claude. # build_phase_protocol_prompt — Generate push/commit instructions for Claude.
# #
# For the synchronous agent_run architecture: tells Claude how to commit and # For the synchronous agent_run architecture: tells Claude how to commit and
# push (no phase files). # push (no phase files). For the tmux session architecture, use the
# build_phase_protocol_prompt in dev/phase-handler.sh instead.
# #
# Args: branch [remote] # Args: branch [remote]
# Stdout: instruction text # Stdout: instruction text

View file

@ -1,179 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# release.sh — disinto_release() function
#
# Handles vault TOML creation, branch setup on ops repo, PR creation,
# and auto-merge request for a versioned release.
#
# Globals expected:
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
# FORGE_TOKEN - API token for Forge operations
# FORGE_OPS_REPO - Ops repo slug (e.g. disinto-admin/myproject-ops)
# FACTORY_ROOT - Root of the disinto factory
# PRIMARY_BRANCH - Primary branch name (e.g. main)
#
# Usage:
# source "${FACTORY_ROOT}/lib/release.sh"
# disinto_release <version>
# =============================================================================
set -euo pipefail
# Source vault.sh for _vault_log helper
source "${FACTORY_ROOT}/lib/vault.sh"
# Verify that every global this module depends on is set and non-empty.
# Prints the full list of missing names to stderr and exits 1 if any is absent.
_assert_release_globals() {
  local name
  local absent=()
  for name in FORGE_URL FORGE_TOKEN FORGE_OPS_REPO FACTORY_ROOT PRIMARY_BRANCH; do
    # Indirect expansion with an empty default stays safe under `set -u`.
    if [ -z "${!name:-}" ]; then
      absent+=("$name")
    fi
  done
  if [ "${#absent[@]}" -eq 0 ]; then
    return 0
  fi
  echo "Error: release.sh requires these globals to be set: ${absent[*]}" >&2
  exit 1
}
# disinto_release — Create a release vault item and open a PR for it on the
# ops repo, then request auto-merge.
#
# Args: version (must match vMAJOR.MINOR.PATCH, e.g. v1.2.0)
# Globals read: FACTORY_ROOT, FORGE_URL, FORGE_TOKEN, FORGE_OPS_REPO,
#               PRIMARY_BRANCH, PROJECT_NAME (optional)
# Side effects: writes vault/actions/<id>.toml in the local ops checkout,
#               creates/pushes branch release/<version-without-dots>, opens a PR.
# Exits (not returns) with status 1 on any validation or setup error.
disinto_release() {
  _assert_release_globals

  local version="${1:-}"
  local formula_path="${FACTORY_ROOT}/formulas/release.toml"

  if [ -z "$version" ]; then
    echo "Error: version required" >&2
    echo "Usage: disinto release <version>" >&2
    echo "Example: disinto release v1.2.0" >&2
    exit 1
  fi

  # Validate version format (must start with 'v' followed by semver)
  if ! echo "$version" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then
    echo "Error: version must be in format v1.2.3 (semver with 'v' prefix)" >&2
    exit 1
  fi

  # Load project config to get FORGE_OPS_REPO
  if [ -z "${PROJECT_NAME:-}" ]; then
    # PROJECT_NAME is unset - detect project TOML from projects/ directory.
    # `|| true`: find exits non-zero when projects/ is missing, which under
    # `set -e -o pipefail` would otherwise abort here without any message.
    local found_toml
    found_toml=$(find "${FACTORY_ROOT}/projects" -maxdepth 1 -name "*.toml" ! -name "*.example" 2>/dev/null | head -1) || true
    if [ -n "$found_toml" ]; then
      source "${FACTORY_ROOT}/lib/load-project.sh" "$found_toml"
    fi
  else
    local project_toml="${FACTORY_ROOT}/projects/${PROJECT_NAME}.toml"
    if [ -f "$project_toml" ]; then
      source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml"
    fi
  fi

  # Check formula exists
  if [ ! -f "$formula_path" ]; then
    echo "Error: release formula not found at ${formula_path}" >&2
    exit 1
  fi

  # Get the ops repo root
  local ops_root="${FACTORY_ROOT}/../disinto-ops"
  if [ ! -d "${ops_root}/.git" ]; then
    echo "Error: ops repo not found at ${ops_root}" >&2
    echo " Run 'disinto init' to set up the ops repo first" >&2
    exit 1
  fi

  # Generate an ID for the vault item by dropping the dots from the version.
  # NOTE(review): this mapping is not injective — v1.20.0 and v12.0.0 both
  # become "release-v1200". Left unchanged because the ID/branch format may be
  # relied on elsewhere; confirm before switching to a collision-free form.
  local id="release-${version//./}"
  local vault_toml="${ops_root}/vault/actions/${id}.toml"

  # The actions directory may not exist yet in a fresh ops checkout.
  mkdir -p "${ops_root}/vault/actions"

  # Create vault TOML with the specific version
  cat > "$vault_toml" <<EOF
# vault/actions/${id}.toml
# Release vault item for ${version}
# Auto-generated by disinto release
id = "${id}"
formula = "release"
context = "Release ${version}"
secrets = ["GITHUB_TOKEN", "CODEBERG_TOKEN"]
mounts = ["ssh"]
EOF
  echo "Created vault item: ${vault_toml}"

  # Create a PR to submit the vault item to the ops repo
  local branch_name="release/${version//./}"
  local pr_title="release: ${version}"
  local pr_body="Release ${version}
This PR creates a vault item for the release of version ${version}.
## Changes
- Added vault item: ${id}.toml
## Next Steps
1. Review this PR
2. Approve and merge
3. The vault runner will execute the release formula
"

  # Create branch from clean primary branch (subshell keeps the caller's cwd)
  (
    cd "$ops_root"
    git checkout "$PRIMARY_BRANCH"
    git pull origin "$PRIMARY_BRANCH"
    git checkout -B "$branch_name" "$PRIMARY_BRANCH"
    # Add and commit only the vault TOML file
    git add "vault/actions/${id}.toml"
    # Commit failures are tolerated (presumably so a re-run with nothing new
    # to commit still proceeds to the push).
    git commit -m "$pr_title" -m "$pr_body" 2>/dev/null || true
    # Push branch
    git push -u origin "$branch_name" 2>/dev/null || {
      echo "Error: failed to push branch" >&2
      exit 1
    }
  )

  # Build the request body with jq so quotes, backslashes and newlines in any
  # field are escaped correctly (and no GNU-sed-specific script is needed).
  local pr_payload
  pr_payload=$(jq -n \
    --arg title "$pr_title" \
    --arg head "$branch_name" \
    --arg base "$PRIMARY_BRANCH" \
    --arg body "$pr_body" \
    '{title: $title, head: $head, base: $base, body: $body}') || {
    echo "Error: failed to build PR payload" >&2
    exit 1
  }

  # Create PR
  local pr_response
  pr_response=$(curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls" \
    -d "$pr_payload" 2>/dev/null) || {
    echo "Error: failed to create PR" >&2
    echo "Response: ${pr_response}" >&2
    exit 1
  }

  # Refuse to continue with a missing/null PR number; everything below
  # would otherwise target ".../pulls/null".
  local pr_number
  pr_number=$(printf '%s' "$pr_response" | jq -r '.number // empty') || pr_number=""
  if [ -z "$pr_number" ]; then
    echo "Error: PR creation response did not contain a PR number" >&2
    exit 1
  fi
  local pr_url="${FORGE_URL}/${FORGE_OPS_REPO}/pulls/${pr_number}"

  # Enable auto-merge on the PR — Forgejo will auto-merge after approval
  _vault_log "Enabling auto-merge for PR #${pr_number}"
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/merge" \
    -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || {
    echo "Warning: failed to enable auto-merge (may already be enabled or not supported)" >&2
  }

  echo ""
  echo "Release PR created: ${pr_url}"
  echo ""
  echo "Next steps:"
  echo " 1. Review the PR"
  echo " 2. Approve the PR (auto-merge will trigger after approval)"
  echo " 3. The vault runner will execute the release formula"
  echo ""
  echo "After merge, the release will:"
  echo " 1. Tag Forgejo main with ${version}"
  echo " 2. Push tag to mirrors (Codeberg, GitHub)"
  echo " 3. Build and tag the agents Docker image"
  echo " 4. Restart agent containers"
}

View file

@ -1,197 +0,0 @@
#!/usr/bin/env bash
# stack-lock.sh — File-based lock protocol for singleton project stack access
#
# Prevents CI pipelines and the reproduce-agent from stepping on each other
# when sharing a single project stack (e.g. harb docker compose).
#
# Lock file: ${HOME}/data/locks/<project>-stack.lock (e.g. /home/agent/data/locks/<project>-stack.lock)
# Contents: {"holder": "reproduce-agent-42", "since": "...", "heartbeat": "..."}
#
# Protocol:
# 1. stack_lock_check — inspect current lock state
# 2. stack_lock_acquire — wait until lock is free, then claim it
# 3. stack_lock_release — delete lock file when done
#
# Heartbeat: callers must update the heartbeat every 2 minutes while holding
# the lock by calling stack_lock_heartbeat. A heartbeat older than 10 minutes
# is considered stale — the next acquire will break it.
#
# Usage:
# source "$(dirname "$0")/../lib/stack-lock.sh"
# stack_lock_acquire "ci-pipeline-$BUILD_NUMBER" "myproject"
# trap 'stack_lock_release "myproject"' EXIT
# # ... do work ...
# stack_lock_release "myproject"
set -euo pipefail
STACK_LOCK_DIR="${HOME}/data/locks"
STACK_LOCK_POLL_INTERVAL=30 # seconds between retry polls
STACK_LOCK_STALE_SECONDS=600 # 10 minutes — heartbeat older than this = stale
STACK_LOCK_MAX_WAIT=3600 # 1 hour — give up after this many seconds
# _stack_lock_path <project>
# Write to stdout the lock-file location for <project> inside STACK_LOCK_DIR.
_stack_lock_path() {
  printf '%s/%s-stack.lock\n' "${STACK_LOCK_DIR}" "$1"
}
# _stack_lock_now
# Emit the current time in UTC as YYYY-MM-DDTHH:MM:SSZ.
_stack_lock_now() {
  local iso_utc_format='+%Y-%m-%dT%H:%M:%SZ'
  date -u "$iso_utc_format"
}
# _stack_lock_epoch <iso_timestamp>
# Print the Unix epoch (seconds) for a UTC timestamp such as
# 2026-03-28T15:36:46Z. Returns non-zero, printing nothing, if neither
# date invocation can parse the value.
_stack_lock_epoch() {
  # Only the trailing "Z" is removed; the "T" separator is passed through.
  local stamp="${1%Z}"
  # First try the `date -d` form; fall back to the `date -j -f` form
  # (the two syntaxes belong to different date implementations).
  if ! date -u -d "$stamp" +%s 2>/dev/null; then
    date -u -j -f "%Y-%m-%dT%H:%M:%S" "$stamp" +%s 2>/dev/null
  fi
}
# stack_lock_check <project>
# Report the lock state on stdout as one of:
#   free            — no lock file exists
#   held:<holder>   — lock file exists and its heartbeat is recent enough
#   stale:<holder>  — heartbeat missing, or older than STACK_LOCK_STALE_SECONDS
# <holder> falls back to "unknown" when the file cannot be parsed.
# The state is carried on stdout; the function does not signal it via status.
stack_lock_check() {
  local project="$1"
  local path
  path="$(_stack_lock_path "$project")"
  if [ ! -f "$path" ]; then
    echo "free"
    return 0
  fi

  local owner beat
  owner=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder","unknown"))' "$path" 2>/dev/null || echo "unknown")
  beat=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("heartbeat",""))' "$path" 2>/dev/null || echo "")

  # Start from "stale" and upgrade to "held" only when a heartbeat exists
  # and is within the allowed age.
  local verdict="stale"
  if [ -n "$beat" ]; then
    local beat_epoch elapsed
    # An unparseable heartbeat counts as epoch 0, i.e. very old.
    beat_epoch=$(_stack_lock_epoch "$beat" 2>/dev/null || echo "0")
    elapsed=$(( $(date -u +%s) - beat_epoch ))
    if [ "$elapsed" -le "$STACK_LOCK_STALE_SECONDS" ]; then
      verdict="held"
    fi
  fi
  echo "${verdict}:${owner}"
}
# stack_lock_acquire <holder_id> <project> [max_wait_seconds]
# Acquire the lock for <project> on behalf of <holder_id>.
# Polls every STACK_LOCK_POLL_INTERVAL seconds while the lock is held.
# Breaks stale locks automatically.
# Returns 0 once the lock is owned; returns 1 if the lock is still held after
# max_wait_seconds (default STACK_LOCK_MAX_WAIT), if the lock file cannot be
# created, or if stack_lock_check reports an unexpected status.
#
# The lock file is published with `ln`, which fails when the target already
# exists. A plain `mv` would silently replace a lock that another process
# created between our "free" check and our write, letting two holders believe
# they own the stack at the same time.
# NOTE(review): breaking a stale lock is still check-then-remove, so two
# waiters that both observe the same stale lock can still race there.
stack_lock_acquire() {
  local holder="$1"
  local project="$2"
  local max_wait="${3:-$STACK_LOCK_MAX_WAIT}"
  local lock_file
  lock_file="$(_stack_lock_path "$project")"
  local deadline
  deadline=$(( $(date -u +%s) + max_wait ))
  # Counts failed publishes that cannot be explained by a competing lock.
  local link_failures=0

  mkdir -p "$STACK_LOCK_DIR"

  while true; do
    local status
    status=$(stack_lock_check "$project")
    case "$status" in
      free)
        # Write the full content to a temp file first so readers never see a
        # partially written lock, then publish it atomically and exclusively.
        local tmp_lock now
        tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX") || {
          echo "[stack-lock] cannot create temp file in ${STACK_LOCK_DIR}" >&2
          return 1
        }
        now=$(_stack_lock_now)
        printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \
          "$holder" "$now" "$now" > "$tmp_lock"
        if ln "$tmp_lock" "$lock_file" 2>/dev/null; then
          rm -f -- "$tmp_lock"
          echo "[stack-lock] acquired lock for ${project} as ${holder}" >&2
          return 0
        fi
        rm -f -- "$tmp_lock"
        # Normal case: someone else won the race and the lock now exists —
        # loop back and wait on it. If no lock exists either, linking itself
        # is failing; give up after a few tries instead of spinning forever.
        if [ ! -e "$lock_file" ]; then
          link_failures=$((link_failures + 1))
          if [ "$link_failures" -ge 3 ]; then
            echo "[stack-lock] cannot create lock file ${lock_file}" >&2
            return 1
          fi
        fi
        ;;
      stale:*)
        local stale_holder="${status#stale:}"
        echo "[stack-lock] breaking stale lock held by ${stale_holder} for ${project}" >&2
        rm -f "$lock_file"
        # Loop back immediately to re-check and claim
        ;;
      held:*)
        local cur_holder="${status#held:}"
        local remaining
        remaining=$(( deadline - $(date -u +%s) ))
        if [ "$remaining" -le 0 ]; then
          echo "[stack-lock] timed out waiting for lock on ${project} (held by ${cur_holder})" >&2
          return 1
        fi
        echo "[stack-lock] ${project} locked by ${cur_holder}, waiting ${STACK_LOCK_POLL_INTERVAL}s (${remaining}s left)..." >&2
        sleep "$STACK_LOCK_POLL_INTERVAL"
        ;;
      *)
        echo "[stack-lock] unexpected status '${status}' for ${project}" >&2
        return 1
        ;;
    esac
  done
}
# stack_lock_heartbeat <holder_id> <project>
# Refresh the "heartbeat" field of <project>'s lock file to the current time,
# keeping "holder" and "since" as they were.
# Intended to be called every 2 minutes while holding the lock.
# Does nothing (status 0) when the lock file is missing or its holder is not
# <holder_id>.
stack_lock_heartbeat() {
  local holder="$1"
  local project="$2"
  local lock_file
  lock_file="$(_stack_lock_path "$project")"

  if [ ! -f "$lock_file" ]; then
    return 0
  fi

  local owner
  owner=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "")
  if [ "$owner" != "$holder" ]; then
    return 0
  fi

  local started stamp scratch
  started=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("since",""))' "$lock_file" 2>/dev/null || echo "")
  stamp=$(_stack_lock_now)
  # Write the new content beside the lock, then rename over it so readers
  # never observe a half-written file.
  scratch=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX")
  printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \
    "$holder" "$started" "$stamp" > "$scratch"
  mv "$scratch" "$lock_file"
}
# stack_lock_release <project> [holder_id]
# Delete the lock file for <project>.
# When holder_id is given, the lock is removed only if the file names that
# holder; otherwise the call is refused with status 1 so one holder cannot
# drop another holder's lock by accident.
# A missing lock file is not an error (status 0, nothing printed).
stack_lock_release() {
  local project="$1"
  local holder="${2:-}"
  local lock_file
  lock_file="$(_stack_lock_path "$project")"

  if [ ! -f "$lock_file" ]; then
    return 0
  fi

  if [ -n "$holder" ]; then
    local current_holder
    current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "")
    [ "$current_holder" = "$holder" ] || {
      echo "[stack-lock] refusing to release: lock held by '${current_holder}', not '${holder}'" >&2
      return 1
    }
  fi

  rm -f "$lock_file"
  echo "[stack-lock] released lock for ${project}" >&2
}

View file

@ -1,312 +0,0 @@
#!/usr/bin/env bash
# vault.sh — Helper for agents to create vault PRs on ops repo
#
# Source after lib/env.sh:
# source "$(dirname "$0")/../lib/env.sh"
# source "$(dirname "$0")/lib/vault.sh"
#
# Required globals: FORGE_TOKEN, FORGE_URL, FORGE_REPO, FORGE_OPS_REPO
# Optional: OPS_REPO_ROOT (local path for ops repo)
#
# Functions:
# vault_request <action_id> <toml_content> — Create vault PR, return PR number
#
# The function:
# 1. Validates TOML content using validate_vault_action() from vault/vault-env.sh
# 2. Creates a branch on the ops repo: vault/<action-id>
# 3. Writes TOML to vault/actions/<action-id>.toml on that branch
# 4. Creates PR targeting main with title "vault: <action-id>"
# 5. Body includes context field from TOML
# 6. Returns PR number (existing or newly created)
#
# Idempotent: if PR for same action-id exists, returns its number
#
# Uses Forgejo REST API (not git push) — works from containers without SSH
set -euo pipefail
# Internal log helper.
# Delegates to a shell function named `log` when one is defined; otherwise
# writes a UTC-timestamped "vault: ..." line to stderr.
_vault_log() {
  if ! declare -f log >/dev/null 2>&1; then
    printf '[%s] vault: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2
    return
  fi
  log "vault: $*"
}
# Print the REST API base URL of the ops repo, built from FORGE_URL and
# FORGE_OPS_REPO (no trailing newline).
_vault_ops_api() {
  printf '%s/api/v1/repos/%s' "${FORGE_URL}" "${FORGE_OPS_REPO}"
}
# -----------------------------------------------------------------------------
# _vault_commit_direct — Commit low-tier action directly to the ops primary
# branch, without a PR.
# Args: ops_api tmp_toml_file action_id
# Globals read: FORGE_ADMIN_TOKEN (falls back to FORGE_TOKEN), FORGE_REPO,
#               PRIMARY_BRANCH (defaults to "main")
# Returns: 0=committed, 1=missing token, branch lookup failed, or write failed
# -----------------------------------------------------------------------------
_vault_commit_direct() {
  local ops_api="$1"
  local tmp_toml="$2"
  local action_id="$3"
  local file_path="vault/actions/${action_id}.toml"
  local target_branch="${PRIMARY_BRANCH:-main}"

  # Prefer FORGE_ADMIN_TOKEN, fall back to FORGE_TOKEN. The inner default keeps
  # `set -u` from aborting before the explicit check below can report.
  local admin_token="${FORGE_ADMIN_TOKEN:-${FORGE_TOKEN:-}}"
  if [ -z "$admin_token" ]; then
    echo "ERROR: FORGE_ADMIN_TOKEN is required for low-tier commits" >&2
    return 1
  fi

  # Resolve the target branch SHA. The value is only used as a check that the
  # branch is reachable with this token; the write below does not send it.
  local main_sha
  main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \
    "${ops_api}/git/branches/${target_branch}" 2>/dev/null | \
    jq -r '.commit.id // empty' || true)
  if [ -z "$main_sha" ]; then
    main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \
      "${ops_api}/git/refs/heads/${target_branch}" 2>/dev/null | \
      jq -r '.object.sha // empty' || true)
  fi
  if [ -z "$main_sha" ]; then
    echo "ERROR: could not get main branch SHA" >&2
    return 1
  fi

  _vault_log "Committing ${file_path} directly to ${target_branch}"

  # Encode TOML content as single-line base64. Stripping newlines with tr
  # avoids the GNU-only `-w 0` flag.
  local encoded_content
  encoded_content=$(base64 < "$tmp_toml" | tr -d '\n') || {
    echo "ERROR: failed to encode ${tmp_toml}" >&2
    return 1
  }

  # Build the request body with jq so every field is escaped as valid JSON.
  local payload
  payload=$(jq -n \
    --arg message "vault: add ${action_id} (low-tier)" \
    --arg branch "$target_branch" \
    --arg content "$encoded_content" \
    --arg email "vault-bot@${FORGE_REPO}" \
    '{message: $message, branch: $branch, content: $content,
      committer: {name: "vault-bot", email: $email}, overwrite: true}') || {
    echo "ERROR: failed to build request for ${file_path}" >&2
    return 1
  }

  # Commit directly to the target branch using the Forgejo content API
  if ! curl -sf -X PUT \
    -H "Authorization: token ${admin_token}" \
    -H "Content-Type: application/json" \
    "${ops_api}/contents/${file_path}" \
    -d "$payload" >/dev/null 2>&1; then
    echo "ERROR: failed to write ${file_path} to ${target_branch}" >&2
    return 1
  fi

  _vault_log "Direct commit successful for ${action_id}"
}
# -----------------------------------------------------------------------------
# vault_request — Create a vault PR or return existing one
# Args: action_id toml_content
# Stdout: PR number (nothing is printed when a low-tier action is committed
#         directly instead of going through a PR)
# Returns: 0=success, 1=invalid arguments, validation failure, or API error
# Globals read: FORGE_TOKEN, FORGE_ADMIN_TOKEN, FORGE_REPO, PRIMARY_BRANCH
#               (defaults to "main"), FACTORY_ROOT, VAULT_ACTION_* (presumably
#               populated by validate_vault_action — verify in vault-env.sh)
# Globals written: VAULT_TIER (exported)
#
# Temp files are removed explicitly right after their single use rather than
# with `trap ... RETURN`: in bash a RETURN trap is global, so it would stay
# installed after this function returns and also run when a sourced file
# finishes — including the `source` of vault-env.sh below.
# -----------------------------------------------------------------------------
vault_request() {
  local action_id="${1:-}"
  local toml_content="${2:-}"

  if [ -z "$action_id" ]; then
    echo "ERROR: action_id is required" >&2
    return 1
  fi
  if [ -z "$toml_content" ]; then
    echo "ERROR: toml_content is required" >&2
    return 1
  fi

  # Token for API calls: FORGE_ADMIN_TOKEN when set, FORGE_TOKEN otherwise
  local admin_token="${FORGE_ADMIN_TOKEN:-${FORGE_TOKEN:-}}"
  local base_branch="${PRIMARY_BRANCH:-main}"

  # Get ops repo API URL
  local ops_api
  ops_api="$(_vault_ops_api)"

  # Check if PR already exists for this action (keeps the call idempotent)
  local existing_pr
  existing_pr=$(pr_find_by_branch "vault/${action_id}" "$ops_api") || true
  if [ -n "$existing_pr" ]; then
    _vault_log "PR already exists for action $action_id: #${existing_pr}"
    printf '%s' "$existing_pr"
    return 0
  fi

  # Locate the factory root: FACTORY_ROOT, else the parent of this file's dir
  local factory_root="${FACTORY_ROOT:-}"
  if [ -z "$factory_root" ]; then
    factory_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
  fi

  # Source vault-env.sh for validate_vault_action
  local vault_env="${factory_root}/vault/vault-env.sh"
  if [ ! -f "$vault_env" ]; then
    echo "ERROR: vault-env.sh not found at $vault_env" >&2
    return 1
  fi

  # Save caller's FORGE_TOKEN, source vault-env.sh for validate_vault_action,
  # then restore caller's token so PR creation uses agent's identity (not vault-bot)
  local _saved_forge_token="${FORGE_TOKEN:-}"
  if ! source "$vault_env"; then
    FORGE_TOKEN="${_saved_forge_token:-}"
    echo "ERROR: failed to source vault-env.sh" >&2
    return 1
  fi
  FORGE_TOKEN="${_saved_forge_token:-}"

  # Validate TOML content via a short-lived temp file
  local tmp_toml
  tmp_toml=$(mktemp /tmp/vault-XXXXXX.toml) || {
    echo "ERROR: failed to create temp file" >&2
    return 1
  }
  local validation_rc=0
  if printf '%s' "$toml_content" > "$tmp_toml"; then
    validate_vault_action "$tmp_toml" || validation_rc=$?
  else
    validation_rc=1
  fi
  rm -f -- "$tmp_toml"
  if [ "$validation_rc" -ne 0 ]; then
    echo "ERROR: TOML validation failed" >&2
    return 1
  fi

  # Classify the action to determine if PR bypass is allowed
  local classify_script="${factory_root}/vault/classify.sh"
  local vault_tier
  vault_tier=$("$classify_script" "${VAULT_ACTION_FORMULA:-}" "${VAULT_BLAST_RADIUS_OVERRIDE:-}") || {
    # Classification failed, default to high tier (require PR)
    vault_tier="high"
    _vault_log "Warning: classification failed, defaulting to high tier"
  }
  export VAULT_TIER="${vault_tier}"

  # For low-tier actions, commit directly to ops main (no PR)
  if [ "$vault_tier" = "low" ]; then
    local direct_toml
    direct_toml=$(mktemp /tmp/vault-direct-XXXXXX.toml) || {
      echo "ERROR: failed to create temp file" >&2
      return 1
    }
    local commit_rc=0
    # Prepend dispatch_mode = "direct" to mark the item as committed without PR
    if printf 'dispatch_mode = "direct"\n%s\n' "$toml_content" > "$direct_toml"; then
      _vault_commit_direct "$ops_api" "$direct_toml" "${action_id}" || commit_rc=$?
    else
      commit_rc=1
    fi
    rm -f -- "$direct_toml"
    # Report a failed commit to the caller instead of claiming success.
    if [ "$commit_rc" -ne 0 ]; then
      echo "ERROR: direct commit failed for ${action_id}" >&2
      return 1
    fi
    _vault_log "low-tier — committed directly to ops main"
    return 0
  fi

  # Extract values for PR creation (medium/high tier)
  local pr_title pr_body
  pr_title="vault: ${action_id}"
  pr_body="Vault action: ${action_id}
Context: ${VAULT_ACTION_CONTEXT:-No context provided}
Formula: ${VAULT_ACTION_FORMULA:-}
Secrets: ${VAULT_ACTION_SECRETS:-}
---
This vault action has been created by an agent and requires admin approval
before execution. See the TOML file for details."

  # Create branch
  local branch="vault/${action_id}"
  local branch_exists
  branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "Authorization: token ${admin_token}" \
    "${ops_api}/git/branches/${branch}" 2>/dev/null || echo "0")
  if [ "$branch_exists" != "200" ]; then
    # Branch doesn't exist, create it from the primary branch
    _vault_log "Creating branch ${branch} on ops repo"

    # Get the commit SHA of the primary branch
    local main_sha
    main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \
      "${ops_api}/git/branches/${base_branch}" 2>/dev/null | \
      jq -r '.commit.id // empty' || true)
    if [ -z "$main_sha" ]; then
      # Fallback: get from refs
      main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \
        "${ops_api}/git/refs/heads/${base_branch}" 2>/dev/null | \
        jq -r '.object.sha // empty' || true)
    fi
    if [ -z "$main_sha" ]; then
      echo "ERROR: could not get main branch SHA" >&2
      return 1
    fi

    # Create the branch (payload built with jq for correct JSON escaping)
    local branch_payload
    branch_payload=$(jq -n --arg ref "$branch" --arg sha "$main_sha" \
      '{ref: $ref, sha: $sha}') || {
      echo "ERROR: failed to create branch ${branch}" >&2
      return 1
    }
    if ! curl -sf -X POST \
      -H "Authorization: token ${admin_token}" \
      -H "Content-Type: application/json" \
      "${ops_api}/git/branches" \
      -d "$branch_payload" >/dev/null 2>&1; then
      echo "ERROR: failed to create branch ${branch}" >&2
      return 1
    fi
  else
    _vault_log "Branch ${branch} already exists"
  fi

  # Write TOML file to branch via API
  local file_path="vault/actions/${action_id}.toml"
  _vault_log "Writing ${file_path} to branch ${branch}"

  # Encode TOML content as single-line base64 (tr instead of GNU-only -w 0)
  local encoded_content
  encoded_content=$(printf '%s' "$toml_content" | base64 | tr -d '\n') || {
    echo "ERROR: failed to encode TOML content" >&2
    return 1
  }

  local file_payload
  file_payload=$(jq -n \
    --arg message "vault: add ${action_id}" \
    --arg branch "$branch" \
    --arg content "$encoded_content" \
    --arg email "vault-bot@${FORGE_REPO}" \
    '{message: $message, branch: $branch, content: $content,
      committer: {name: "vault-bot", email: $email}, overwrite: true}') || {
    echo "ERROR: failed to write ${file_path} to branch ${branch}" >&2
    return 1
  }

  # Upload file using Forgejo content API
  if ! curl -sf -X PUT \
    -H "Authorization: token ${admin_token}" \
    -H "Content-Type: application/json" \
    "${ops_api}/contents/${file_path}" \
    -d "$file_payload" >/dev/null 2>&1; then
    echo "ERROR: failed to write ${file_path} to branch ${branch}" >&2
    return 1
  fi

  # Create PR
  _vault_log "Creating PR for ${branch}"
  local pr_num
  pr_num=$(pr_create "$branch" "$pr_title" "$pr_body" "$base_branch" "$ops_api") || {
    echo "ERROR: failed to create PR" >&2
    return 1
  }

  # Enable auto-merge on the PR — Forgejo will auto-merge after approval
  _vault_log "Enabling auto-merge for PR #${pr_num}"
  curl -sf -X POST \
    -H "Authorization: token ${admin_token}" \
    -H "Content-Type: application/json" \
    "${ops_api}/pulls/${pr_num}/merge" \
    -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || {
    _vault_log "Warning: failed to enable auto-merge (may already be enabled or not supported)"
  }

  # Add labels to PR (vault, pending-approval)
  _vault_log "PR #${pr_num} created, adding labels"

  # Fetch the label list once and look both names up in it. Labelling is
  # best-effort: a missing label or failed request does not fail the call.
  local labels_json label_name label_id
  labels_json=$(curl -sf -H "Authorization: token ${admin_token}" \
    "${ops_api}/labels" 2>/dev/null) || labels_json=""
  for label_name in vault pending-approval; do
    label_id=$(printf '%s' "$labels_json" | \
      jq -r --arg n "$label_name" '.[] | select(.name == $n) | .id // empty' 2>/dev/null) || label_id=""
    # Add the label only if it exists on the repo
    if [ -n "$label_id" ]; then
      curl -sf -X POST \
        -H "Authorization: token ${admin_token}" \
        -H "Content-Type: application/json" \
        "${ops_api}/issues/${pr_num}/labels" \
        -d "[{\"id\":${label_id}}]" >/dev/null 2>&1 || true
    fi
  done

  printf '%s' "$pr_num"
  return 0
}

View file

@ -1,8 +1,8 @@
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 --> <!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
# Planner Agent # Planner Agent
**Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints), **Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints),
invoked by the polling loop in `docker/agents/entrypoint.sh` every 12 hours (iteration math at line 210-222) via tmux + Claude. executed directly from cron via tmux + Claude.
Phase 0 (preflight): pull latest code, load persistent memory and prerequisite Phase 0 (preflight): pull latest code, load persistent memory and prerequisite
tree from `$OPS_REPO_ROOT/knowledge/planner-memory.md` and `$OPS_REPO_ROOT/prerequisites.md`. Also reads tree from `$OPS_REPO_ROOT/knowledge/planner-memory.md` and `$OPS_REPO_ROOT/prerequisites.md`. Also reads
all available formulas: factory formulas (`$FACTORY_ROOT/formulas/*.toml`) and all available formulas: factory formulas (`$FACTORY_ROOT/formulas/*.toml`) and
@ -22,13 +22,12 @@ to detect issues ping-ponging between backlog and underspecified. Issues that
need human decisions or external resources are filed as vault procurement items need human decisions or external resources are filed as vault procurement items
(`$OPS_REPO_ROOT/vault/pending/*.md`) instead of being escalated. Phase 3 (`$OPS_REPO_ROOT/vault/pending/*.md`) instead of being escalated. Phase 3
(file-at-constraints): identify the top 3 unresolved prerequisites that block (file-at-constraints): identify the top 3 unresolved prerequisites that block
the most downstream objectives — file issues using a **template-or-vision gate**: the most downstream objectives — file issues as either `backlog` (code changes,
read issue templates from `.codeberg/ISSUE_TEMPLATE/*.yaml`, attempt to fill dev-agent) or `action` (run existing formula, action-agent). **Stuck issues
template fields (affected_files ≤3, acceptance_criteria ≤5, single clear approach), (detected BOUNCED/LABEL_CHURN) are dispatched to the `groom-backlog` formula
then apply complexity test: if work touches one subsystem with no design forks, in breakdown mode instead of being re-promoted** — this breaks the ping-pong
file as `backlog` using matching template (bug/feature/refactor); otherwise loop by splitting them into dev-agent-sized sub-issues. **Human-blocked issues
label `vision` with problem statement and why it's vision-sized. **Human-blocked are routed through the vault** — the planner files an actionable procurement
issues are routed through the vault** — the planner files an actionable procurement
item (`$OPS_REPO_ROOT/vault/pending/<project>-<slug>.md` with What/Why/Human action/Factory item (`$OPS_REPO_ROOT/vault/pending/<project>-<slug>.md` with What/Why/Human action/Factory
will then sections) and marks the prerequisite as blocked-on-vault in the tree. will then sections) and marks the prerequisite as blocked-on-vault in the tree.
Deduplication: checks pending/ + approved/ + fired/ before creating. Deduplication: checks pending/ + approved/ + fired/ before creating.
@ -41,31 +40,31 @@ AGENTS.md maintenance is handled by the Gardener.
prerequisite tree, memory, vault state) live under `$OPS_REPO_ROOT/`. prerequisite tree, memory, vault state) live under `$OPS_REPO_ROOT/`.
Each project manages its own planner state in a separate ops repo. Each project manages its own planner state in a separate ops repo.
**Trigger**: `planner-run.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh` **Trigger**: `planner-run.sh` runs daily via cron (accepts an optional project
every 12 hours (iteration math at line 210-222). Accepts an optional project TOML argument, TOML argument, defaults to `projects/disinto.toml`). Sources `lib/guard.sh` and
defaults to `projects/disinto.toml`. Sources `lib/guard.sh` and calls `check_active planner` calls `check_active planner` first — skips if `$FACTORY_ROOT/state/.planner-active`
first — skips if `$FACTORY_ROOT/state/.planner-active` is absent. Then creates a tmux session is absent. Then creates a tmux session with `claude --model opus`, injects
with `claude --model opus`, injects `formulas/run-planner.toml` as context, monitors the `formulas/run-planner.toml` as context, monitors the phase file, and cleans up
phase file, and cleans up on completion or timeout. No action issues — the planner is a on completion or timeout. No action issues — the planner is a nervous system
nervous system component, not work. component, not work.
**Key files**: **Key files**:
- `planner/planner-run.sh` — Polling loop participant + orchestrator: lock, memory guard, - `planner/planner-run.sh` — Cron wrapper + orchestrator: lock, memory guard,
sources disinto project config, builds structural analysis via `lib/formula-session.sh:build_graph_section()`, sources disinto project config, builds structural analysis via `lib/formula-session.sh:build_graph_section()`,
creates tmux session, injects formula prompt, monitors phase file, handles crash recovery, cleans up creates tmux session, injects formula prompt, monitors phase file, handles crash recovery, cleans up
- `formulas/run-planner.toml` — Execution spec: six steps (preflight, - `formulas/run-planner.toml` — Execution spec: six steps (preflight,
prediction-triage, update-prerequisite-tree, file-at-constraints, prediction-triage, update-prerequisite-tree, file-at-constraints,
journal-and-memory, commit-and-pr) with `needs` dependencies. Claude journal-and-memory, commit-and-pr) with `needs` dependencies. Claude
executes all steps in a single interactive session with tool access executes all steps in a single interactive session with tool access
- `formulas/groom-backlog.toml` — Grooming formula for backlog triage and - `formulas/groom-backlog.toml` — Dual-mode formula: grooming (default) or
grooming. (Note: the planner no longer dispatches breakdown mode — complex breakdown (dispatched by planner for bounced/stuck issues — splits the issue
issues are labeled `vision` instead.) into dev-agent-sized sub-issues, removes `underspecified` label)
- `$OPS_REPO_ROOT/prerequisites.md` — Prerequisite tree: versioned constraint - `$OPS_REPO_ROOT/prerequisites.md` — Prerequisite tree: versioned constraint
map linking VISION.md objectives to their prerequisites. Planner owns the map linking VISION.md objectives to their prerequisites. Planner owns the
tree, humans steer by editing VISION.md. Tree grows organically as the tree, humans steer by editing VISION.md. Tree grows organically as the
planner discovers new prerequisites during runs planner discovers new prerequisites during runs
- `$OPS_REPO_ROOT/knowledge/planner-memory.md` — Persistent memory across runs (in ops repo) - `$OPS_REPO_ROOT/knowledge/planner-memory.md` — Persistent memory across runs (in ops repo)
- `$OPS_REPO_ROOT/journal/planner/*.md` — Daily raw logs from each planner run (in ops repo)
**Constraint focus**: The planner uses Theory of Constraints to avoid premature **Constraint focus**: The planner uses Theory of Constraints to avoid premature
issue filing. Only the top 3 unresolved prerequisites that block the most issue filing. Only the top 3 unresolved prerequisites that block the most

View file

@ -1,12 +1,12 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# ============================================================================= # =============================================================================
# planner-run.sh — Polling-loop wrapper: planner execution via SDK + formula # planner-run.sh — Cron wrapper: planner execution via SDK + formula
# #
# Synchronous bash loop using claude -p (one-shot invocation). # Synchronous bash loop using claude -p (one-shot invocation).
# No tmux sessions, no phase files — the bash script IS the state machine. # No tmux sessions, no phase files — the bash script IS the state machine.
# #
# Flow: # Flow:
# 1. Guards: run lock, memory check # 1. Guards: cron lock, memory check
# 2. Load formula (formulas/run-planner.toml) # 2. Load formula (formulas/run-planner.toml)
# 3. Context: VISION.md, AGENTS.md, ops:RESOURCES.md, structural graph, # 3. Context: VISION.md, AGENTS.md, ops:RESOURCES.md, structural graph,
# planner memory, journal entries # planner memory, journal entries
@ -35,7 +35,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
# shellcheck source=../lib/agent-sdk.sh # shellcheck source=../lib/agent-sdk.sh
source "$FACTORY_ROOT/lib/agent-sdk.sh" source "$FACTORY_ROOT/lib/agent-sdk.sh"
LOG_FILE="${DISINTO_LOG_DIR}/planner/planner.log" LOG_FILE="$SCRIPT_DIR/planner.log"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh # shellcheck disable=SC2034 # consumed by agent-sdk.sh
LOGFILE="$LOG_FILE" LOGFILE="$LOG_FILE"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh # shellcheck disable=SC2034 # consumed by agent-sdk.sh
@ -43,64 +43,25 @@ SID_FILE="/tmp/planner-session-${PROJECT_NAME}.sid"
SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md" SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md"
WORKTREE="/tmp/${PROJECT_NAME}-planner-run" WORKTREE="/tmp/${PROJECT_NAME}-planner-run"
# Override LOG_AGENT for consistent agent identification log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
LOG_AGENT="planner"
# Override log() to append to planner-specific log file
# shellcheck disable=SC2034
log() {
local agent="${LOG_AGENT:-planner}"
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
}
# ── Guards ──────────────────────────────────────────────────────────────── # ── Guards ────────────────────────────────────────────────────────────────
check_active planner check_active planner
acquire_run_lock "/tmp/planner-run.lock" acquire_cron_lock "/tmp/planner-run.lock"
memory_guard 2000 check_memory 2000
log "--- Planner run start ---" log "--- Planner run start ---"
# ── Precondition checks: skip if nothing to plan ──────────────────────────
LAST_SHA_FILE="$FACTORY_ROOT/state/planner-last-sha"
LAST_OPS_SHA_FILE="$FACTORY_ROOT/state/planner-last-ops-sha"
CURRENT_SHA=$(git -C "$FACTORY_ROOT" rev-parse HEAD 2>/dev/null || echo "")
LAST_SHA=$(cat "$LAST_SHA_FILE" 2>/dev/null || echo "")
# ops repo is required for planner — pull before checking sha
ensure_ops_repo
CURRENT_OPS_SHA=$(git -C "$OPS_REPO_ROOT" rev-parse HEAD 2>/dev/null || echo "")
LAST_OPS_SHA=$(cat "$LAST_OPS_SHA_FILE" 2>/dev/null || echo "")
unreviewed_count=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues?labels=prediction/unreviewed&state=open&limit=1" 2>/dev/null | jq length) || unreviewed_count=0
vision_open=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues?labels=vision&state=open&limit=1" 2>/dev/null | jq length) || vision_open=0
if [ "$CURRENT_SHA" = "$LAST_SHA" ] \
&& [ "$CURRENT_OPS_SHA" = "$LAST_OPS_SHA" ] \
&& [ "${unreviewed_count:-0}" -eq 0 ] \
&& [ "${vision_open:-0}" -eq 0 ]; then
log "no new commits, no ops changes, no unreviewed predictions, no open vision — skipping"
exit 0
fi
log "sha=${CURRENT_SHA:0:8} ops=${CURRENT_OPS_SHA:0:8} unreviewed=${unreviewed_count} vision=${vision_open}"
# ── Resolve forge remote for git operations ─────────────────────────────
resolve_forge_remote
# ── Resolve agent identity for .profile repo ────────────────────────────
resolve_agent_identity || true
# ── Load formula + context ─────────────────────────────────────────────── # ── Load formula + context ───────────────────────────────────────────────
load_formula_or_profile "planner" "$FACTORY_ROOT/formulas/run-planner.toml" || exit 1 load_formula "$FACTORY_ROOT/formulas/run-planner.toml"
build_context_block VISION.md AGENTS.md ops:RESOURCES.md ops:prerequisites.md build_context_block VISION.md AGENTS.md ops:RESOURCES.md ops:prerequisites.md
# ── Build structural analysis graph ────────────────────────────────────── # ── Build structural analysis graph ──────────────────────────────────────
build_graph_section build_graph_section
# ── Ensure ops repo is available ───────────────────────────────────────
ensure_ops_repo
# ── Read planner memory ───────────────────────────────────────────────── # ── Read planner memory ─────────────────────────────────────────────────
MEMORY_BLOCK="" MEMORY_BLOCK=""
MEMORY_FILE="$OPS_REPO_ROOT/knowledge/planner-memory.md" MEMORY_FILE="$OPS_REPO_ROOT/knowledge/planner-memory.md"
@ -111,8 +72,24 @@ $(cat "$MEMORY_FILE")
" "
fi fi
# ── Prepare .profile context (lessons injection) ───────────────────────── # ── Read recent journal files ──────────────────────────────────────────
formula_prepare_profile_context JOURNAL_BLOCK=""
JOURNAL_DIR="$OPS_REPO_ROOT/journal/planner"
if [ -d "$JOURNAL_DIR" ]; then
# Load last 5 journal files (most recent first) for run history context
JOURNAL_FILES=$(find "$JOURNAL_DIR" -name '*.md' -type f | sort -r | head -5)
if [ -n "$JOURNAL_FILES" ]; then
JOURNAL_BLOCK="
### Recent journal entries (journal/planner/)
"
while IFS= read -r jf; do
JOURNAL_BLOCK="${JOURNAL_BLOCK}
#### $(basename "$jf")
$(cat "$jf")
"
done <<< "$JOURNAL_FILES"
fi
fi
# ── Read scratch file (compaction survival) ─────────────────────────────── # ── Read scratch file (compaction survival) ───────────────────────────────
SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE")
@ -128,7 +105,7 @@ build_sdk_prompt_footer "
PROMPT="You are the strategic planner for ${FORGE_REPO}. Work through the formula below. PROMPT="You are the strategic planner for ${FORGE_REPO}. Work through the formula below.
## Project context ## Project context
${CONTEXT_BLOCK}${MEMORY_BLOCK}$(formula_lessons_block) ${CONTEXT_BLOCK}${MEMORY_BLOCK}${JOURNAL_BLOCK}
${GRAPH_SECTION} ${GRAPH_SECTION}
${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT} ${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT}
} }
@ -148,13 +125,5 @@ export CLAUDE_MODEL="opus"
agent_run --worktree "$WORKTREE" "$PROMPT" agent_run --worktree "$WORKTREE" "$PROMPT"
log "agent_run complete" log "agent_run complete"
# Persist watermarks so next run can skip if nothing changed
mkdir -p "$FACTORY_ROOT/state"
echo "$CURRENT_SHA" > "$LAST_SHA_FILE"
echo "$CURRENT_OPS_SHA" > "$LAST_OPS_SHA_FILE"
# Write journal entry post-session
profile_write_journal "planner-run" "Planner run $(date -u +%Y-%m-%d)" "complete" "" || true
rm -f "$SCRATCH_FILE" rm -f "$SCRATCH_FILE"
log "--- Planner run done ---" log "--- Planner run done ---"

View file

@ -1,4 +1,4 @@
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 --> <!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
# Predictor Agent # Predictor Agent
**Role**: Abstract adversary (the "goblin"). Runs a 2-step formula **Role**: Abstract adversary (the "goblin"). Runs a 2-step formula
@ -22,15 +22,14 @@ exploit counts as 2 (prediction + action dispatch). The predictor MUST NOT
emit feature work — only observations challenging claims, exposing gaps, emit feature work — only observations challenging claims, exposing gaps,
and surfacing risks. and surfacing risks.
**Trigger**: `predictor-run.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh` **Trigger**: `predictor-run.sh` runs daily at 06:00 UTC via cron (1h before
every 24 hours (iteration math at line 224-236). Sources `lib/guard.sh` and calls the planner at 07:00). Sources `lib/guard.sh` and calls `check_active predictor`
`check_active predictor` first — skips if `$FACTORY_ROOT/state/.predictor-active` is absent. first — skips if `$FACTORY_ROOT/state/.predictor-active` is absent. Also guarded
Also guarded by PID lock (`/tmp/predictor-run.lock`) and memory check (skips if available by PID lock (`/tmp/predictor-run.lock`) and memory check (skips if available
RAM < 2000 MB). Note: the 24h cadence is iteration-based, not anchored to 06:00 UTC RAM < 2000 MB).
drifts on container restart.
**Key files**: **Key files**:
- `predictor/predictor-run.sh` — Polling loop participant + orchestrator: active-state guard, - `predictor/predictor-run.sh` — Cron wrapper + orchestrator: active-state guard,
lock, memory guard, sources disinto project config, builds structural analysis lock, memory guard, sources disinto project config, builds structural analysis
via `lib/formula-session.sh:build_graph_section()` (full-project scan — results via `lib/formula-session.sh:build_graph_section()` (full-project scan — results
included in prompt as `## Structural analysis`; failures non-fatal), builds included in prompt as `## Structural analysis`; failures non-fatal), builds
@ -45,7 +44,7 @@ drifts on container restart.
- `FORGE_TOKEN`, `FORGE_PREDICTOR_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`, `OPS_REPO_ROOT` - `FORGE_TOKEN`, `FORGE_PREDICTOR_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`, `OPS_REPO_ROOT`
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by predictor-run.sh) - `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by predictor-run.sh)
**Lifecycle**: predictor-run.sh (invoked by polling loop every 24h) → lock + memory guard → **Lifecycle**: predictor-run.sh (daily 06:00 cron) → lock + memory guard →
load formula + context (AGENTS.md, VISION.md from code repo; RESOURCES.md, prerequisites.md from ops repo) load formula + context (AGENTS.md, VISION.md from code repo; RESOURCES.md, prerequisites.md from ops repo)
→ create tmux session → Claude fetches prediction history (open + closed) → → create tmux session → Claude fetches prediction history (open + closed) →
reviews track record (actioned/dismissed/watching) → finds weaknesses reviews track record (actioned/dismissed/watching) → finds weaknesses

View file

@ -1,12 +1,12 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# ============================================================================= # =============================================================================
# predictor-run.sh — Polling-loop wrapper: predictor execution via SDK + formula # predictor-run.sh — Cron wrapper: predictor execution via SDK + formula
# #
# Synchronous bash loop using claude -p (one-shot invocation). # Synchronous bash loop using claude -p (one-shot invocation).
# No tmux sessions, no phase files — the bash script IS the state machine. # No tmux sessions, no phase files — the bash script IS the state machine.
# #
# Flow: # Flow:
# 1. Guards: run lock, memory check # 1. Guards: cron lock, memory check
# 2. Load formula (formulas/run-predictor.toml) # 2. Load formula (formulas/run-predictor.toml)
# 3. Context: AGENTS.md, ops:RESOURCES.md, VISION.md, structural graph # 3. Context: AGENTS.md, ops:RESOURCES.md, VISION.md, structural graph
# 4. agent_run(worktree, prompt) → Claude analyzes, writes to ops repo # 4. agent_run(worktree, prompt) → Claude analyzes, writes to ops repo
@ -14,7 +14,7 @@
# Usage: # Usage:
# predictor-run.sh [projects/disinto.toml] # project config (default: disinto) # predictor-run.sh [projects/disinto.toml] # project config (default: disinto)
# #
# Called by: entrypoint.sh polling loop (daily) # Cron: 0 6 * * * cd /path/to/dark-factory && bash predictor/predictor-run.sh
# ============================================================================= # =============================================================================
set -euo pipefail set -euo pipefail
@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
# shellcheck source=../lib/agent-sdk.sh # shellcheck source=../lib/agent-sdk.sh
source "$FACTORY_ROOT/lib/agent-sdk.sh" source "$FACTORY_ROOT/lib/agent-sdk.sh"
LOG_FILE="${DISINTO_LOG_DIR}/predictor/predictor.log" LOG_FILE="$SCRIPT_DIR/predictor.log"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh # shellcheck disable=SC2034 # consumed by agent-sdk.sh
LOGFILE="$LOG_FILE" LOGFILE="$LOG_FILE"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh # shellcheck disable=SC2034 # consumed by agent-sdk.sh
@ -44,40 +44,22 @@ SID_FILE="/tmp/predictor-session-${PROJECT_NAME}.sid"
SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md" SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md"
WORKTREE="/tmp/${PROJECT_NAME}-predictor-run" WORKTREE="/tmp/${PROJECT_NAME}-predictor-run"
# Override LOG_AGENT for consistent agent identification log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
LOG_AGENT="predictor"
# Override log() to append to predictor-specific log file
# shellcheck disable=SC2034
log() {
local agent="${LOG_AGENT:-predictor}"
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
}
# ── Guards ──────────────────────────────────────────────────────────────── # ── Guards ────────────────────────────────────────────────────────────────
check_active predictor check_active predictor
acquire_run_lock "/tmp/predictor-run.lock" acquire_cron_lock "/tmp/predictor-run.lock"
memory_guard 2000 check_memory 2000
log "--- Predictor run start ---" log "--- Predictor run start ---"
# ── Resolve forge remote for git operations ─────────────────────────────
resolve_forge_remote
# ── Resolve agent identity for .profile repo ────────────────────────────
resolve_agent_identity || true
# ── Load formula + context ─────────────────────────────────────────────── # ── Load formula + context ───────────────────────────────────────────────
load_formula_or_profile "predictor" "$FACTORY_ROOT/formulas/run-predictor.toml" || exit 1 load_formula "$FACTORY_ROOT/formulas/run-predictor.toml"
build_context_block AGENTS.md ops:RESOURCES.md VISION.md ops:prerequisites.md build_context_block AGENTS.md ops:RESOURCES.md VISION.md ops:prerequisites.md
# ── Build structural analysis graph ────────────────────────────────────── # ── Build structural analysis graph ──────────────────────────────────────
build_graph_section build_graph_section
# ── Prepare .profile context (lessons injection) ─────────────────────────
formula_prepare_profile_context
# ── Read scratch file (compaction survival) ─────────────────────────────── # ── Read scratch file (compaction survival) ───────────────────────────────
SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE")
SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE")
@ -100,10 +82,9 @@ Use WebSearch for external signal scanning — be targeted (project dependencies
and tools only, not general news). Limit to 3 web searches per run. and tools only, not general news). Limit to 3 web searches per run.
## Project context ## Project context
${CONTEXT_BLOCK}$(formula_lessons_block) ${CONTEXT_BLOCK}
${GRAPH_SECTION} ${GRAPH_SECTION}
${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT} ${SCRATCH_CONTEXT}
}
## Formula ## Formula
${FORMULA_CONTENT} ${FORMULA_CONTENT}
@ -117,8 +98,5 @@ formula_worktree_setup "$WORKTREE"
agent_run --worktree "$WORKTREE" "$PROMPT" agent_run --worktree "$WORKTREE" "$PROMPT"
log "agent_run complete" log "agent_run complete"
# Write journal entry post-session
profile_write_journal "predictor-run" "Predictor run $(date -u +%Y-%m-%d)" "complete" "" || true
rm -f "$SCRATCH_FILE" rm -f "$SCRATCH_FILE"
log "--- Predictor run done ---" log "--- Predictor run done ---"

View file

@ -5,7 +5,7 @@
name = "disinto" name = "disinto"
repo = "johba/disinto" repo = "johba/disinto"
ops_repo = "disinto-admin/disinto-ops" ops_repo = "johba/disinto-ops"
forge_url = "http://localhost:3000" forge_url = "http://localhost:3000"
repo_root = "/home/YOU/dark-factory" repo_root = "/home/YOU/dark-factory"
ops_repo_root = "/home/YOU/disinto-ops" ops_repo_root = "/home/YOU/disinto-ops"
@ -23,24 +23,6 @@ check_prs = true
check_dev_agent = true check_dev_agent = true
check_pipeline_stall = false check_pipeline_stall = false
# Local-model agents (optional) — configure to use llama-server or similar
# for local LLM inference. Each agent gets its own container with isolated
# credentials and configuration.
#
# When enabled, `disinto init` automatically:
# 1. Creates a Forgejo bot user matching agents.llama.forge_user
# 2. Generates FORGE_TOKEN_<BOT> and FORGE_PASS_<BOT> (stored in .env.enc)
# 3. Adds the bot user as a write collaborator on the project repo
#
# [agents.llama]
# base_url = "http://10.10.10.1:8081"
# model = "unsloth/Qwen3.5-35B-A3B"
# api_key = "sk-no-key-required"
# roles = ["dev"]
# forge_user = "dev-qwen"
# compact_pct = 60
# poll_interval = 60
# [mirrors] # [mirrors]
# github = "git@github.com:johba/disinto.git" # github = "git@github.com:johba/disinto.git"
# codeberg = "git@codeberg.org:johba/disinto.git" # codeberg = "git@codeberg.org:johba/disinto.git"

View file

@ -1,29 +1,16 @@
<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 --> <!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
# Review Agent # Review Agent
**Role**: AI-powered PR review — post structured findings and formal **Role**: AI-powered PR review — post structured findings and formal
approve/request-changes verdicts to forge. approve/request-changes verdicts to forge.
**Trigger**: `review-poll.sh` is invoked by the polling loop in `docker/agents/entrypoint.sh` **Trigger**: `review-poll.sh` runs every 10 min via cron. It scans open PRs
every 5 minutes (iteration math at line 163-167). It scans open PRs whose CI has passed and whose CI has passed and that lack a review for the current HEAD SHA, then
that lack a review for the current HEAD SHA, then spawns `review-pr.sh <pr-number>`. spawns `review-pr.sh <pr-number>`.
**Key files**: **Key files**:
- `review/review-poll.sh` — Polling loop participant: finds unreviewed PRs with passing CI. - `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent.
Invoked by `docker/agents/entrypoint.sh` every 5 minutes. Sources `lib/guard.sh` and calls - `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Before starting the session, runs `lib/build-graph.py --changed-files <PR files>` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it.
`check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent.
**Circuit breaker**: counts existing `<!-- review-error: <sha> -->` comments; skips a PR
if ≥3 consecutive errors for the same HEAD SHA (prevents flooding on repeated review failures).
- `review/review-pr.sh` — Polling loop participant: Creates/reuses a tmux session
(`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output,
posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing
tech debt. **cd at startup**: changes to `$PROJECT_REPO_ROOT` early in the script — before
any git commands — because the factory root is not a git repo after image rebuild (#408).
Calls `resolve_forge_remote()` at startup to determine the correct git remote name (avoids
hardcoded 'origin'). Before starting the session, runs `lib/build-graph.py --changed-files
<PR files>` and appends the JSON structural analysis (affected objectives, orphaned
prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review
proceeds without it.
**Environment variables consumed**: **Environment variables consumed**:
- `FORGE_TOKEN` — Dev-agent token (must not be the same account as FORGE_REVIEW_TOKEN) - `FORGE_TOKEN` — Dev-agent token (must not be the same account as FORGE_REVIEW_TOKEN)

View file

@ -23,15 +23,8 @@ LOGFILE="${DISINTO_LOG_DIR}/review/review-poll.log"
MAX_REVIEWS=3 MAX_REVIEWS=3
REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle
# Override LOG_AGENT for consistent agent identification
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
LOG_AGENT="review"
# Override log() to append to review-specific log file
# shellcheck disable=SC2034
log() { log() {
local agent="${LOG_AGENT:-review}" printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOGFILE"
} }
# Log rotation # Log rotation
@ -133,11 +126,10 @@ if [ -n "$REVIEW_SIDS" ]; then
log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}${current_sha:0:7})" log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}${current_sha:0:7})"
review_output=$("${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1) && review_rc=0 || review_rc=$? if "${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1; then
if [ "$review_rc" -eq 0 ]; then
REVIEWED=$((REVIEWED + 1)) REVIEWED=$((REVIEWED + 1))
else else
log " #${pr_num} re-review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" log " #${pr_num} re-review failed"
fi fi
[ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break [ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break
@ -174,25 +166,10 @@ while IFS= read -r line; do
log " #${PR_NUM} needs review (CI=success, SHA=${PR_SHA:0:7})" log " #${PR_NUM} needs review (CI=success, SHA=${PR_SHA:0:7})"
# Circuit breaker: count existing review-error comments for this SHA if "${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1; then
ERROR_COMMENTS=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${API_BASE}/issues/${PR_NUM}/comments" | \
jq --arg sha "$PR_SHA" \
'[.[] | select(.body | contains("<!-- review-error: " + $sha + " -->"))] | length')
if [ "${ERROR_COMMENTS:-0}" -ge 3 ]; then
log " #${PR_NUM} blocked: ${ERROR_COMMENTS} consecutive error comments for ${PR_SHA:0:7}, skipping"
SKIPPED=$((SKIPPED + 1))
continue
fi
log " #${PR_NUM} error check: ${ERROR_COMMENTS:-0} prior error(s) for ${PR_SHA:0:7}"
review_output=$("${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1) && review_rc=0 || review_rc=$?
if [ "$review_rc" -eq 0 ]; then
REVIEWED=$((REVIEWED + 1)) REVIEWED=$((REVIEWED + 1))
else else
log " #${PR_NUM} review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" log " #${PR_NUM} review failed"
fi fi
if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then

Some files were not shown because too many files have changed in this diff Show more