From a604971613df64dc0b3d8307417b3d537711be5d Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 28 Mar 2026 11:07:25 +0000 Subject: [PATCH 1/6] fix: Migrate action-agent.sh to SDK + shared libraries (#5) Co-Authored-By: Claude Opus 4.6 (1M context) --- action/action-agent.sh | 287 ++++++++++++++++++----------------------- 1 file changed, 126 insertions(+), 161 deletions(-) diff --git a/action/action-agent.sh b/action/action-agent.sh index ab44108..38d7d39 100755 --- a/action/action-agent.sh +++ b/action/action-agent.sh @@ -1,73 +1,72 @@ #!/usr/bin/env bash -# action-agent.sh — Autonomous action agent: tmux + Claude + action formula +# ============================================================================= +# action-agent.sh — Synchronous action agent: SDK + shared libraries +# +# Synchronous bash loop using claude -p (one-shot invocation). +# No tmux sessions, no phase files — the bash script IS the state machine. # # Usage: ./action-agent.sh [project.toml] # -# Lifecycle: -# 1. Fetch issue body (action formula) + existing comments -# 2. Create isolated git worktree: /tmp/action-{issue}-{timestamp} -# 3. Create tmux session: action-{project}-{issue_num} with interactive claude in worktree -# 4. Inject initial prompt: formula + comments + phase protocol instructions -# 5. Monitor phase file via monitor_phase_loop (shared with dev-agent) -# Path A (git output): Claude pushes → handler creates PR → CI poll → review -# injection → merge → cleanup (same loop as dev-agent via phase-handler.sh) -# Path B (no git output): Claude posts results → PHASE:done → cleanup -# 6. For human input: Claude writes PHASE:escalate; human responds via vault/forge -# 7. Cleanup on terminal phase: kill children, destroy worktree, remove temp files +# Flow: +# 1. Preflight: issue_check_deps(), memory guard, concurrency lock +# 2. Parse model from YAML front matter in issue body (custom model selection) +# 3. Worktree: worktree_create() for action isolation +# 4. 
Load formula from issue body +# 5. Build prompt: formula + prior non-bot comments (resume context) +# 6. agent_run(worktree, prompt) → Claude executes action, may push +# 7. If pushed: pr_walk_to_merge() from lib/pr-lifecycle.sh +# 8. Cleanup: worktree_cleanup(), issue_close() # -# Key principle: The runtime creates and destroys. The formula preserves. -# The formula must push results before signaling done — the worktree is nuked after. +# Action-specific (stays in runner): +# - YAML front matter parsing (model selection) +# - Bot username filtering for prior comments +# - Lifetime watchdog (MAX_LIFETIME=8h wall-clock cap) +# - Child process cleanup (docker compose, background jobs) # -# Session: action-{project}-{issue_num} (tmux) -# Log: action/action-poll-{project}.log - +# From shared libraries: +# - Issue lifecycle: lib/issue-lifecycle.sh +# - Worktree: lib/worktree.sh +# - PR lifecycle: lib/pr-lifecycle.sh +# - Agent SDK: lib/agent-sdk.sh +# +# Log: action/action-poll-{project}.log +# ============================================================================= set -euo pipefail ISSUE="${1:?Usage: action-agent.sh [project.toml]}" export PROJECT_TOML="${2:-${PROJECT_TOML:-}}" -source "$(dirname "$0")/../lib/env.sh" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" + +# shellcheck source=../lib/env.sh +source "$FACTORY_ROOT/lib/env.sh" # Use action-bot's own Forgejo identity (#747) FORGE_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}" -source "$(dirname "$0")/../lib/ci-helpers.sh" -source "$(dirname "$0")/../lib/agent-session.sh" -source "$(dirname "$0")/../lib/formula-session.sh" -source "$(dirname "$0")/../lib/worktree.sh" -# shellcheck source=../dev/phase-handler.sh -source "$(dirname "$0")/../dev/phase-handler.sh" -SESSION_NAME="action-${PROJECT_NAME}-${ISSUE}" +# shellcheck source=../lib/ci-helpers.sh +source "$FACTORY_ROOT/lib/ci-helpers.sh" +# shellcheck source=../lib/worktree.sh +source "$FACTORY_ROOT/lib/worktree.sh" 
+# shellcheck source=../lib/issue-lifecycle.sh +source "$FACTORY_ROOT/lib/issue-lifecycle.sh" +# shellcheck source=../lib/agent-sdk.sh +source "$FACTORY_ROOT/lib/agent-sdk.sh" +# shellcheck source=../lib/pr-lifecycle.sh +source "$FACTORY_ROOT/lib/pr-lifecycle.sh" + +BRANCH="action/issue-${ISSUE}" +WORKTREE="/tmp/action-${ISSUE}-$(date +%s)" LOCKFILE="/tmp/action-agent-${ISSUE}.lock" LOGFILE="${DISINTO_LOG_DIR}/action/action-poll-${PROJECT_NAME:-default}.log" -IDLE_TIMEOUT="${ACTION_IDLE_TIMEOUT:-14400}" # 4h default -MAX_LIFETIME="${ACTION_MAX_LIFETIME:-28800}" # 8h default wall-clock cap +# shellcheck disable=SC2034 # consumed by agent-sdk.sh +SID_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.sid" +MAX_LIFETIME="${ACTION_MAX_LIFETIME:-28800}" # 8h default wall-clock cap SESSION_START_EPOCH=$(date +%s) -# --- Phase handler globals (agent-specific; defaults in phase-handler.sh) --- -# shellcheck disable=SC2034 # used by phase-handler.sh -API="${FORGE_API}" -BRANCH="action/issue-${ISSUE}" -# shellcheck disable=SC2034 # used by phase-handler.sh -WORKTREE="/tmp/action-${ISSUE}-$(date +%s)" -PHASE_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.phase" -IMPL_SUMMARY_FILE="/tmp/action-impl-summary-${PROJECT_NAME:-default}-${ISSUE}.txt" -PREFLIGHT_RESULT="/tmp/action-preflight-${ISSUE}.json" -SCRATCH_FILE="/tmp/action-${ISSUE}-scratch.md" - log() { printf '[%s] action#%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$ISSUE" "$*" >> "$LOGFILE" } -status() { - log "$*" -} - -# --- Action-specific helpers for phase-handler.sh --- -cleanup_worktree() { - worktree_cleanup "$WORKTREE" - log "destroyed worktree: ${WORKTREE}" -} -cleanup_labels() { :; } # action agent doesn't use in-progress labels - # --- Concurrency lock (per issue) --- if [ -f "$LOCKFILE" ]; then LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") @@ -87,7 +86,6 @@ cleanup() { wait "$LIFETIME_WATCHDOG_PID" 2>/dev/null || true fi rm -f "$LOCKFILE" - agent_kill_session "$SESSION_NAME" # 
Kill any remaining child processes spawned during the run local children children=$(jobs -p 2>/dev/null) || true @@ -100,23 +98,17 @@ cleanup() { # Best-effort docker cleanup for containers started during this action (cd "${WORKTREE}" 2>/dev/null && docker compose down 2>/dev/null) || true # Preserve worktree on crash for debugging; clean up on success - local final_phase="" - [ -f "$PHASE_FILE" ] && final_phase=$(head -1 "$PHASE_FILE" 2>/dev/null || true) - if [ "${final_phase:-}" = "PHASE:crashed" ] || [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ] || [ "$exit_code" -ne 0 ]; then - worktree_preserve "$WORKTREE" "crashed (exit=$exit_code, phase=${final_phase:-unknown})" + if [ "$exit_code" -ne 0 ]; then + worktree_preserve "$WORKTREE" "crashed (exit=$exit_code)" else - cleanup_worktree + worktree_cleanup "$WORKTREE" fi - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$PREFLIGHT_RESULT" + rm -f "$SID_FILE" } trap cleanup EXIT # --- Memory guard --- -AVAIL_MB=$(awk '/MemAvailable/ {printf "%d", $2/1024}' /proc/meminfo) -if [ "$AVAIL_MB" -lt 2000 ]; then - log "SKIP: only ${AVAIL_MB}MB available (need 2000MB)" - exit 0 -fi +memory_guard 2000 # --- Fetch issue --- log "fetching issue #${ISSUE}" @@ -139,25 +131,10 @@ fi log "Issue: ${ISSUE_TITLE}" -# --- Dependency check (skip before spawning Claude) --- -DEPS=$(printf '%s' "$ISSUE_BODY" | bash "${FACTORY_ROOT}/lib/parse-deps.sh") -if [ -n "$DEPS" ]; then - ALL_MET=true - while IFS= read -r dep; do - [ -z "$dep" ] && continue - DEP_STATE=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${dep}" | jq -r '.state // "open"') || DEP_STATE="open" - if [ "$DEP_STATE" != "closed" ]; then - log "SKIP: dependency #${dep} still open — not spawning session" - ALL_MET=false - break - fi - done <<< "$DEPS" - if [ "$ALL_MET" = false ]; then - rm -f "$LOCKFILE" - exit 0 - fi - log "all dependencies met" +# --- Dependency check (shared library) --- +if ! 
issue_check_deps "$ISSUE"; then + log "SKIP: issue #${ISSUE} blocked by: ${_ISSUE_BLOCKED_BY[*]}" + exit 0 fi # --- Extract model from YAML front matter (if present) --- @@ -191,28 +168,23 @@ if [ -n "$COMMENTS_JSON" ] && [ "$COMMENTS_JSON" != "null" ] && [ "$COMMENTS_JSO "[\(.user.login) at \(.created_at[:19])]\n\(.body)\n---"' 2>/dev/null || true) fi -# --- Create isolated worktree --- -log "creating worktree: ${WORKTREE}" +# --- Determine git remote --- cd "${PROJECT_REPO_ROOT}" - -# Determine which git remote corresponds to FORGE_URL _forge_host=$(echo "$FORGE_URL" | sed 's|https\?://||; s|/.*||') FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}') FORGE_REMOTE="${FORGE_REMOTE:-origin}" export FORGE_REMOTE +# --- Create isolated worktree --- +log "creating worktree: ${WORKTREE}" git fetch "${FORGE_REMOTE}" "${PRIMARY_BRANCH}" 2>/dev/null || true -if ! git worktree add "$WORKTREE" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" 2>&1; then +if ! worktree_create "$WORKTREE" "$BRANCH"; then log "ERROR: worktree creation failed" exit 1 fi log "worktree ready: ${WORKTREE}" -# --- Read scratch file (compaction survival) --- -SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") -SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") - -# --- Build initial prompt --- +# --- Build prompt --- PRIOR_SECTION="" if [ -n "$PRIOR_COMMENTS" ]; then PRIOR_SECTION="## Prior comments (resume context) @@ -222,19 +194,15 @@ ${PRIOR_COMMENTS} " fi -# Build phase protocol from shared function (Path B covered in Instructions section above) -PHASE_PROTOCOL_INSTRUCTIONS="$(build_phase_protocol_prompt "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "$BRANCH")" +GIT_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE") -# Write phase protocol to context file for compaction survival -write_compact_context "$PHASE_FILE" "$PHASE_PROTOCOL_INSTRUCTIONS" - -INITIAL_PROMPT="You are an action agent. 
Your job is to execute the action formula +PROMPT="You are an action agent. Your job is to execute the action formula in the issue below. ## Issue #${ISSUE}: ${ISSUE_TITLE} ${ISSUE_BODY} -${SCRATCH_CONTEXT} + ${PRIOR_SECTION}## Instructions 1. Read the action formula steps in the issue body carefully. @@ -248,29 +216,20 @@ ${PRIOR_SECTION}## Instructions \"${FORGE_API}/issues/${ISSUE}/comments\" \\ -d \"{\\\"body\\\": \\\"your comment here\\\"}\" -4. If a step requires human input or approval, write PHASE:escalate with a reason. - A human will review and respond via the forge. +4. If a step requires human input or approval, post a comment explaining what + is needed and stop — the orchestrator will block the issue. ### Path A: If this action produces code changes (e.g. config updates, baselines): - You are already in an isolated worktree at: ${WORKTREE} - - Create and switch to branch: git checkout -b ${BRANCH} + - You are on branch: ${BRANCH} - Make your changes, commit, and push: git push ${FORGE_REMOTE} ${BRANCH} - **IMPORTANT:** The worktree is destroyed after completion. Push all - results before signaling done — unpushed work will be lost. - - Follow the phase protocol below — the orchestrator handles PR creation, - CI monitoring, and review injection. + results before finishing — unpushed work will be lost. ### Path B: If this action produces no code changes (investigation, report): - Post results as a comment on issue #${ISSUE}. - **IMPORTANT:** The worktree is destroyed after completion. Copy any - files you need to persistent paths before signaling done. - - Close the issue: - curl -sf -X PATCH \\ - -H \"Authorization: token \${FORGE_TOKEN}\" \\ - -H 'Content-Type: application/json' \\ - \"${FORGE_API}/issues/${ISSUE}\" \\ - -d '{\"state\": \"closed\"}' - - Signal completion: echo \"PHASE:done\" > \"${PHASE_FILE}\" + files you need to persistent paths before finishing. 5. 
Environment variables available in your bash sessions: FORGE_TOKEN, FORGE_API, FORGE_REPO, FORGE_WEB, PROJECT_NAME @@ -286,73 +245,79 @@ ${PRIOR_SECTION}## Instructions If the prior comments above show work already completed, resume from where it left off. -${SCRATCH_INSTRUCTION} - -${PHASE_PROTOCOL_INSTRUCTIONS}" - -# --- Create tmux session --- -log "creating tmux session: ${SESSION_NAME}" -if ! create_agent_session "${SESSION_NAME}" "${WORKTREE}" "${PHASE_FILE}"; then - log "ERROR: failed to create tmux session" - exit 1 -fi - -# --- Inject initial prompt --- -inject_formula "${SESSION_NAME}" "${INITIAL_PROMPT}" -log "initial prompt injected into session" +${GIT_INSTRUCTIONS}" # --- Wall-clock lifetime watchdog (background) --- -# Caps total session time independently of idle timeout. When the cap is -# hit the watchdog kills the tmux session, posts a summary comment on the -# issue, and writes PHASE:failed so monitor_phase_loop exits. +# Caps total run time independently of claude -p timeout. When the cap is +# hit the watchdog kills the main process, which triggers cleanup via trap. _lifetime_watchdog() { local remaining=$(( MAX_LIFETIME - ($(date +%s) - SESSION_START_EPOCH) )) [ "$remaining" -le 0 ] && remaining=1 sleep "$remaining" local hours=$(( MAX_LIFETIME / 3600 )) - log "MAX_LIFETIME (${hours}h) reached — killing session" - agent_kill_session "$SESSION_NAME" + log "MAX_LIFETIME (${hours}h) reached — killing agent" # Post summary comment on issue - local body="Action session killed: wall-clock lifetime cap (${hours}h) reached." + local body="Action agent killed: wall-clock lifetime cap (${hours}h) reached." 
curl -sf -X POST \ -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${ISSUE}/comments" \ -d "{\"body\": \"${body}\"}" >/dev/null 2>&1 || true - printf 'PHASE:failed\nReason: max_lifetime (%sh) reached\n' "$hours" > "$PHASE_FILE" - # Touch phase-changed marker so monitor_phase_loop picks up immediately - touch "/tmp/phase-changed-${SESSION_NAME}.marker" + kill $$ 2>/dev/null || true } _lifetime_watchdog & LIFETIME_WATCHDOG_PID=$! -# --- Monitor phase loop (shared with dev-agent) --- -status "monitoring phase: ${PHASE_FILE} (action agent)" -monitor_phase_loop "$PHASE_FILE" "$IDLE_TIMEOUT" _on_phase_change "$SESSION_NAME" +# --- Run agent --- +log "running agent (worktree: ${WORKTREE})" +agent_run --worktree "$WORKTREE" "$PROMPT" +log "agent_run complete" -# Handle exit reason from monitor_phase_loop -case "${_MONITOR_LOOP_EXIT:-}" in - idle_timeout) - # Post diagnostic comment + label blocked - post_blocked_diagnostic "idle_timeout" - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE" - ;; - idle_prompt) - # Notification + blocked label already handled by _on_phase_change(PHASE:failed) callback - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE" - ;; - PHASE:failed) - # Check if this was a max_lifetime kill (phase file contains the reason) - if grep -q 'max_lifetime' "$PHASE_FILE" 2>/dev/null; then - post_blocked_diagnostic "max_lifetime" - fi - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE" - ;; - done) - # Belt-and-suspenders: callback handles primary cleanup, - # but ensure sentinel files are removed if callback was interrupted - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE" - ;; -esac +# --- Detect if branch was pushed (Path A vs Path B) --- +PUSHED=false +# Check if remote branch exists +git fetch "${FORGE_REMOTE}" "$BRANCH" 2>/dev/null || true +if git 
rev-parse --verify "${FORGE_REMOTE}/${BRANCH}" >/dev/null 2>&1; then + PUSHED=true +fi +# Fallback: check local commits ahead of base +if [ "$PUSHED" = false ]; then + if git -C "$WORKTREE" log "${FORGE_REMOTE}/${PRIMARY_BRANCH}..${BRANCH}" --oneline 2>/dev/null | grep -q .; then + PUSHED=true + fi +fi + +if [ "$PUSHED" = true ]; then + # --- Path A: code changes pushed — create PR and walk to merge --- + log "branch pushed — creating PR" + PR_NUMBER="" + PR_NUMBER=$(pr_create "$BRANCH" "action: ${ISSUE_TITLE}" \ + "Closes #${ISSUE} + +Automated action execution by action-agent.") || true + + if [ -n "$PR_NUMBER" ]; then + log "walking PR #${PR_NUMBER} to merge" + pr_walk_to_merge "$PR_NUMBER" "$_AGENT_SESSION_ID" "$WORKTREE" || true + + case "${_PR_WALK_EXIT_REASON:-}" in + merged) + log "PR #${PR_NUMBER} merged — closing issue" + issue_close "$ISSUE" + ;; + *) + log "PR #${PR_NUMBER} not merged (reason: ${_PR_WALK_EXIT_REASON:-unknown})" + issue_block "$ISSUE" "pr_not_merged: ${_PR_WALK_EXIT_REASON:-unknown}" + ;; + esac + else + log "ERROR: failed to create PR" + issue_block "$ISSUE" "pr_creation_failed" + fi +else + # --- Path B: no code changes — close issue directly --- + log "no branch pushed — closing issue (Path B)" + issue_close "$ISSUE" +fi log "action-agent finished for issue #${ISSUE}" From ed43f9db11250af8639f80b0b0b0844b2ddbe7ad Mon Sep 17 00:00:00 2001 From: johba Date: Sat, 28 Mar 2026 11:08:55 +0000 Subject: [PATCH 2/6] docs: add CLAUDE.md skill file for factory setup and operations Comprehensive guide for AI coding agents (Claude Code, etc.) 
to: - Set up a new factory instance in an LXD container - Run disinto init and verify the stack - Configure mirrors to GitHub/Codeberg - Check on dev-agent, review-agent, and CI status - Unstick blocked issues and trigger manual polls - File issues for the factory to work on - Known workarounds for LXD nested Docker Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..cdfa205 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,246 @@ +# CLAUDE.md — Skill file for disinto + +## What is disinto? + +Disinto is an autonomous code factory — bash scripts + Claude CLI that automate the full +software development lifecycle: picking up issues, implementing via Claude, creating PRs, +running CI, reviewing, merging, and mirroring to external forges. + +Read `VISION.md` for the project philosophy, `AGENTS.md` for architecture, and +`BOOTSTRAP.md` for setup instructions. + +## Setting up a new factory instance + +### Prerequisites + +- An LXD container (Debian 12) with Docker, git, jq, curl, tmux, python3 (>=3.11) +- `claude` CLI installed and authenticated +- SSH key for mirror pushes (added to GitHub/Codeberg) + +### First-time setup + +1. **Clone the repo** and cd into it: + ```bash + git clone https://codeberg.org/johba/disinto.git && cd disinto + ``` + +2. **Run init** against the repo you want the factory to develop: + ```bash + bin/disinto init https://codeberg.org/org/repo --yes + ``` + For self-development (factory develops itself): + ```bash + bin/disinto init https://codeberg.org/johba/disinto --yes --repo-root $(pwd) + ``` + +3. **Verify the stack** came up: + ```bash + docker ps --format "table {{.Names}}\t{{.Status}}" + ``` + Expected: forgejo (Up), woodpecker (healthy), woodpecker-agent (healthy), agents (Up), + edge (Up), staging (Up). + +4. 
**Check WOODPECKER_TOKEN** was generated: + ```bash + grep WOODPECKER_TOKEN .env + ``` + If empty, see "Known issues" below. + +5. **Verify agent cron** is running: + ```bash + docker exec -u agent disinto-agents-1 crontab -l -u agent + ``` + +6. **Set up mirrors** (optional): + Edit `projects/.toml`: + ```toml + [mirrors] + github = "git@github.com:Org/repo.git" + codeberg = "git@codeberg.org:user/repo.git" + ``` + Ensure `~/.ssh` is mounted into the agents container and SSH keys are added + to the remote forges. The compose template includes the mount; just add your + public key to GitHub/Codeberg. + +### Post-init checklist + +- [ ] Stack containers all running and healthy +- [ ] `WOODPECKER_TOKEN` in `.env` is non-empty +- [ ] `projects/.toml` exists with correct `repo_root` and `primary_branch` +- [ ] Labels exist on Forgejo repo: backlog, in-progress, blocked, tech-debt, etc. +- [ ] Agent container can reach Forgejo API: `docker exec disinto-agents-1 bash -c "source /home/agent/disinto/.env && curl -sf http://forgejo:3000/api/v1/version"` +- [ ] Agent repo is cloned: `docker exec -u agent disinto-agents-1 ls /home/agent/repos/` + - If not: `docker exec disinto-agents-1 chown -R agent:agent /home/agent/repos && docker exec -u agent disinto-agents-1 bash -c "source /home/agent/disinto/.env && git clone http://dev-bot:\${FORGE_TOKEN}@forgejo:3000/org/repo.git /home/agent/repos/"` +- [ ] Create backlog issues on Forgejo for the factory to work on + +## Checking on the factory + +### Agent status + +```bash +# Are agents running? 
+docker exec disinto-agents-1 bash -c " + for f in /proc/[0-9]*/cmdline; do + cmd=\$(tr '\0' ' ' < \$f 2>/dev/null) + echo \$cmd | grep -qi claude && echo PID \$(echo \$f | cut -d/ -f3): running + done +" + +# Latest dev-agent activity +docker exec disinto-agents-1 tail -20 /home/agent/data/logs/dev/dev-agent.log + +# Latest poll activity +docker exec disinto-agents-1 tail -20 /home/agent/data/logs/dev/dev-agent-.log +``` + +### Issue and PR status + +```bash +source .env +# Open issues +curl -sf "http://localhost:3000/api/v1/repos///issues?state=open" \ + -H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' + +# Open PRs +curl -sf "http://localhost:3000/api/v1/repos///pulls?state=open" \ + -H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | "PR #\(.number) [\(.head.ref)] \(.title)"' +``` + +### CI status + +```bash +source .env +# Check pipelines (requires session cookie + CSRF for WP v3 API) +WP_CSRF=$(curl -sf -b "user_sess=$WOODPECKER_TOKEN" http://localhost:8000/web-config.js \ + | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') +curl -sf -b "user_sess=$WOODPECKER_TOKEN" -H "X-CSRF-Token: $WP_CSRF" \ + "http://localhost:8000/api/repos/1/pipelines?page=1&per_page=5" \ + | jq '.[] | {number, status, event}' +``` + +### Unsticking a blocked issue + +When a dev-agent run fails (CI timeout, implementation error), the issue gets labeled +`blocked`. To retry: + +```bash +source .env +# 1. Close stale PR if any +curl -sf -X PATCH "http://localhost:3000/api/v1/repos///pulls/" \ + -H "Authorization: token $FORGE_TOKEN" -H "Content-Type: application/json" \ + -d '{"state":"closed"}' + +# 2. Delete stale branch +curl -sf -X DELETE "http://localhost:3000/api/v1/repos///branches/fix/issue-" \ + -H "Authorization: token $FORGE_TOKEN" + +# 3. Remove locks +docker exec disinto-agents-1 rm -f /tmp/dev-agent-*.json /tmp/dev-agent-*.lock + +# 4. 
Relabel issue to backlog +BACKLOG_ID=$(curl -sf "http://localhost:3000/api/v1/repos///labels" \ + -H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | select(.name=="backlog") | .id') +curl -sf -X PUT "http://localhost:3000/api/v1/repos///issues//labels" \ + -H "Authorization: token $FORGE_TOKEN" -H "Content-Type: application/json" \ + -d "{\"labels\":[$BACKLOG_ID]}" + +# 5. Update agent repo to latest main +docker exec -u agent disinto-agents-1 bash -c \ + "cd /home/agent/repos/ && git fetch origin && git reset --hard origin/main" +``` + +The next cron cycle (every 5 minutes) will pick it up. + +### Triggering a poll manually + +```bash +docker exec -u agent disinto-agents-1 bash -c \ + "cd /home/agent/disinto && bash dev/dev-poll.sh projects/.toml" +``` + +## Filing issues + +The factory picks up issues labeled `backlog`. The dev-agent: +1. Claims the issue (labels it `in-progress`) +2. Creates a worktree on branch `fix/issue-` +3. Runs Claude to implement the fix +4. Pushes, creates a PR, waits for CI +5. Requests review from review-bot +6. Merges on approval, pushes to mirrors + +Issue body should contain enough context for Claude to implement it. 
Include: +- What's wrong or what needs to change +- Which files are affected +- Any design constraints +- Dependency references: `Depends-on: #N` (dev-agent checks these before starting) + +Use labels: +- `backlog` — ready for the dev-agent to pick up +- `blocked` — not ready (missing dependency, needs investigation) +- `in-progress` — claimed by dev-agent (set automatically) +- No label — parked, not for the factory to touch + +## Reverse tunnel access (for browser UI) + +If running in an LXD container with a reverse SSH tunnel to a jump host: + +```bash +# On the LXD container, add to /etc/systemd/system/reverse-tunnel.service: +# -R 127.0.0.1:13000:localhost:3000 (Forgejo) +# -R 127.0.0.1:18000:localhost:8000 (Woodpecker) + +# From your machine: +ssh -L 3000:localhost:13000 user@jump-host +# Then open http://localhost:3000 in your browser +``` + +Forgejo admin login: `disinto-admin` / set during init (or reset with +`docker exec disinto-forgejo-1 su -c "forgejo admin user change-password --username disinto-admin --password --must-change-password=false" git`). + +## Known issues & workarounds + +### WP CI agent needs host networking in LXD + +Docker bridge networking inside LXD breaks gRPC/HTTP2. The compose template uses +`network_mode: host` + `privileged: true` for the WP agent, connecting via +`localhost:9000`. This is baked into the template and works on regular VMs too. + +### CI step containers need Docker network + +The WP agent spawns CI containers that need to reach Forgejo for git clone. +`WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net` is set in the compose +template to put CI containers on the compose network. + +### Forgejo webhook allowlist + +Forgejo blocks outgoing webhooks by default. The compose template sets +`FORGEJO__webhook__ALLOWED_HOST_LIST: "private"` to allow delivery to +Docker-internal hosts. + +### OAuth2 token generation during init + +The init script drives a Forgejo OAuth2 flow to generate a Woodpecker token. 
+This requires rewriting URL-encoded Docker-internal hostnames and submitting +all Forgejo grant form fields. If token generation fails, check Forgejo logs +for "Unregistered Redirect URI" errors. + +### Woodpecker UI not accessible via tunnel + +The WP OAuth login redirects use Docker-internal hostnames that browsers can't +resolve. Use the Forgejo UI instead — CI results appear as commit statuses on PRs. + +### PROJECT_REPO_ROOT inside agents container + +The agents container needs `PROJECT_REPO_ROOT` set in its environment to +`/home/agent/repos/` (not the host path from the TOML). The compose +template includes this. If the agent fails with "cd: no such file or directory", +check this env var. + +## Code conventions + +See `AGENTS.md` for per-file architecture docs and coding conventions. +Key principles: +- Bash for checks, AI for judgment +- Zero LLM tokens when idle (cron checks are pure bash) +- Fire-and-forget mirror pushes (never block the pipeline) +- Issues are the unit of work; PRs are the delivery mechanism From cbe5df52b2a60785a398e85b3eb188f771fad27e Mon Sep 17 00:00:00 2001 From: johba Date: Sat, 28 Mar 2026 11:13:24 +0000 Subject: [PATCH 3/6] feat: add disinto-factory skill for guided setup and operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Distributable skill file (SKILL.md) that walks an AI agent through: - First-time factory setup with interactive [ASK] prompts - Post-init verification checklist - Mirror configuration to GitHub/Codeberg - Backlog seeding and issue creation - Ongoing monitoring: agent status, CI, PRs - Unsticking blocked issues Includes: - scripts/factory-status.sh — one-command factory health check - references/troubleshooting.md — common issues from real deployments - Slimmed CLAUDE.md pointing to the skill Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 248 +----------------- disinto-factory/SKILL.md | 210 +++++++++++++++ 
disinto-factory/references/troubleshooting.md | 53 ++++ disinto-factory/scripts/factory-status.sh | 44 ++++ 4 files changed, 318 insertions(+), 237 deletions(-) create mode 100644 disinto-factory/SKILL.md create mode 100644 disinto-factory/references/troubleshooting.md create mode 100755 disinto-factory/scripts/factory-status.sh diff --git a/CLAUDE.md b/CLAUDE.md index cdfa205..63927a1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,246 +1,20 @@ -# CLAUDE.md — Skill file for disinto +# CLAUDE.md -## What is disinto? +This repo is **disinto** — an autonomous code factory. -Disinto is an autonomous code factory — bash scripts + Claude CLI that automate the full -software development lifecycle: picking up issues, implementing via Claude, creating PRs, -running CI, reviewing, merging, and mirroring to external forges. +For setup and operations, load the `disinto-factory` skill from `disinto-factory/SKILL.md`. -Read `VISION.md` for the project philosophy, `AGENTS.md` for architecture, and -`BOOTSTRAP.md` for setup instructions. - -## Setting up a new factory instance - -### Prerequisites - -- An LXD container (Debian 12) with Docker, git, jq, curl, tmux, python3 (>=3.11) -- `claude` CLI installed and authenticated -- SSH key for mirror pushes (added to GitHub/Codeberg) - -### First-time setup - -1. **Clone the repo** and cd into it: - ```bash - git clone https://codeberg.org/johba/disinto.git && cd disinto - ``` - -2. **Run init** against the repo you want the factory to develop: - ```bash - bin/disinto init https://codeberg.org/org/repo --yes - ``` - For self-development (factory develops itself): - ```bash - bin/disinto init https://codeberg.org/johba/disinto --yes --repo-root $(pwd) - ``` - -3. **Verify the stack** came up: - ```bash - docker ps --format "table {{.Names}}\t{{.Status}}" - ``` - Expected: forgejo (Up), woodpecker (healthy), woodpecker-agent (healthy), agents (Up), - edge (Up), staging (Up). - -4. 
**Check WOODPECKER_TOKEN** was generated: - ```bash - grep WOODPECKER_TOKEN .env - ``` - If empty, see "Known issues" below. - -5. **Verify agent cron** is running: - ```bash - docker exec -u agent disinto-agents-1 crontab -l -u agent - ``` - -6. **Set up mirrors** (optional): - Edit `projects/.toml`: - ```toml - [mirrors] - github = "git@github.com:Org/repo.git" - codeberg = "git@codeberg.org:user/repo.git" - ``` - Ensure `~/.ssh` is mounted into the agents container and SSH keys are added - to the remote forges. The compose template includes the mount; just add your - public key to GitHub/Codeberg. - -### Post-init checklist - -- [ ] Stack containers all running and healthy -- [ ] `WOODPECKER_TOKEN` in `.env` is non-empty -- [ ] `projects/.toml` exists with correct `repo_root` and `primary_branch` -- [ ] Labels exist on Forgejo repo: backlog, in-progress, blocked, tech-debt, etc. -- [ ] Agent container can reach Forgejo API: `docker exec disinto-agents-1 bash -c "source /home/agent/disinto/.env && curl -sf http://forgejo:3000/api/v1/version"` -- [ ] Agent repo is cloned: `docker exec -u agent disinto-agents-1 ls /home/agent/repos/` - - If not: `docker exec disinto-agents-1 chown -R agent:agent /home/agent/repos && docker exec -u agent disinto-agents-1 bash -c "source /home/agent/disinto/.env && git clone http://dev-bot:\${FORGE_TOKEN}@forgejo:3000/org/repo.git /home/agent/repos/"` -- [ ] Create backlog issues on Forgejo for the factory to work on - -## Checking on the factory - -### Agent status - -```bash -# Are agents running? 
-docker exec disinto-agents-1 bash -c " - for f in /proc/[0-9]*/cmdline; do - cmd=\$(tr '\0' ' ' < \$f 2>/dev/null) - echo \$cmd | grep -qi claude && echo PID \$(echo \$f | cut -d/ -f3): running - done -" - -# Latest dev-agent activity -docker exec disinto-agents-1 tail -20 /home/agent/data/logs/dev/dev-agent.log - -# Latest poll activity -docker exec disinto-agents-1 tail -20 /home/agent/data/logs/dev/dev-agent-.log -``` - -### Issue and PR status - -```bash -source .env -# Open issues -curl -sf "http://localhost:3000/api/v1/repos///issues?state=open" \ - -H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' - -# Open PRs -curl -sf "http://localhost:3000/api/v1/repos///pulls?state=open" \ - -H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | "PR #\(.number) [\(.head.ref)] \(.title)"' -``` - -### CI status - -```bash -source .env -# Check pipelines (requires session cookie + CSRF for WP v3 API) -WP_CSRF=$(curl -sf -b "user_sess=$WOODPECKER_TOKEN" http://localhost:8000/web-config.js \ - | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') -curl -sf -b "user_sess=$WOODPECKER_TOKEN" -H "X-CSRF-Token: $WP_CSRF" \ - "http://localhost:8000/api/repos/1/pipelines?page=1&per_page=5" \ - | jq '.[] | {number, status, event}' -``` - -### Unsticking a blocked issue - -When a dev-agent run fails (CI timeout, implementation error), the issue gets labeled -`blocked`. To retry: - -```bash -source .env -# 1. Close stale PR if any -curl -sf -X PATCH "http://localhost:3000/api/v1/repos///pulls/" \ - -H "Authorization: token $FORGE_TOKEN" -H "Content-Type: application/json" \ - -d '{"state":"closed"}' - -# 2. Delete stale branch -curl -sf -X DELETE "http://localhost:3000/api/v1/repos///branches/fix/issue-" \ - -H "Authorization: token $FORGE_TOKEN" - -# 3. Remove locks -docker exec disinto-agents-1 rm -f /tmp/dev-agent-*.json /tmp/dev-agent-*.lock - -# 4. 
Relabel issue to backlog -BACKLOG_ID=$(curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/labels" \ -H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | select(.name=="backlog") | .id') -curl -sf -X PUT "http://localhost:3000/api/v1/repos/<org>/<repo>/issues/<issue-number>/labels" \ -H "Authorization: token $FORGE_TOKEN" -H "Content-Type: application/json" \ -d "{\"labels\":[$BACKLOG_ID]}" - -# 5. Update agent repo to latest main -docker exec -u agent disinto-agents-1 bash -c \ "cd /home/agent/repos/<name> && git fetch origin && git reset --hard origin/main" -``` - -The next cron cycle (every 5 minutes) will pick it up. - -### Triggering a poll manually - -```bash -docker exec -u agent disinto-agents-1 bash -c \ "cd /home/agent/disinto && bash dev/dev-poll.sh projects/<name>.toml" -``` - -## Filing issues - -The factory picks up issues labeled `backlog`. The dev-agent: -1. Claims the issue (labels it `in-progress`) -2. Creates a worktree on branch `fix/issue-<issue-number>` -3. Runs Claude to implement the fix -4. Pushes, creates a PR, waits for CI -5. Requests review from review-bot -6. Merges on approval, pushes to mirrors - -Issue body should contain enough context for Claude to implement it.
Include: -- What's wrong or what needs to change -- Which files are affected -- Any design constraints -- Dependency references: `Depends-on: #N` (dev-agent checks these before starting) - -Use labels: -- `backlog` — ready for the dev-agent to pick up -- `blocked` — not ready (missing dependency, needs investigation) -- `in-progress` — claimed by dev-agent (set automatically) -- No label — parked, not for the factory to touch - -## Reverse tunnel access (for browser UI) - -If running in an LXD container with a reverse SSH tunnel to a jump host: - -```bash -# On the LXD container, add to /etc/systemd/system/reverse-tunnel.service: -# -R 127.0.0.1:13000:localhost:3000 (Forgejo) -# -R 127.0.0.1:18000:localhost:8000 (Woodpecker) - -# From your machine: -ssh -L 3000:localhost:13000 user@jump-host -# Then open http://localhost:3000 in your browser -``` - -Forgejo admin login: `disinto-admin` / set during init (or reset with -`docker exec disinto-forgejo-1 su -c "forgejo admin user change-password --username disinto-admin --password <new-pw> --must-change-password=false" git`). - -## Known issues & workarounds - -### WP CI agent needs host networking in LXD - -Docker bridge networking inside LXD breaks gRPC/HTTP2. The compose template uses -`network_mode: host` + `privileged: true` for the WP agent, connecting via -`localhost:9000`. This is baked into the template and works on regular VMs too. - -### CI step containers need Docker network - -The WP agent spawns CI containers that need to reach Forgejo for git clone. -`WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net` is set in the compose -template to put CI containers on the compose network. - -### Forgejo webhook allowlist - -Forgejo blocks outgoing webhooks by default. The compose template sets -`FORGEJO__webhook__ALLOWED_HOST_LIST: "private"` to allow delivery to -Docker-internal hosts. - -### OAuth2 token generation during init - -The init script drives a Forgejo OAuth2 flow to generate a Woodpecker token.
-This requires rewriting URL-encoded Docker-internal hostnames and submitting -all Forgejo grant form fields. If token generation fails, check Forgejo logs -for "Unregistered Redirect URI" errors. - -### Woodpecker UI not accessible via tunnel - -The WP OAuth login redirects use Docker-internal hostnames that browsers can't -resolve. Use the Forgejo UI instead — CI results appear as commit statuses on PRs. - -### PROJECT_REPO_ROOT inside agents container - -The agents container needs `PROJECT_REPO_ROOT` set in its environment to -`/home/agent/repos/` (not the host path from the TOML). The compose -template includes this. If the agent fails with "cd: no such file or directory", -check this env var. +Quick references: +- `AGENTS.md` — per-agent architecture and file-level docs +- `VISION.md` — project philosophy +- `BOOTSTRAP.md` — detailed init walkthrough +- `disinto-factory/references/troubleshooting.md` — common issues and fixes +- `disinto-factory/scripts/factory-status.sh` — quick status check ## Code conventions -See `AGENTS.md` for per-file architecture docs and coding conventions. -Key principles: - Bash for checks, AI for judgment -- Zero LLM tokens when idle (cron checks are pure bash) +- Zero LLM tokens when idle (cron polls are pure bash) - Fire-and-forget mirror pushes (never block the pipeline) - Issues are the unit of work; PRs are the delivery mechanism +- See `AGENTS.md` for per-file watermarks and coding conventions diff --git a/disinto-factory/SKILL.md b/disinto-factory/SKILL.md new file mode 100644 index 0000000..45186fc --- /dev/null +++ b/disinto-factory/SKILL.md @@ -0,0 +1,210 @@ +--- +name: disinto-factory +description: Set up and operate a disinto autonomous code factory. Use when bootstrapping a new factory instance, checking on agents and CI, managing the backlog, or troubleshooting the stack. 
+--- + +# Disinto Factory + +You are helping the user set up and operate a **disinto autonomous code factory** — a system +of bash scripts and Claude CLI that automates the full development lifecycle: picking up +issues, implementing via Claude, creating PRs, running CI, reviewing, merging, and mirroring. + +## First-time setup + +Walk the user through these steps interactively. Ask questions where marked with [ASK]. + +### 1. Environment + +[ASK] Where will the factory run? Options: +- **LXD container** (recommended for isolation) — need Debian 12, Docker, nesting enabled +- **Bare VM or server** — need Debian/Ubuntu with Docker +- **Existing container** — check prerequisites + +Verify prerequisites: +```bash +docker --version && git --version && jq --version && curl --version && tmux -V && python3 --version && claude --version +``` + +Any missing tool — help the user install it before continuing. + +### 2. Clone and init + +```bash +git clone https://codeberg.org/johba/disinto.git && cd disinto +``` + +[ASK] What repo should the factory develop? Options: +- **Itself** (self-development): `bin/disinto init https://codeberg.org/johba/disinto --yes --repo-root $(pwd)` +- **Another project**: `bin/disinto init --yes` + +Run the init and watch for: +- All bot users created (dev-bot, review-bot, etc.) +- `WOODPECKER_TOKEN` generated and saved +- Stack containers all started + +### 3. Post-init verification + +Run this checklist — fix any failures before proceeding: + +```bash +# Stack healthy? +docker ps --format "table {{.Names}}\t{{.Status}}" +# Expected: forgejo, woodpecker (healthy), woodpecker-agent (healthy), agents, edge, staging + +# Token generated? +grep WOODPECKER_TOKEN .env | grep -v "^$" && echo "OK" || echo "MISSING — see references/troubleshooting.md" + +# Agent cron active? +docker exec -u agent disinto-agents-1 crontab -l -u agent + +# Agent can reach Forgejo? 
+docker exec disinto-agents-1 bash -c "source /home/agent/disinto/.env && curl -sf http://forgejo:3000/api/v1/version | jq .version" + +# Agent repo cloned? +docker exec -u agent disinto-agents-1 ls /home/agent/repos/ +``` + +If the agent repo is missing, clone it: +```bash +docker exec disinto-agents-1 chown -R agent:agent /home/agent/repos +docker exec -u agent disinto-agents-1 bash -c "source /home/agent/disinto/.env && git clone http://dev-bot:\${FORGE_TOKEN}@forgejo:3000/<org>/<repo>.git /home/agent/repos/<name>" +``` + +### 4. Mirrors (optional) + +[ASK] Should the factory mirror to external forges? If yes, which? +- GitHub: need repo URL and SSH key added to GitHub account +- Codeberg: need repo URL and SSH key added to Codeberg account + +Show the user their public key: +```bash +cat ~/.ssh/id_ed25519.pub +``` + +Test SSH access: +```bash +ssh -T git@github.com 2>&1; ssh -T git@codeberg.org 2>&1 +``` + +If SSH host keys are missing: `ssh-keyscan github.com codeberg.org >> ~/.ssh/known_hosts 2>/dev/null` + +Edit `projects/<name>.toml` to add mirrors: +```toml +[mirrors] +github = "git@github.com:Org/repo.git" +codeberg = "git@codeberg.org:user/repo.git" +``` + +Test with a manual push: +```bash +source .env && source lib/env.sh && export PROJECT_TOML=projects/<name>.toml && source lib/load-project.sh && source lib/mirrors.sh && mirror_push +``` + +### 5. Seed the backlog + +[ASK] What should the factory work on first? Brainstorm with the user. + +Help them create issues on the local Forgejo.
Each issue needs: +- A clear title prefixed with `fix:`, `feat:`, or `chore:` +- A body describing what to change, which files, and any constraints +- The `backlog` label (so the dev-agent picks it up) + +```bash +source .env +BACKLOG_ID=$(curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/labels" \ + -H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | select(.name=="backlog") | .id') + +curl -sf -X POST "http://localhost:3000/api/v1/repos/<org>/<repo>/issues" \ + -H "Authorization: token $FORGE_TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"title\": \"<title>\", \"body\": \"<body>\", \"labels\": [$BACKLOG_ID]}" +``` + +For issues with dependencies, add `Depends-on: #N` in the body — the dev-agent checks +these before starting. + +Use labels: +- `backlog` — ready for the dev-agent +- `blocked` — parked, not for the factory +- No label — tracked but not for autonomous work + +### 6. Watch it work + +The dev-agent polls every 5 minutes. Trigger manually to see it immediately: +```bash +docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/disinto && bash dev/dev-poll.sh projects/<name>.toml" +``` + +Then monitor: +```bash +# Watch the agent work +docker exec disinto-agents-1 tail -f /home/agent/data/logs/dev/dev-agent.log + +# Check for Claude running +docker exec disinto-agents-1 bash -c "for f in /proc/[0-9]*/cmdline; do cmd=\$(tr '\0' ' ' < \$f 2>/dev/null); echo \$cmd | grep -q 'claude.*-p' && echo 'Claude is running'; done" +``` + +## Ongoing operations + +### Check factory status + +```bash +source .env + +# Issues +curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/issues?state=open" \ + -H "Authorization: token $FORGE_TOKEN" \ + | jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' + +# PRs +curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/pulls?state=open" \ + -H "Authorization: token $FORGE_TOKEN" \ + | jq -r '.[] | "PR #\(.number) [\(.head.ref)] \(.title)"' + +# Agent logs +docker exec disinto-agents-1 tail -20
/home/agent/data/logs/dev/dev-agent.log +``` + +### Check CI + +```bash +source .env +WP_CSRF=$(curl -sf -b "user_sess=$WOODPECKER_TOKEN" http://localhost:8000/web-config.js \ + | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') +curl -sf -b "user_sess=$WOODPECKER_TOKEN" -H "X-CSRF-Token: $WP_CSRF" \ + "http://localhost:8000/api/repos/1/pipelines?page=1&per_page=5" \ + | jq '.[] | {number, status, event}' +``` + +### Unstick a blocked issue + +When a dev-agent run fails (CI timeout, implementation error), the issue gets labeled `blocked`: + +1. Close stale PR and delete the branch +2. `docker exec disinto-agents-1 rm -f /tmp/dev-agent-*.json /tmp/dev-agent-*.lock` +3. Relabel the issue to `backlog` +4. Update agent repo: `docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/repos/<name> && git fetch origin && git reset --hard origin/main"` + +### Access Forgejo UI + +If running in an LXD container with reverse tunnel: +```bash +# From your machine: +ssh -L 3000:localhost:13000 user@jump-host +# Open http://localhost:3000 +``` + +Reset admin password if needed: +```bash +docker exec disinto-forgejo-1 su -c "forgejo admin user change-password --username disinto-admin --password <new-pw> --must-change-password=false" git +``` + +## Important context + +- Read `AGENTS.md` for per-agent architecture and file-level docs +- Read `VISION.md` for project philosophy +- Read `BOOTSTRAP.md` for detailed init walkthrough +- The factory uses a single internal Forgejo as its forge, regardless of where mirrors go +- Dev-agent uses `claude -p --resume` for session continuity across CI/review cycles +- Mirror pushes happen automatically after every merge (fire-and-forget) +- Cron schedule: dev-poll every 5min, review-poll every 5min, gardener 4x/day diff --git a/disinto-factory/references/troubleshooting.md b/disinto-factory/references/troubleshooting.md new file mode 100644 index 0000000..0d1b282 --- /dev/null +++ b/disinto-factory/references/troubleshooting.md @@ -0,0 
+1,53 @@ +# Troubleshooting + +## WOODPECKER_TOKEN empty after init + +The OAuth2 flow failed. Common causes: + +1. **URL-encoded redirect_uri mismatch**: Forgejo logs show "Unregistered Redirect URI". + The init script must rewrite both plain and URL-encoded Docker hostnames. + +2. **Forgejo must_change_password**: Admin user was created with forced password change. + The init script calls `--must-change-password=false` but Forgejo 11.x sometimes ignores it. + +3. **WOODPECKER_OPEN not set**: WP refuses first-user OAuth registration without it. + +Manual fix: reset admin password and re-run the token generation manually, or +use the Woodpecker UI to create a token. + +## WP CI agent won't connect (DeadlineExceeded) + +gRPC over Docker bridge fails in LXD (and possibly other nested container environments). +The compose template uses `network_mode: host` + `privileged: true` for the agent. +If you see this error, check: +- Server exposes port 9000: `grep "9000:9000" docker-compose.yml` +- Agent uses `localhost:9000`: `grep "WOODPECKER_SERVER" docker-compose.yml` +- Agent has `network_mode: host` + +## CI clone fails (could not resolve host) + +CI containers need to resolve Docker service names (e.g., `forgejo`). +Check `WOODPECKER_BACKEND_DOCKER_NETWORK` is set on the agent. + +## Webhooks not delivered + +Forgejo blocks outgoing webhooks by default. Check: +```bash +docker logs disinto-forgejo-1 2>&1 | grep "webhook.*ALLOWED_HOST_LIST" +``` +Fix: add `FORGEJO__webhook__ALLOWED_HOST_LIST: "private"` to Forgejo environment. + +Also verify the webhook exists: +```bash +curl -sf -u "disinto-admin:<password>" "http://localhost:3000/api/v1/repos/<org>/<repo>/hooks" | jq '.[].config.url' +``` +If missing, deactivate and reactivate the repo in Woodpecker to auto-create it. + +## Dev-agent fails with "cd: no such file or directory" + +`PROJECT_REPO_ROOT` inside the agents container points to a host path that doesn't +exist in the container. 
Check the compose env: +```bash +docker inspect disinto-agents-1 --format '{{range .Config.Env}}{{println .}}{{end}}' | grep PROJECT_REPO_ROOT +``` +Should be `/home/agent/repos/<name>`, not `/home/<user>/<name>`. diff --git a/disinto-factory/scripts/factory-status.sh b/disinto-factory/scripts/factory-status.sh new file mode 100755 index 0000000..457ac9a --- /dev/null +++ b/disinto-factory/scripts/factory-status.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# factory-status.sh — Quick status check for a running disinto factory +set -euo pipefail + +FACTORY_ROOT="${1:-$(cd "$(dirname "$0")/../.." && pwd)}" +source "${FACTORY_ROOT}/.env" 2>/dev/null || { echo "No .env found at ${FACTORY_ROOT}"; exit 1; } + +FORGE_URL="${FORGE_URL:-http://localhost:3000}" +REPO=$(grep '^repo ' "${FACTORY_ROOT}/projects/"*.toml 2>/dev/null | head -1 | sed 's/.*= *"//;s/"//') +[ -z "$REPO" ] && { echo "No project TOML found"; exit 1; } + +echo "=== Stack ===" +docker ps --format "table {{.Names}}\t{{.Status}}" 2>/dev/null | grep disinto + +echo "" +echo "=== Open Issues ===" +curl -sf "${FORGE_URL}/api/v1/repos/${REPO}/issues?state=open&limit=20" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + | jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' 2>/dev/null || echo "(API error)" + +echo "" +echo "=== Open PRs ===" +curl -sf "${FORGE_URL}/api/v1/repos/${REPO}/pulls?state=open&limit=10" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + | jq -r '.[] | "PR #\(.number) [\(.head.ref)] \(.title)"' 2>/dev/null || echo "none" + +echo "" +echo "=== Agent Activity ===" +docker exec disinto-agents-1 bash -c "tail -5 /home/agent/data/logs/dev/dev-agent.log 2>/dev/null" || echo "(no logs)" + +echo "" +echo "=== Claude Running? 
===" +docker exec disinto-agents-1 bash -c " + found=false + for f in /proc/[0-9]*/cmdline; do + cmd=\$(tr '\0' ' ' < \"\$f\" 2>/dev/null) + if echo \"\$cmd\" | grep -q 'claude.*-p'; then found=true; echo 'Yes — Claude is actively working'; break; fi + done + \$found || echo 'No — idle' +" 2>/dev/null + +echo "" +echo "=== Mirrors ===" +cd "${FACTORY_ROOT}" 2>/dev/null && git remote -v | grep -E 'github|codeberg' | grep push || echo "none configured" From 02dd03eaaf9deb3f508d9742fa6f6ad370532fc0 Mon Sep 17 00:00:00 2001 From: johba <johba@disinto> Date: Sat, 28 Mar 2026 11:14:42 +0000 Subject: [PATCH 4/6] chore: remove BOOTSTRAP.md, slim CLAUDE.md BOOTSTRAP.md is superseded by the disinto-factory skill (SKILL.md). CLAUDE.md now just points to AGENTS.md and the skill. Updated AGENTS.md reference accordingly. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- AGENTS.md | 2 +- BOOTSTRAP.md | 460 --------------------------------------- CLAUDE.md | 18 +- disinto-factory/SKILL.md | 1 - 4 files changed, 3 insertions(+), 478 deletions(-) delete mode 100644 BOOTSTRAP.md diff --git a/AGENTS.md b/AGENTS.md index 4d5a91f..ffc5561 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,7 +8,7 @@ gardener, supervisor, planner, predictor, action, vault) that pick up issues fro implement them, review PRs, plan from the vision, gate dangerous actions, and keep the system healthy — all via cron and `claude -p`. -See `README.md` for the full architecture and `BOOTSTRAP.md` for setup. +See `README.md` for the full architecture and `disinto-factory/SKILL.md` for setup. ## Directory layout diff --git a/BOOTSTRAP.md b/BOOTSTRAP.md deleted file mode 100644 index 80e7408..0000000 --- a/BOOTSTRAP.md +++ /dev/null @@ -1,460 +0,0 @@ -# Bootstrapping a New Project - -How to point disinto at a new target project and get all agents running. 
- -## Prerequisites - -Before starting, ensure you have: - -- [ ] A **git repo** (GitHub, Codeberg, or any URL) with at least one issue labeled `backlog` -- [ ] A **Woodpecker CI** pipeline (`.woodpecker/` dir with at least one `.yml`) -- [ ] **Docker** installed (for local Forgejo provisioning) — or a running Forgejo instance -- [ ] A **local clone** of the target repo on the same machine as disinto -- [ ] `claude` CLI installed and authenticated (`claude --version`) -- [ ] `tmux` installed (`tmux -V`) — required for persistent dev sessions (issue #80+) - -## Quick Start - -The fastest path is `disinto init`, which provisions a local Forgejo instance, creates bot users and tokens, clones the repo, and sets up cron — all in one command: - -```bash -disinto init https://github.com/org/repo -``` - -This will: -1. Start a local Forgejo instance via Docker (at `http://localhost:3000`) -2. Create admin + bot users (dev-bot, review-bot) with API tokens -3. Create the repo on Forgejo and push your code -4. Generate a `projects/<name>.toml` config -5. Create standard labels (backlog, in-progress, blocked, etc.) -6. Install cron entries for the agents - -No external accounts or tokens needed. - -## 1. Secret Management (SOPS + age) - -Disinto encrypts secrets at rest using [SOPS](https://github.com/getsops/sops) with [age](https://age-encryption.org/) encryption. When `sops` and `age` are installed, `disinto init` automatically: - -1. Generates an age key at `~/.config/sops/age/keys.txt` (if none exists) -2. Creates `.sops.yaml` pinning the age public key -3. Encrypts all secrets into `.env.enc` (safe to commit) -4. Removes the plaintext `.env` - -**Install the tools:** - -```bash -# age (key generation) -apt install age # Debian/Ubuntu -brew install age # macOS - -# sops (encryption/decryption) -# Download from https://github.com/getsops/sops/releases -``` - -**The age private key** at `~/.config/sops/age/keys.txt` is the single file that must be protected. 
Back it up securely — without it, `.env.enc` cannot be decrypted. LUKS disk encryption on the VPS protects this key at rest. - -**Managing secrets after setup:** - -```bash -disinto secrets edit # Opens .env.enc in $EDITOR, re-encrypts on save -disinto secrets show # Prints decrypted secrets (for debugging) -disinto secrets migrate # Converts existing plaintext .env -> .env.enc -``` - -**Fallback:** If `sops`/`age` are not installed, `disinto init` writes secrets to a plaintext `.env` file with a warning. All agents load secrets transparently — `lib/env.sh` checks for `.env.enc` first, then falls back to `.env`. - -## 2. Configure `.env` - -```bash -cp .env.example .env -``` - -Fill in: - -```bash -# ── Forge (auto-populated by disinto init) ───────────────── -FORGE_URL=http://localhost:3000 # local Forgejo instance -FORGE_TOKEN= # dev-bot token (auto-generated) -FORGE_REVIEW_TOKEN= # review-bot token (auto-generated) - -# ── Woodpecker CI ─────────────────────────────────────────── -WOODPECKER_TOKEN=tok_xxxxxxxx -WOODPECKER_SERVER=http://localhost:8000 -# WOODPECKER_REPO_ID — now per-project, set in projects/*.toml [ci] section - -# Woodpecker Postgres (for direct pipeline queries) -WOODPECKER_DB_PASSWORD=secret -WOODPECKER_DB_USER=woodpecker -WOODPECKER_DB_HOST=127.0.0.1 -WOODPECKER_DB_NAME=woodpecker - -# ── Tuning ────────────────────────────────────────────────── -CLAUDE_TIMEOUT=7200 # seconds per Claude invocation -``` - -### Backwards compatibility - -If you have an existing deployment using `CODEBERG_TOKEN` / `REVIEW_BOT_TOKEN` in `.env`, those still work — `env.sh` falls back to the old names automatically. No migration needed. - -## 3. Configure Project TOML - -Each project needs a `projects/<name>.toml` file with box-specific settings -(absolute paths, Woodpecker CI IDs, forge URL). These files are -**gitignored** — they are local installation config, not shared code. 
- -To create one: - -```bash -# Automatic — generates TOML, clones repo, sets up cron: -disinto init https://github.com/org/repo - -# Manual — copy a template and fill in your values: -cp projects/myproject.toml.example projects/myproject.toml -vim projects/myproject.toml -``` - -The `forge_url` field in the TOML tells all agents where to find the forge API: - -```toml -name = "myproject" -repo = "org/myproject" -forge_url = "http://localhost:3000" -``` - -The repo ships `projects/*.toml.example` templates showing the expected -structure. See any `.toml.example` file for the full field reference. - -## 4. Claude Code Global Settings - -Configure `~/.claude/settings.json` with **only** permissions and `skipDangerousModePermissionPrompt`. Do not add hooks to the global settings — `agent-session.sh` injects per-worktree hooks automatically. - -Match the configuration from harb-staging exactly. The file should contain only permission grants and the dangerous-mode flag: - -```json -{ - "permissions": { - "allow": [ - "..." - ] - }, - "skipDangerousModePermissionPrompt": true -} -``` - -### Seed `~/.claude.json` - -Run `claude --dangerously-skip-permissions` once interactively to create `~/.claude.json`. This file must exist before cron-driven agents can run. - -```bash -claude --dangerously-skip-permissions -# Exit after it initializes successfully -``` - -## 5. File Ownership - -Everything under `/home/debian` must be owned by `debian:debian`. Root-owned files cause permission errors when agents run as the `debian` user. - -```bash -chown -R debian:debian /home/debian/harb /home/debian/dark-factory -``` - -Verify no root-owned files exist in agent temp directories: - -```bash -# These should return nothing -find /tmp/dev-* /tmp/harb-* /tmp/review-* -not -user debian 2>/dev/null -``` - -## 5b. 
Woodpecker CI + Forgejo Integration - -`disinto init` automatically configures Woodpecker to use the local Forgejo instance as its forge backend if `WOODPECKER_SERVER` is set in `.env`. This includes: - -1. Creating an OAuth2 application on Forgejo for Woodpecker -2. Writing `WOODPECKER_FORGEJO_*` env vars to `.env` -3. Activating the repo in Woodpecker - -### Manual setup (if Woodpecker runs outside of `disinto init`) - -If you manage Woodpecker separately, configure these env vars in its server config: - -```bash -WOODPECKER_FORGEJO=true -WOODPECKER_FORGEJO_URL=http://localhost:3000 -WOODPECKER_FORGEJO_CLIENT=<oauth2-client-id> -WOODPECKER_FORGEJO_SECRET=<oauth2-client-secret> -``` - -To create the OAuth2 app on Forgejo: - -```bash -# Create OAuth2 application (redirect URI = Woodpecker authorize endpoint) -curl -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "http://localhost:3000/api/v1/user/applications/oauth2" \ - -d '{"name":"woodpecker-ci","redirect_uris":["http://localhost:8000/authorize"],"confidential_client":true}' -``` - -The response contains `client_id` and `client_secret` for `WOODPECKER_FORGEJO_CLIENT` / `WOODPECKER_FORGEJO_SECRET`. - -To activate the repo in Woodpecker: - -```bash -woodpecker-cli repo add <org>/<repo> -# Or via API: -curl -X POST \ - -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ - "http://localhost:8000/api/repos" \ - -d '{"forge_remote_id":"<org>/<repo>"}' -``` - -Woodpecker will now trigger pipelines on pushes to Forgejo and push commit status back. Disinto queries Woodpecker directly for CI status (with a forge API fallback), so pipeline results are visible even if Woodpecker's status push to Forgejo is delayed. - -## 6. Prepare the Target Repo - -### Required: CI pipeline - -The repo needs at least one Woodpecker pipeline. Disinto monitors CI status to decide when a PR is ready for review and when it can merge. 
- -### Required: `CLAUDE.md` - -Create a `CLAUDE.md` in the repo root. This is the context document that dev-agent and review-agent read before working. It should cover: - -- **What the project is** (one paragraph) -- **Tech stack** (languages, frameworks, DB) -- **How to build/run/test** (`npm install`, `npm test`, etc.) -- **Coding conventions** (import style, naming, linting rules) -- **Project structure** (key directories and what lives where) - -The dev-agent reads this file via `claude -p` before implementing any issue. The better this file, the better the output. - -### Required: Issue labels - -`disinto init` creates these automatically. If setting up manually, create these labels on the forge repo: - -| Label | Purpose | -|-------|---------| -| `backlog` | Issues ready to be picked up by dev-agent | -| `in-progress` | Managed by dev-agent (auto-applied, auto-removed) | - -Optional but recommended: - -| Label | Purpose | -|-------|---------| -| `tech-debt` | Gardener can promote these to `backlog` | -| `blocked` | Dev-agent marks issues with unmet dependencies | -| `formula` | **Not yet functional.** Formula dispatch lives on the unmerged `feat/formula` branch. Dev-agent will skip any issue with this label until that branch is merged. Template files exist in `formulas/` for future use. | - -### Required: Branch protection - -On Forgejo, set up branch protection for your primary branch: - -- **Require pull request reviews**: enabled -- **Required approvals**: 1 (from the review bot account) -- **Restrict push**: only allow merges via PR - -This ensures dev-agent can't merge its own PRs — it must wait for review-agent (running as the bot account) to approve. - -> **Common pitfall:** Approvals alone are not enough. You must also: -> 1. Add `review-bot` as a **write** collaborator on the repo (Settings → Collaborators) -> 2. 
Set both `approvals_whitelist_username` **and** `merge_whitelist_usernames` to include `review-bot` in the branch protection rule -> -> Without write access, the bot's approval is counted but the merge API returns HTTP 405. - -### Required: Seed the `AGENTS.md` tree - -The planner maintains an `AGENTS.md` tree — architecture docs with -per-file `<!-- last-reviewed: SHA -->` watermarks. You must seed this before -the first planner run, otherwise the planner sees no watermarks and treats the -entire repo as "new", generating a noisy first-run diff. - -1. **Create `AGENTS.md` in the repo root** with a one-page overview of the - project: what it is, tech stack, directory layout, key conventions. Link - to sub-directory AGENTS.md files. - -2. **Create sub-directory `AGENTS.md` files** for each major directory - (e.g. `frontend/AGENTS.md`, `backend/AGENTS.md`). Keep each under ~200 - lines — architecture and conventions, not implementation details. - -3. **Set the watermark** on line 1 of every AGENTS.md file to the current HEAD: - ```bash - SHA=$(git rev-parse --short HEAD) - for f in $(find . -name "AGENTS.md" -not -path "./.git/*"); do - sed -i "1s/^/<!-- last-reviewed: ${SHA} -->\n/" "$f" - done - ``` - -4. **Symlink `CLAUDE.md`** so Claude Code picks up the same file: - ```bash - ln -sf AGENTS.md CLAUDE.md - ``` - -5. Commit and push. The planner will now see 0 changes on its first run and - only update files when real commits land. - -See `formulas/run-planner.toml` (agents-update step) for the full AGENTS.md conventions. - -## 7. 
Write Good Issues - -Dev-agent works best with issues that have: - -- **Clear title** describing the change (e.g., "Add email validation to customer form") -- **Acceptance criteria** — what "done" looks like -- **Dependencies** — reference blocking issues with `#NNN` in the body or a `## Dependencies` section: - ``` - ## Dependencies - - #4 - - #7 - ``` - -Dev-agent checks that all referenced issues are closed (= merged) before starting work. If any are open, the issue is skipped and checked again next cycle. - -## 8. Install Cron - -```bash -crontab -e -``` - -### Single project - -Add (adjust paths): - -```cron -FACTORY_ROOT=/home/you/disinto - -# Supervisor — health checks, auto-healing (every 10 min) -0,10,20,30,40,50 * * * * $FACTORY_ROOT/supervisor/supervisor-poll.sh - -# Review agent — find unreviewed PRs (every 10 min, offset +3) -3,13,23,33,43,53 * * * * $FACTORY_ROOT/review/review-poll.sh $FACTORY_ROOT/projects/myproject.toml - -# Dev agent — find ready issues, implement (every 10 min, offset +6) -6,16,26,36,46,56 * * * * $FACTORY_ROOT/dev/dev-poll.sh $FACTORY_ROOT/projects/myproject.toml - -# Gardener — backlog grooming (daily) -15 8 * * * $FACTORY_ROOT/gardener/gardener-poll.sh - -# Planner — AGENTS.md maintenance + gap analysis (weekly) -0 9 * * 1 $FACTORY_ROOT/planner/planner-poll.sh -``` - -`review-poll.sh`, `dev-poll.sh`, and `gardener-poll.sh` all take a project TOML file as their first argument. - -### Multiple projects - -Stagger each project's polls so they don't overlap. 
With the example below, cross-project gaps are 2 minutes: - -```cron -FACTORY_ROOT=/home/you/disinto - -# Supervisor (shared) -0,10,20,30,40,50 * * * * $FACTORY_ROOT/supervisor/supervisor-poll.sh - -# Project A — review +3, dev +6 -3,13,23,33,43,53 * * * * $FACTORY_ROOT/review/review-poll.sh $FACTORY_ROOT/projects/project-a.toml -6,16,26,36,46,56 * * * * $FACTORY_ROOT/dev/dev-poll.sh $FACTORY_ROOT/projects/project-a.toml - -# Project B — review +8, dev +1 (2-min gap from project A) -8,18,28,38,48,58 * * * * $FACTORY_ROOT/review/review-poll.sh $FACTORY_ROOT/projects/project-b.toml -1,11,21,31,41,51 * * * * $FACTORY_ROOT/dev/dev-poll.sh $FACTORY_ROOT/projects/project-b.toml - -# Gardener — per-project backlog grooming (daily) -15 8 * * * $FACTORY_ROOT/gardener/gardener-poll.sh $FACTORY_ROOT/projects/project-a.toml -45 8 * * * $FACTORY_ROOT/gardener/gardener-poll.sh $FACTORY_ROOT/projects/project-b.toml - -# Planner — AGENTS.md maintenance + gap analysis (weekly) -0 9 * * 1 $FACTORY_ROOT/planner/planner-poll.sh -``` - -The staggered offsets prevent agents from competing for resources. Each project gets its own lock file (`/tmp/dev-agent-{name}.lock`) derived from the `name` field in its TOML, so concurrent runs across projects are safe. - -## 9. 
Verify - -```bash -# Should complete with "all clear" (no problems to fix) -bash supervisor/supervisor-poll.sh - -# Should list backlog issues (or "no backlog issues") -bash dev/dev-poll.sh - -# Should find no unreviewed PRs (or review one if exists) -bash review/review-poll.sh -``` - -Check logs after a few cycles: - -```bash -tail -30 supervisor/supervisor.log -tail -30 dev/dev-agent.log -tail -30 review/review.log -``` - -## Lifecycle - -Once running, the system operates autonomously: - -``` -You write issues (with backlog label) - → dev-poll finds ready issues - → dev-agent implements in a worktree, opens PR - → CI runs (Woodpecker) - → review-agent reviews, approves or requests changes - → dev-agent addresses feedback (if any) - → merge, close issue, clean up - -Meanwhile: - supervisor-poll monitors health, kills stale processes, manages resources - gardener grooms backlog: closes duplicates, promotes tech-debt, escalates ambiguity - planner rebuilds AGENTS.md from git history, gap-analyses against VISION.md -``` - -## Troubleshooting - -| Symptom | Check | -|---------|-------| -| Dev-agent not picking up issues | `cat /tmp/dev-agent.lock` — is another instance running? Issues labeled `backlog`? Dependencies met? | -| PR not getting reviewed | `tail review/review.log` — CI must pass first. Review bot token valid? | -| CI stuck | `bash lib/ci-debug.sh` — check Woodpecker. Rate-limited? (exit 128 = wait 15 min) | -| Claude not found | `which claude` — must be in PATH. Check `lib/env.sh` adds `~/.local/bin`. | -| Merge fails | Branch protection misconfigured? Review bot needs write access to the repo. | -| Memory issues | Supervisor auto-heals at <500 MB free. Check `supervisor/supervisor.log` for P0 alerts. | -| Works on one box but not another | Diff configs first (`~/.claude/settings.json`, `.env`, crontab, branch protection). Write code never — config mismatches are the #1 cause of cross-box failures. 
| - -### Multi-project common blockers - -| Symptom | Cause | Fix | -|---------|-------|-----| -| Dev-agent for project B never starts | Shared lock file path | Each TOML `name` field must be unique — lock is `/tmp/dev-agent-{name}.lock` | -| Review-poll skips all PRs | CI gate with no CI configured | Set `woodpecker_repo_id = 0` in the TOML `[ci]` section to bypass the CI check | -| Approved PRs never merge (HTTP 405) | `review-bot` not in merge/approvals whitelist | Add as write collaborator; set both `approvals_whitelist_username` and `merge_whitelist_usernames` in branch protection | -| Dev-agent churns through issues without waiting for open PRs to land | No single-threaded enforcement | `WAITING_PRS` check in dev-poll holds new work — verify TOML `name` is consistent across invocations | -| Label ping-pong (issue reopened then immediately re-closed) | `already_done` handler doesn't close issue | Review dev-agent log; `already_done` status should auto-close the issue | - -## Security: Docker Socket Sharing in CI - -The `woodpecker-agent` service mounts `/var/run/docker.sock` to execute `type: docker` CI pipelines. This grants root-equivalent access to the Docker host — any CI pipeline step can run privileged containers, mount arbitrary host paths, or access other containers' data. - -**Mitigations:** - -- **Run disinto in an LXD/VM container, not on bare metal.** When the Docker daemon runs inside an LXD container, LXD's user namespace mapping and resource limits contain the blast radius. A compromised CI step cannot reach the real host. -- **`WOODPECKER_MAX_WORKFLOWS: 1`** limits concurrent CI resource usage, preventing a runaway pipeline from exhausting host resources. -- **`WOODPECKER_AGENT_SECRET`** authenticates the agent↔server gRPC connection. `disinto init` auto-generates this secret and stores it in `.env` (or `.env.enc` when SOPS is available). 
-- Consider setting `WOODPECKER_BACKEND_DOCKER_VOLUMES` on the agent to restrict which host volumes CI pipelines can mount. - -**Threat model:** PRs are created by the dev-agent (Claude) and auto-reviewed by the review-bot. A crafted backlog issue could theoretically produce a PR whose CI step exploits the Docker socket. The LXD containment boundary is the primary defense — treat the LXD container as the trust boundary, not the Docker daemon inside it. - -## Action Runner — disinto (harb-staging) - -Added 2026-03-19. Polls disinto repo for `action`-labeled issues. - -``` -*/5 * * * * cd /home/debian/dark-factory && bash action/action-poll.sh projects/disinto.toml >> /tmp/action-disinto-cron.log 2>&1 -``` - -Runs locally on harb-staging — same box where Caddy/site live. For formulas that need local resources (publish-site, etc). - -### Fix applied: action-agent.sh needs +x -The script wasn't executable after git clone. Run: -```bash -chmod +x action/action-agent.sh action/action-poll.sh -``` diff --git a/CLAUDE.md b/CLAUDE.md index 63927a1..9671180 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -2,19 +2,5 @@ This repo is **disinto** — an autonomous code factory. -For setup and operations, load the `disinto-factory` skill from `disinto-factory/SKILL.md`. - -Quick references: -- `AGENTS.md` — per-agent architecture and file-level docs -- `VISION.md` — project philosophy -- `BOOTSTRAP.md` — detailed init walkthrough -- `disinto-factory/references/troubleshooting.md` — common issues and fixes -- `disinto-factory/scripts/factory-status.sh` — quick status check - -## Code conventions - -- Bash for checks, AI for judgment -- Zero LLM tokens when idle (cron polls are pure bash) -- Fire-and-forget mirror pushes (never block the pipeline) -- Issues are the unit of work; PRs are the delivery mechanism -- See `AGENTS.md` for per-file watermarks and coding conventions +Read `AGENTS.md` for architecture, coding conventions, and per-file documentation. 
+For setup and operations, load the `disinto-factory` skill (`disinto-factory/SKILL.md`). diff --git a/disinto-factory/SKILL.md b/disinto-factory/SKILL.md index 45186fc..8e17508 100644 --- a/disinto-factory/SKILL.md +++ b/disinto-factory/SKILL.md @@ -203,7 +203,6 @@ docker exec disinto-forgejo-1 su -c "forgejo admin user change-password --userna - Read `AGENTS.md` for per-agent architecture and file-level docs - Read `VISION.md` for project philosophy -- Read `BOOTSTRAP.md` for detailed init walkthrough - The factory uses a single internal Forgejo as its forge, regardless of where mirrors go - Dev-agent uses `claude -p --resume` for session continuity across CI/review cycles - Mirror pushes happen automatically after every merge (fire-and-forget) From 83ab2930e61f2b5ac0905a2f256dbff41bcb037a Mon Sep 17 00:00:00 2001 From: Claude <noreply@anthropic.com> Date: Sat, 28 Mar 2026 11:07:25 +0000 Subject: [PATCH 5/6] fix: Migrate action-agent.sh to SDK + shared libraries (#5) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- action/action-agent.sh | 287 ++++++++++++++++++----------------------- 1 file changed, 126 insertions(+), 161 deletions(-) diff --git a/action/action-agent.sh b/action/action-agent.sh index ab44108..38d7d39 100755 --- a/action/action-agent.sh +++ b/action/action-agent.sh @@ -1,73 +1,72 @@ #!/usr/bin/env bash -# action-agent.sh — Autonomous action agent: tmux + Claude + action formula +# ============================================================================= +# action-agent.sh — Synchronous action agent: SDK + shared libraries +# +# Synchronous bash loop using claude -p (one-shot invocation). +# No tmux sessions, no phase files — the bash script IS the state machine. # # Usage: ./action-agent.sh <issue-number> [project.toml] # -# Lifecycle: -# 1. Fetch issue body (action formula) + existing comments -# 2. Create isolated git worktree: /tmp/action-{issue}-{timestamp} -# 3. 
Create tmux session: action-{project}-{issue_num} with interactive claude in worktree -# 4. Inject initial prompt: formula + comments + phase protocol instructions -# 5. Monitor phase file via monitor_phase_loop (shared with dev-agent) -# Path A (git output): Claude pushes → handler creates PR → CI poll → review -# injection → merge → cleanup (same loop as dev-agent via phase-handler.sh) -# Path B (no git output): Claude posts results → PHASE:done → cleanup -# 6. For human input: Claude writes PHASE:escalate; human responds via vault/forge -# 7. Cleanup on terminal phase: kill children, destroy worktree, remove temp files +# Flow: +# 1. Preflight: issue_check_deps(), memory guard, concurrency lock +# 2. Parse model from YAML front matter in issue body (custom model selection) +# 3. Worktree: worktree_create() for action isolation +# 4. Load formula from issue body +# 5. Build prompt: formula + prior non-bot comments (resume context) +# 6. agent_run(worktree, prompt) → Claude executes action, may push +# 7. If pushed: pr_walk_to_merge() from lib/pr-lifecycle.sh +# 8. Cleanup: worktree_cleanup(), issue_close() # -# Key principle: The runtime creates and destroys. The formula preserves. -# The formula must push results before signaling done — the worktree is nuked after. 
+# Action-specific (stays in runner): +# - YAML front matter parsing (model selection) +# - Bot username filtering for prior comments +# - Lifetime watchdog (MAX_LIFETIME=8h wall-clock cap) +# - Child process cleanup (docker compose, background jobs) # -# Session: action-{project}-{issue_num} (tmux) -# Log: action/action-poll-{project}.log - +# From shared libraries: +# - Issue lifecycle: lib/issue-lifecycle.sh +# - Worktree: lib/worktree.sh +# - PR lifecycle: lib/pr-lifecycle.sh +# - Agent SDK: lib/agent-sdk.sh +# +# Log: action/action-poll-{project}.log +# ============================================================================= set -euo pipefail ISSUE="${1:?Usage: action-agent.sh <issue-number> [project.toml]}" export PROJECT_TOML="${2:-${PROJECT_TOML:-}}" -source "$(dirname "$0")/../lib/env.sh" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" + +# shellcheck source=../lib/env.sh +source "$FACTORY_ROOT/lib/env.sh" # Use action-bot's own Forgejo identity (#747) FORGE_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}" -source "$(dirname "$0")/../lib/ci-helpers.sh" -source "$(dirname "$0")/../lib/agent-session.sh" -source "$(dirname "$0")/../lib/formula-session.sh" -source "$(dirname "$0")/../lib/worktree.sh" -# shellcheck source=../dev/phase-handler.sh -source "$(dirname "$0")/../dev/phase-handler.sh" -SESSION_NAME="action-${PROJECT_NAME}-${ISSUE}" +# shellcheck source=../lib/ci-helpers.sh +source "$FACTORY_ROOT/lib/ci-helpers.sh" +# shellcheck source=../lib/worktree.sh +source "$FACTORY_ROOT/lib/worktree.sh" +# shellcheck source=../lib/issue-lifecycle.sh +source "$FACTORY_ROOT/lib/issue-lifecycle.sh" +# shellcheck source=../lib/agent-sdk.sh +source "$FACTORY_ROOT/lib/agent-sdk.sh" +# shellcheck source=../lib/pr-lifecycle.sh +source "$FACTORY_ROOT/lib/pr-lifecycle.sh" + +BRANCH="action/issue-${ISSUE}" +WORKTREE="/tmp/action-${ISSUE}-$(date +%s)" LOCKFILE="/tmp/action-agent-${ISSUE}.lock" 
LOGFILE="${DISINTO_LOG_DIR}/action/action-poll-${PROJECT_NAME:-default}.log" -IDLE_TIMEOUT="${ACTION_IDLE_TIMEOUT:-14400}" # 4h default -MAX_LIFETIME="${ACTION_MAX_LIFETIME:-28800}" # 8h default wall-clock cap +# shellcheck disable=SC2034 # consumed by agent-sdk.sh +SID_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.sid" +MAX_LIFETIME="${ACTION_MAX_LIFETIME:-28800}" # 8h default wall-clock cap SESSION_START_EPOCH=$(date +%s) -# --- Phase handler globals (agent-specific; defaults in phase-handler.sh) --- -# shellcheck disable=SC2034 # used by phase-handler.sh -API="${FORGE_API}" -BRANCH="action/issue-${ISSUE}" -# shellcheck disable=SC2034 # used by phase-handler.sh -WORKTREE="/tmp/action-${ISSUE}-$(date +%s)" -PHASE_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.phase" -IMPL_SUMMARY_FILE="/tmp/action-impl-summary-${PROJECT_NAME:-default}-${ISSUE}.txt" -PREFLIGHT_RESULT="/tmp/action-preflight-${ISSUE}.json" -SCRATCH_FILE="/tmp/action-${ISSUE}-scratch.md" - log() { printf '[%s] action#%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$ISSUE" "$*" >> "$LOGFILE" } -status() { - log "$*" -} - -# --- Action-specific helpers for phase-handler.sh --- -cleanup_worktree() { - worktree_cleanup "$WORKTREE" - log "destroyed worktree: ${WORKTREE}" -} -cleanup_labels() { :; } # action agent doesn't use in-progress labels - # --- Concurrency lock (per issue) --- if [ -f "$LOCKFILE" ]; then LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") @@ -87,7 +86,6 @@ cleanup() { wait "$LIFETIME_WATCHDOG_PID" 2>/dev/null || true fi rm -f "$LOCKFILE" - agent_kill_session "$SESSION_NAME" # Kill any remaining child processes spawned during the run local children children=$(jobs -p 2>/dev/null) || true @@ -100,23 +98,17 @@ cleanup() { # Best-effort docker cleanup for containers started during this action (cd "${WORKTREE}" 2>/dev/null && docker compose down 2>/dev/null) || true # Preserve worktree on crash for debugging; clean up on success - local final_phase="" - [ -f 
"$PHASE_FILE" ] && final_phase=$(head -1 "$PHASE_FILE" 2>/dev/null || true) - if [ "${final_phase:-}" = "PHASE:crashed" ] || [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ] || [ "$exit_code" -ne 0 ]; then - worktree_preserve "$WORKTREE" "crashed (exit=$exit_code, phase=${final_phase:-unknown})" + if [ "$exit_code" -ne 0 ]; then + worktree_preserve "$WORKTREE" "crashed (exit=$exit_code)" else - cleanup_worktree + worktree_cleanup "$WORKTREE" fi - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$PREFLIGHT_RESULT" + rm -f "$SID_FILE" } trap cleanup EXIT # --- Memory guard --- -AVAIL_MB=$(awk '/MemAvailable/ {printf "%d", $2/1024}' /proc/meminfo) -if [ "$AVAIL_MB" -lt 2000 ]; then - log "SKIP: only ${AVAIL_MB}MB available (need 2000MB)" - exit 0 -fi +memory_guard 2000 # --- Fetch issue --- log "fetching issue #${ISSUE}" @@ -139,25 +131,10 @@ fi log "Issue: ${ISSUE_TITLE}" -# --- Dependency check (skip before spawning Claude) --- -DEPS=$(printf '%s' "$ISSUE_BODY" | bash "${FACTORY_ROOT}/lib/parse-deps.sh") -if [ -n "$DEPS" ]; then - ALL_MET=true - while IFS= read -r dep; do - [ -z "$dep" ] && continue - DEP_STATE=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${dep}" | jq -r '.state // "open"') || DEP_STATE="open" - if [ "$DEP_STATE" != "closed" ]; then - log "SKIP: dependency #${dep} still open — not spawning session" - ALL_MET=false - break - fi - done <<< "$DEPS" - if [ "$ALL_MET" = false ]; then - rm -f "$LOCKFILE" - exit 0 - fi - log "all dependencies met" +# --- Dependency check (shared library) --- +if ! 
issue_check_deps "$ISSUE"; then + log "SKIP: issue #${ISSUE} blocked by: ${_ISSUE_BLOCKED_BY[*]}" + exit 0 fi # --- Extract model from YAML front matter (if present) --- @@ -191,28 +168,23 @@ if [ -n "$COMMENTS_JSON" ] && [ "$COMMENTS_JSON" != "null" ] && [ "$COMMENTS_JSO "[\(.user.login) at \(.created_at[:19])]\n\(.body)\n---"' 2>/dev/null || true) fi -# --- Create isolated worktree --- -log "creating worktree: ${WORKTREE}" +# --- Determine git remote --- cd "${PROJECT_REPO_ROOT}" - -# Determine which git remote corresponds to FORGE_URL _forge_host=$(echo "$FORGE_URL" | sed 's|https\?://||; s|/.*||') FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}') FORGE_REMOTE="${FORGE_REMOTE:-origin}" export FORGE_REMOTE +# --- Create isolated worktree --- +log "creating worktree: ${WORKTREE}" git fetch "${FORGE_REMOTE}" "${PRIMARY_BRANCH}" 2>/dev/null || true -if ! git worktree add "$WORKTREE" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" 2>&1; then +if ! worktree_create "$WORKTREE" "$BRANCH"; then log "ERROR: worktree creation failed" exit 1 fi log "worktree ready: ${WORKTREE}" -# --- Read scratch file (compaction survival) --- -SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") -SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") - -# --- Build initial prompt --- +# --- Build prompt --- PRIOR_SECTION="" if [ -n "$PRIOR_COMMENTS" ]; then PRIOR_SECTION="## Prior comments (resume context) @@ -222,19 +194,15 @@ ${PRIOR_COMMENTS} " fi -# Build phase protocol from shared function (Path B covered in Instructions section above) -PHASE_PROTOCOL_INSTRUCTIONS="$(build_phase_protocol_prompt "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "$BRANCH")" +GIT_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE") -# Write phase protocol to context file for compaction survival -write_compact_context "$PHASE_FILE" "$PHASE_PROTOCOL_INSTRUCTIONS" - -INITIAL_PROMPT="You are an action agent. 
Your job is to execute the action formula +PROMPT="You are an action agent. Your job is to execute the action formula in the issue below. ## Issue #${ISSUE}: ${ISSUE_TITLE} ${ISSUE_BODY} -${SCRATCH_CONTEXT} + ${PRIOR_SECTION}## Instructions 1. Read the action formula steps in the issue body carefully. @@ -248,29 +216,20 @@ ${PRIOR_SECTION}## Instructions \"${FORGE_API}/issues/${ISSUE}/comments\" \\ -d \"{\\\"body\\\": \\\"your comment here\\\"}\" -4. If a step requires human input or approval, write PHASE:escalate with a reason. - A human will review and respond via the forge. +4. If a step requires human input or approval, post a comment explaining what + is needed and stop — the orchestrator will block the issue. ### Path A: If this action produces code changes (e.g. config updates, baselines): - You are already in an isolated worktree at: ${WORKTREE} - - Create and switch to branch: git checkout -b ${BRANCH} + - You are on branch: ${BRANCH} - Make your changes, commit, and push: git push ${FORGE_REMOTE} ${BRANCH} - **IMPORTANT:** The worktree is destroyed after completion. Push all - results before signaling done — unpushed work will be lost. - - Follow the phase protocol below — the orchestrator handles PR creation, - CI monitoring, and review injection. + results before finishing — unpushed work will be lost. ### Path B: If this action produces no code changes (investigation, report): - Post results as a comment on issue #${ISSUE}. - **IMPORTANT:** The worktree is destroyed after completion. Copy any - files you need to persistent paths before signaling done. - - Close the issue: - curl -sf -X PATCH \\ - -H \"Authorization: token \${FORGE_TOKEN}\" \\ - -H 'Content-Type: application/json' \\ - \"${FORGE_API}/issues/${ISSUE}\" \\ - -d '{\"state\": \"closed\"}' - - Signal completion: echo \"PHASE:done\" > \"${PHASE_FILE}\" + files you need to persistent paths before finishing. 5. 
Environment variables available in your bash sessions: FORGE_TOKEN, FORGE_API, FORGE_REPO, FORGE_WEB, PROJECT_NAME @@ -286,73 +245,79 @@ ${PRIOR_SECTION}## Instructions If the prior comments above show work already completed, resume from where it left off. -${SCRATCH_INSTRUCTION} - -${PHASE_PROTOCOL_INSTRUCTIONS}" - -# --- Create tmux session --- -log "creating tmux session: ${SESSION_NAME}" -if ! create_agent_session "${SESSION_NAME}" "${WORKTREE}" "${PHASE_FILE}"; then - log "ERROR: failed to create tmux session" - exit 1 -fi - -# --- Inject initial prompt --- -inject_formula "${SESSION_NAME}" "${INITIAL_PROMPT}" -log "initial prompt injected into session" +${GIT_INSTRUCTIONS}" # --- Wall-clock lifetime watchdog (background) --- -# Caps total session time independently of idle timeout. When the cap is -# hit the watchdog kills the tmux session, posts a summary comment on the -# issue, and writes PHASE:failed so monitor_phase_loop exits. +# Caps total run time independently of claude -p timeout. When the cap is +# hit the watchdog kills the main process, which triggers cleanup via trap. _lifetime_watchdog() { local remaining=$(( MAX_LIFETIME - ($(date +%s) - SESSION_START_EPOCH) )) [ "$remaining" -le 0 ] && remaining=1 sleep "$remaining" local hours=$(( MAX_LIFETIME / 3600 )) - log "MAX_LIFETIME (${hours}h) reached — killing session" - agent_kill_session "$SESSION_NAME" + log "MAX_LIFETIME (${hours}h) reached — killing agent" # Post summary comment on issue - local body="Action session killed: wall-clock lifetime cap (${hours}h) reached." + local body="Action agent killed: wall-clock lifetime cap (${hours}h) reached." 
curl -sf -X POST \ -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${ISSUE}/comments" \ -d "{\"body\": \"${body}\"}" >/dev/null 2>&1 || true - printf 'PHASE:failed\nReason: max_lifetime (%sh) reached\n' "$hours" > "$PHASE_FILE" - # Touch phase-changed marker so monitor_phase_loop picks up immediately - touch "/tmp/phase-changed-${SESSION_NAME}.marker" + kill $$ 2>/dev/null || true } _lifetime_watchdog & LIFETIME_WATCHDOG_PID=$! -# --- Monitor phase loop (shared with dev-agent) --- -status "monitoring phase: ${PHASE_FILE} (action agent)" -monitor_phase_loop "$PHASE_FILE" "$IDLE_TIMEOUT" _on_phase_change "$SESSION_NAME" +# --- Run agent --- +log "running agent (worktree: ${WORKTREE})" +agent_run --worktree "$WORKTREE" "$PROMPT" +log "agent_run complete" -# Handle exit reason from monitor_phase_loop -case "${_MONITOR_LOOP_EXIT:-}" in - idle_timeout) - # Post diagnostic comment + label blocked - post_blocked_diagnostic "idle_timeout" - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE" - ;; - idle_prompt) - # Notification + blocked label already handled by _on_phase_change(PHASE:failed) callback - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE" - ;; - PHASE:failed) - # Check if this was a max_lifetime kill (phase file contains the reason) - if grep -q 'max_lifetime' "$PHASE_FILE" 2>/dev/null; then - post_blocked_diagnostic "max_lifetime" - fi - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE" - ;; - done) - # Belt-and-suspenders: callback handles primary cleanup, - # but ensure sentinel files are removed if callback was interrupted - rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE" - ;; -esac +# --- Detect if branch was pushed (Path A vs Path B) --- +PUSHED=false +# Check if remote branch exists +git fetch "${FORGE_REMOTE}" "$BRANCH" 2>/dev/null || true +if git 
rev-parse --verify "${FORGE_REMOTE}/${BRANCH}" >/dev/null 2>&1; then + PUSHED=true +fi +# Fallback: check local commits ahead of base +if [ "$PUSHED" = false ]; then + if git -C "$WORKTREE" log "${FORGE_REMOTE}/${PRIMARY_BRANCH}..${BRANCH}" --oneline 2>/dev/null | grep -q .; then + PUSHED=true + fi +fi + +if [ "$PUSHED" = true ]; then + # --- Path A: code changes pushed — create PR and walk to merge --- + log "branch pushed — creating PR" + PR_NUMBER="" + PR_NUMBER=$(pr_create "$BRANCH" "action: ${ISSUE_TITLE}" \ + "Closes #${ISSUE} + +Automated action execution by action-agent.") || true + + if [ -n "$PR_NUMBER" ]; then + log "walking PR #${PR_NUMBER} to merge" + pr_walk_to_merge "$PR_NUMBER" "$_AGENT_SESSION_ID" "$WORKTREE" || true + + case "${_PR_WALK_EXIT_REASON:-}" in + merged) + log "PR #${PR_NUMBER} merged — closing issue" + issue_close "$ISSUE" + ;; + *) + log "PR #${PR_NUMBER} not merged (reason: ${_PR_WALK_EXIT_REASON:-unknown})" + issue_block "$ISSUE" "pr_not_merged: ${_PR_WALK_EXIT_REASON:-unknown}" + ;; + esac + else + log "ERROR: failed to create PR" + issue_block "$ISSUE" "pr_creation_failed" + fi +else + # --- Path B: no code changes — close issue directly --- + log "no branch pushed — closing issue (Path B)" + issue_close "$ISSUE" +fi log "action-agent finished for issue #${ISSUE}" From 6f64013fc65df4ff8e74faf01131d822127c1369 Mon Sep 17 00:00:00 2001 From: Claude <noreply@anthropic.com> Date: Sat, 28 Mar 2026 11:15:10 +0000 Subject: [PATCH 6/6] fix: Migrate action-agent.sh to SDK + shared libraries (#5) Rewrite action-agent from tmux session + phase-handler pattern to synchronous SDK pattern (agent_run via claude -p). 
Uses shared libraries: - agent-sdk.sh for one-shot Claude invocation - issue-lifecycle.sh for issue_check_deps/issue_close/issue_block - pr-lifecycle.sh for pr_create/pr_walk_to_merge - worktree.sh for worktree_create/worktree_cleanup Add default callback stubs to phase-handler.sh (cleanup_worktree, cleanup_labels) so it is self-contained now that action-agent.sh no longer sources it. Update agent-smoke.sh accordingly. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- .woodpecker/agent-smoke.sh | 6 +++--- dev/phase-handler.sh | 11 +++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/.woodpecker/agent-smoke.sh b/.woodpecker/agent-smoke.sh index 94e9258..9a37bf4 100644 --- a/.woodpecker/agent-smoke.sh +++ b/.woodpecker/agent-smoke.sh @@ -199,9 +199,9 @@ check_script lib/ci-debug.sh check_script lib/parse-deps.sh # Agent scripts — list cross-sourced files where function scope flows across files. -# phase-handler.sh calls helpers defined by its sourcing agent (action-agent.sh). +# phase-handler.sh defines default callback stubs; sourcing agents may override. 
check_script dev/dev-agent.sh -check_script dev/phase-handler.sh action/action-agent.sh lib/secret-scan.sh +check_script dev/phase-handler.sh lib/secret-scan.sh check_script dev/dev-poll.sh check_script dev/phase-test.sh check_script gardener/gardener-run.sh @@ -215,7 +215,7 @@ check_script vault/vault-fire.sh check_script vault/vault-poll.sh check_script vault/vault-reject.sh check_script action/action-poll.sh -check_script action/action-agent.sh dev/phase-handler.sh +check_script action/action-agent.sh check_script supervisor/supervisor-run.sh check_script supervisor/preflight.sh check_script predictor/predictor-run.sh diff --git a/dev/phase-handler.sh b/dev/phase-handler.sh index ab099d6..8f3b3b4 100644 --- a/dev/phase-handler.sh +++ b/dev/phase-handler.sh @@ -34,6 +34,17 @@ source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh" # shellcheck source=../lib/mirrors.sh source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh" +# --- Default callback stubs (agents can override after sourcing) --- +# cleanup_worktree and cleanup_labels are called during phase transitions. +# Provide no-op defaults so phase-handler.sh is self-contained; sourcing +# agents override these with real implementations. +if ! declare -f cleanup_worktree >/dev/null 2>&1; then + cleanup_worktree() { :; } +fi +if ! declare -f cleanup_labels >/dev/null 2>&1; then + cleanup_labels() { :; } +fi + # --- Default globals (agents can override after sourcing) --- : "${CI_POLL_TIMEOUT:=1800}" : "${REVIEW_POLL_TIMEOUT:=10800}"