diff --git a/dev/dev-agent.sh b/dev/dev-agent.sh index 311ba07..b22264b 100755 --- a/dev/dev-agent.sh +++ b/dev/dev-agent.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# dev-agent.sh — Autonomous developer agent for a single issue +# dev-agent.sh — Autonomous developer agent for a single issue (tmux session manager) # # Usage: ./dev-agent.sh # @@ -7,20 +7,16 @@ # 1. Fetch issue, check dependencies (preflight) # 2. Claim issue (label: in-progress, remove backlog) # 3. Create worktree + branch -# 4. Run claude -p with implementation prompt -# 5. Commit + push + create PR -# 6. Wait for CI + AI review -# 7. Feed review back via claude -p -c (continues session) -# 8. On APPROVE → merge, delete branch, clean labels, close issue +# 4. Create tmux session: dev-{project}-{issue} with interactive claude +# 5. Send initial prompt via tmux (issue body, context, phase protocol) +# 6. Monitor phase file — Claude signals when it needs input +# 7. React to phases: create PR, poll CI, inject results, inject review, merge +# 8. Kill session on PHASE:done, PHASE:failed, or 2h idle timeout # -# Preflight JSON output: -# {"status": "ready"} -# {"status": "unmet_dependency", "blocked_by": [315, 316], "suggestion": 317} -# {"status": "too_large", "reason": "..."} -# {"status": "already_done", "reason": "..."} -# -# Peek: cat /tmp/dev-agent-status -# Log: tail -f dev-agent.log +# Phase file: /tmp/dev-session-{project}-{issue}.phase +# Session: dev-{project}-{issue} (tmux) +# Peek phase: head -1 /tmp/dev-session-{project}-{issue}.phase +# Log: tail -f dev-agent.log set -euo pipefail @@ -42,9 +38,27 @@ LOGFILE="${FACTORY_ROOT}/dev/dev-agent.log" PREFLIGHT_RESULT="/tmp/dev-agent-preflight.json" BRANCH="fix/issue-${ISSUE}" WORKTREE="/tmp/${PROJECT_NAME}-worktree-${ISSUE}" -REVIEW_POLL_INTERVAL=300 # 5 min between review checks + +# Tmux session + phase protocol +PHASE_FILE="/tmp/dev-session-${PROJECT_NAME}-${ISSUE}.phase" +SESSION_NAME="dev-${PROJECT_NAME}-${ISSUE}" +IMPL_SUMMARY_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE}.txt" + +# Timing +PHASE_POLL_INTERVAL=30 # seconds between phase checks +IDLE_TIMEOUT=7200 # 2h: kill session if phase stale this long +CI_POLL_TIMEOUT=1800 # 30min max for CI to complete +REVIEW_POLL_TIMEOUT=10800 # 3h max wait for review + +# Limits +MAX_CI_FIXES=3 MAX_REVIEW_ROUNDS=5 -CLAUDE_TIMEOUT=7200 + +# Counters — global state across phase transitions +CI_RETRY_COUNT=0 +CI_FIX_COUNT=0 +REVIEW_ROUND=0 +PR_NUMBER="" # --- Logging --- log() { @@ -60,6 +74,57 @@ notify() { matrix_send "dev" "🔧 #${ISSUE}: $*" 2>/dev/null || true } +# --- Phase helpers --- +read_phase() { + { cat "$PHASE_FILE" 2>/dev/null || true; } | head -1 | tr -d '[:space:]' +} + +inject_into_session() { + local text="$1" + local tmpfile + tmpfile=$(mktemp /tmp/tmux-inject-XXXXXX) + printf '%s' "$text" > "$tmpfile" + tmux load-buffer -b "inject-${ISSUE}" "$tmpfile" + tmux paste-buffer -t "${SESSION_NAME}" -b "inject-${ISSUE}" + sleep 0.5 + tmux send-keys -t "${SESSION_NAME}" "" Enter + tmux delete-buffer -b "inject-${ISSUE}" 2>/dev/null || true + rm -f "$tmpfile" +} + +kill_tmux_session() { + tmux kill-session -t "${SESSION_NAME}" 2>/dev/null || true +} + +# --- Refusal comment helper (used in PHASE:failed handler) --- +post_refusal_comment() { + local emoji="$1" title="$2" body="$3" + local last_has_title + last_has_title=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${ISSUE}/comments?limit=5" | \ + jq -r --arg t "Dev-agent: ${title}" '[.[] | .body // ""] | any(contains($t)) | tostring') || true + if [ "$last_has_title" = "true" ]; then + log "skipping duplicate refusal comment: ${title}" + return 0 + fi + local comment="${emoji} **Dev-agent: ${title}** + +${body} + +--- +*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*" + printf '%s' "$comment" > "/tmp/refusal-comment.txt" + jq -Rs '{body: .}' < "/tmp/refusal-comment.txt" > "/tmp/refusal-comment.json" + curl -sf -o /dev/null -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/comments" \ + --data-binary @"/tmp/refusal-comment.json" 2>/dev/null || \ + log "WARNING: failed to post refusal comment" + rm -f "/tmp/refusal-comment.txt" "/tmp/refusal-comment.json" +} + +# --- Cleanup helpers --- cleanup_worktree() { cd "$REPO_ROOT" git worktree remove "$WORKTREE" --force 2>/dev/null || true @@ -78,6 +143,8 @@ cleanup_labels() { CLAIMED=false cleanup() { rm -f "$LOCKFILE" "$STATUSFILE" + # Kill any live session so Claude doesn't run without an orchestrator attached + kill_tmux_session # If we claimed the issue but never created a PR, unclaim it if [ "$CLAIMED" = true ] && [ -z "${PR_NUMBER:-}" ]; then log "cleanup: unclaiming issue (no PR created)" @@ -93,21 +160,154 @@ cleanup() { } trap cleanup EXIT +# ============================================================================= +# MERGE HELPER +# ============================================================================= +do_merge() { + local sha="$1" + local pr="${PR_NUMBER}" -# --- Log rotation --- + for _m in $(seq 1 20); do + local ci + ci=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/commits/${sha}/status" | jq -r '.state // "unknown"') + [ "$ci" = "success" ] && break + if [ "$ci" = "failure" ] || [ "$ci" = "error" ]; then + log "CI is red before merge attempt — aborting" + notify "PR #${pr} CI is failing; cannot merge." + return 1 + fi + sleep 30 + done + + # Pre-emptive rebase to avoid merge conflicts + local mergeable + mergeable=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${pr}" | jq -r '.mergeable // true') + if [ "$mergeable" = "false" ]; then + log "PR #${pr} has merge conflicts — attempting rebase" + local work_dir="${WORKTREE:-$REPO_ROOT}" + if (cd "$work_dir" && git fetch origin "${PRIMARY_BRANCH}" && git rebase "origin/${PRIMARY_BRANCH}" 2>&1); then + log "rebase succeeded — force pushing" + (cd "$work_dir" && git push origin "${BRANCH}" --force-with-lease 2>&1) || true + sha=$(cd "$work_dir" && git rev-parse HEAD) + log "waiting for CI on rebased commit ${sha:0:7}" + local r_ci + for _r in $(seq 1 20); do + r_ci=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/commits/${sha}/status" | jq -r '.state // "unknown"') + [ "$r_ci" = "success" ] && break + if [ "$r_ci" = "failure" ] || [ "$r_ci" = "error" ]; then + log "CI failed after rebase" + notify "PR #${pr} CI failed after rebase. Needs manual fix." + return 1 + fi + sleep 30 + done + else + log "rebase failed — aborting and escalating" + (cd "$work_dir" && git rebase --abort 2>/dev/null) || true + notify "PR #${pr} has merge conflicts that need manual resolution." + return 1 + fi + fi + + local http_code + http_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls/${pr}/merge" \ + -d '{"Do":"merge","delete_branch_after_merge":true}') + + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then + log "PR #${pr} merged!" + curl -sf -X DELETE \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/branches/${BRANCH}" >/dev/null 2>&1 || true + curl -sf -X PATCH \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}" \ + -d '{"state":"closed"}' >/dev/null 2>&1 || true + cleanup_labels + notify "✅ PR #${pr} merged! Issue #${ISSUE} done." + kill_tmux_session + cleanup_worktree + rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" + exit 0 + else + log "merge failed (HTTP ${http_code}) — attempting rebase and retry" + local work_dir="${WORKTREE:-$REPO_ROOT}" + if (cd "$work_dir" && git fetch origin "${PRIMARY_BRANCH}" && git rebase "origin/${PRIMARY_BRANCH}" 2>&1); then + log "rebase succeeded — force pushing" + (cd "$work_dir" && git push origin "${BRANCH}" --force-with-lease 2>&1) || true + sha=$(cd "$work_dir" && git rev-parse HEAD) + log "waiting for CI on rebased commit ${sha:0:7}" + local r2_ci + for _r2 in $(seq 1 20); do + r2_ci=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/commits/${sha}/status" | jq -r '.state // "unknown"') + [ "$r2_ci" = "success" ] && break + if [ "$r2_ci" = "failure" ] || [ "$r2_ci" = "error" ]; then + log "CI failed after merge-retry rebase" + notify "PR #${pr} CI failed after rebase. Needs manual fix." + return 1 + fi + sleep 30 + done + # Re-approve (force push dismisses stale approvals) + curl -sf -X POST \ + -H "Authorization: token ${REVIEW_BOT_TOKEN:-${CODEBERG_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${API}/pulls/${pr}/reviews" \ + -d '{"event":"APPROVED","body":"Auto-approved after rebase."}' >/dev/null 2>&1 || true + # Retry merge + http_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls/${pr}/merge" \ + -d '{"Do":"merge","delete_branch_after_merge":true}') + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then + log "PR #${pr} merged after rebase!" + notify "✅ PR #${pr} merged! Issue #${ISSUE} done." + curl -sf -X PATCH -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}" -d '{"state":"closed"}' >/dev/null 2>&1 || true + cleanup_labels + kill_tmux_session + cleanup_worktree + rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" + exit 0 + fi + else + (cd "$work_dir" && git rebase --abort 2>/dev/null) || true + fi + log "merge still failing after rebase (HTTP ${http_code})" + notify "PR #${pr} merge failed after rebase (HTTP ${http_code}). Needs human attention." + return 1 + fi +} + +# ============================================================================= +# LOG ROTATION +# ============================================================================= if [ -f "$LOGFILE" ] && [ "$(stat -c%s "$LOGFILE" 2>/dev/null || echo 0)" -gt 102400 ]; then mv "$LOGFILE" "$LOGFILE.old" log "Log rotated" fi -# --- Memory guard --- +# ============================================================================= +# MEMORY GUARD +# ============================================================================= AVAIL_MB=$(awk '/MemAvailable/ {printf "%d", $2/1024}' /proc/meminfo) if [ "$AVAIL_MB" -lt 2000 ]; then log "SKIP: only ${AVAIL_MB}MB available (need 2000MB)" exit 0 fi -# --- Concurrency lock --- +# ============================================================================= +# CONCURRENCY LOCK +# ============================================================================= if [ -f "$LOCKFILE" ]; then LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then @@ -119,7 +319,9 @@ if [ -f "$LOCKFILE" ]; then fi echo $$ > "$LOCKFILE" -# --- Fetch issue --- +# ============================================================================= +# FETCH ISSUE +# ============================================================================= status "fetching issue" ISSUE_JSON=$(curl -s -H "Authorization: token ${CODEBERG_TOKEN}" "${API}/issues/${ISSUE}") || true if [ -z "$ISSUE_JSON" ] || ! echo "$ISSUE_JSON" | jq -e '.id' >/dev/null 2>&1; then @@ -269,13 +471,12 @@ This issue depends on ${BLOCKED_LIST}, which $(if [ "${#BLOCKED_BY[@]}" -eq 1 ]; exit 0 fi -# Bash preflight passed (no explicit unmet deps) -log "bash preflight passed — no explicit unmet dependencies" +# Preflight passed (no explicit unmet deps) +log "preflight passed — no explicit unmet dependencies" # ============================================================================= -# CLAIM ISSUE (tentative — will unclaim if claude refuses) +# CLAIM ISSUE # ============================================================================= - curl -sf -X POST \ -H "Authorization: token ${CODEBERG_TOKEN}" \ -H "Content-Type: application/json" \ @@ -286,10 +487,6 @@ curl -sf -X DELETE \ -H "Authorization: token ${CODEBERG_TOKEN}" \ "${API}/issues/${ISSUE}/labels/backlog" >/dev/null 2>&1 || true -curl -sf -X DELETE \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - "${API}/issues/${ISSUE}/labels/backlog" >/dev/null 2>&1 || true - CLAIMED=true # ============================================================================= @@ -325,7 +522,7 @@ if [ -z "$EXISTING_PR" ]; then fi if [ -z "$EXISTING_PR" ]; then - # Priority 2: match "Fixes #NNN" or "fixes #NNN" in PR body (stricter: word boundary) + # Priority 2: match "Fixes #NNN" in PR body FOUND_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ "${API}/pulls?state=open&limit=20" | \ jq -r --arg issue "ixes #${ISSUE}\\b" \ @@ -347,7 +544,6 @@ if [ -z "$EXISTING_PR" ]; then if [ -n "$CLOSED_PR" ]; then CLOSED_PR_NUM=$(echo "$CLOSED_PR" | awk '{print $1}') log "found closed (unmerged) PR #${CLOSED_PR_NUM} as prior art" - # Fetch the diff for claude to reference PRIOR_ART_DIFF=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ "${API}/pulls/${CLOSED_PR_NUM}.diff" | head -500) || true if [ -n "$PRIOR_ART_DIFF" ]; then @@ -361,246 +557,41 @@ if [ -n "$EXISTING_PR" ]; then PR_NUMBER="$EXISTING_PR" BRANCH="$EXISTING_BRANCH" log "RECOVERY MODE: adopting PR #${PR_NUMBER} on branch ${BRANCH}" +fi - PR_SHA=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ - "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') +# ============================================================================= +# WORKTREE SETUP +# ============================================================================= +status "setting up worktree" +cd "$REPO_ROOT" - PENDING_REVIEW=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ - "${API}/issues/${PR_NUMBER}/comments?limit=50" | \ - jq -r --arg sha "$PR_SHA" \ - '[.[] | select(.body | contains(" - -### Changes made: -${CHANGE_SUMMARY} - ---- -*Addressed at \`$(git rev-parse HEAD | head -c 7)\` · automated by dev-agent (recovery mode)*" - - printf '%s' "$DEV_COMMENT" > /tmp/dev-comment-body.txt - jq -Rs '{body: .}' < /tmp/dev-comment-body.txt > /tmp/dev-comment.json - curl -sf -o /dev/null -X POST \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${PR_NUMBER}/comments" \ - --data-binary @/tmp/dev-comment.json 2>/dev/null || \ - log "WARNING: failed to post dev-response comment" - rm -f /tmp/dev-comment-body.txt /tmp/dev-comment.json - fi - else - # Check if PR already has approval — try merge immediately - EXISTING_APPROVAL=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ - "${API}/pulls/${PR_NUMBER}/reviews" | \ - jq -r '[.[] | select(.stale == false and .state == "APPROVED")] | length') - CI_NOW=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ - "${API}/commits/$(git -C "$REPO_ROOT" rev-parse "origin/${BRANCH}" 2>/dev/null || echo HEAD)/status" | jq -r '.state // "unknown"') - CI_PASS=false - if [ "$CI_NOW" = "success" ]; then - CI_PASS=true - elif [ "${WOODPECKER_REPO_ID:-2}" = "0" ] && { [ -z "$CI_NOW" ] || [ "$CI_NOW" = "pending" ] || [ "$CI_NOW" = "unknown" ]; }; then - CI_PASS=true # no CI configured for this project - fi - if [ "${EXISTING_APPROVAL:-0}" -gt 0 ] && [ "$CI_PASS" = true ]; then - log "PR already approved + CI green — attempting merge" - MERGE_HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/pulls/${PR_NUMBER}/merge" \ - -d '{"Do":"merge","delete_branch_after_merge":true}') - if [ "$MERGE_HTTP" = "200" ] || [ "$MERGE_HTTP" = "204" ]; then - log "PR #${PR_NUMBER} merged!" - notify "✅ PR #${PR_NUMBER} merged! Issue #${ISSUE} done." - curl -sf -X PATCH -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}" -d '{"state":"closed"}' >/dev/null 2>&1 || true - cleanup_labels - cleanup_worktree - exit 0 - fi - # Merge failed — rebase and retry - log "merge failed (HTTP ${MERGE_HTTP}) — rebasing" - cd "$REPO_ROOT" - git fetch origin "${PRIMARY_BRANCH}" "$BRANCH" 2>/dev/null - TMP_WT="/tmp/rebase-pr-${PR_NUMBER}" - rm -rf "$TMP_WT" - if git worktree add "$TMP_WT" "$BRANCH" 2>/dev/null && \ - (cd "$TMP_WT" && git rebase "origin/${PRIMARY_BRANCH}" 2>&1) && \ - (cd "$TMP_WT" && git push --force-with-lease origin "$BRANCH" 2>&1); then - log "rebased — waiting for CI + re-approval" - git worktree remove "$TMP_WT" 2>/dev/null || true - NEW_SHA=$(git rev-parse "origin/${BRANCH}" 2>/dev/null || true) - # Wait for CI - for _r in $(seq 1 20); do - _ci=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ - "${API}/commits/${NEW_SHA}/status" | jq -r '.state // "unknown"') - [ "$_ci" = "success" ] && break - sleep 30 - done - # Re-approve (force push dismissed stale approval) - curl -sf -X POST -H "Authorization: token ${REVIEW_BOT_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/pulls/${PR_NUMBER}/reviews" \ - -d '{"event":"APPROVED","body":"Auto-approved after rebase."}' >/dev/null 2>&1 || true - # Retry merge - MERGE_HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/pulls/${PR_NUMBER}/merge" \ - -d '{"Do":"merge","delete_branch_after_merge":true}') - if [ "$MERGE_HTTP" = "200" ] || [ "$MERGE_HTTP" = "204" ]; then - log "PR #${PR_NUMBER} merged after rebase!" - notify "✅ PR #${PR_NUMBER} merged! Issue #${ISSUE} done." - curl -sf -X PATCH -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}" -d '{"state":"closed"}' >/dev/null 2>&1 || true - cleanup_labels - cleanup_worktree - exit 0 - fi - notify "PR #${PR_NUMBER} merge failed after rebase (HTTP ${MERGE_HTTP}). Needs human attention." - else - git worktree remove --force "$TMP_WT" 2>/dev/null || true - notify "PR #${PR_NUMBER} rebase failed. Needs human attention." - fi - exit 0 - fi - log "no unaddressed review found — PR exists, entering review loop to wait" - cd "$REPO_ROOT" - git fetch origin "$BRANCH" 2>/dev/null - - # Reuse existing worktree if on the right branch (preserves .claude session) - if [ -d "$WORKTREE/.git" ] || [ -f "$WORKTREE/.git" ]; then - WT_BRANCH=$(cd "$WORKTREE" && git rev-parse --abbrev-ref HEAD 2>/dev/null || true) - if [ "$WT_BRANCH" = "$BRANCH" ]; then - log "reusing existing worktree (preserves claude session)" - cd "$WORKTREE" - git pull --ff-only origin "$BRANCH" 2>/dev/null || git reset --hard "origin/${BRANCH}" 2>/dev/null || true - else - cleanup_worktree - git worktree add "$WORKTREE" "origin/${BRANCH}" -B "$BRANCH" 2>&1 || { - log "ERROR: worktree setup failed for recovery" - exit 1 - } - cd "$WORKTREE" - git submodule update --init --recursive 2>/dev/null || true - fi - else - cleanup_worktree - git worktree add "$WORKTREE" "origin/${BRANCH}" -B "$BRANCH" 2>&1 || { - log "ERROR: worktree setup failed for recovery" - exit 1 - } - cd "$WORKTREE" - git submodule update --init --recursive 2>/dev/null || true + git pull --ff-only origin "$BRANCH" 2>/dev/null || git reset --hard "origin/${BRANCH}" 2>/dev/null || true + REUSE_WORKTREE=true fi fi -else - # ============================================================================= - # NORMAL MODE: implement from scratch - # ============================================================================= - status "creating worktree" - cd "$REPO_ROOT" + if [ "$REUSE_WORKTREE" = false ]; then + cleanup_worktree + git worktree add "$WORKTREE" "origin/${BRANCH}" -B "$BRANCH" 2>&1 || { + log "ERROR: worktree creation failed for recovery" + cleanup_labels + exit 1 + } + cd "$WORKTREE" + git submodule update --init --recursive 2>/dev/null || true + fi +else + # Normal mode: create fresh worktree from primary branch # Ensure repo is in clean state (abort stale rebases, checkout primary branch) if [ -d "$REPO_ROOT/.git/rebase-merge" ] || [ -d "$REPO_ROOT/.git/rebase-apply" ]; then @@ -626,7 +617,6 @@ else git checkout -B "$BRANCH" "origin/${PRIMARY_BRANCH}" 2>/dev/null git submodule update --init --recursive 2>/dev/null || true - # Symlink lib node_modules from main repo (submodule init doesn't run npm install) for lib_dir in "$REPO_ROOT"/onchain/lib/*/; do lib_name=$(basename "$lib_dir") @@ -634,14 +624,119 @@ else ln -s "$lib_dir/node_modules" "$WORKTREE/onchain/lib/$lib_name/node_modules" 2>/dev/null || true fi done +fi - # --- Build the unified prompt: implement OR refuse --- - # Gather open issue list for context (so claude can suggest alternatives) - OPEN_ISSUES_SUMMARY=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ - "${API}/issues?state=open&labels=backlog&limit=20&type=issues" | \ - jq -r '.[] | "#\(.number) \(.title)"' 2>/dev/null || echo "(could not fetch)") +# ============================================================================= +# BUILD PROMPT +# ============================================================================= +OPEN_ISSUES_SUMMARY=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues?state=open&labels=backlog&limit=20&type=issues" | \ + jq -r '.[] | "#\(.number) \(.title)"' 2>/dev/null || echo "(could not fetch)") - PROMPT="You are working in a git worktree at ${WORKTREE} on branch ${BRANCH}. +PHASE_PROTOCOL_INSTRUCTIONS="## Phase-Signaling Protocol (REQUIRED) + +You are running in a persistent tmux session managed by an orchestrator. +Communicate progress by writing to the phase file. The orchestrator watches +this file and injects events (CI results, review feedback) back into this session. + +### Key files +\`\`\` +PHASE_FILE=\"${PHASE_FILE}\" +SUMMARY_FILE=\"${IMPL_SUMMARY_FILE}\" +\`\`\` + +### Phase transitions — write these exactly: + +**After committing and pushing your branch:** +\`\`\`bash +git push origin ${BRANCH} +# Write a short summary of what you implemented: +printf '%s' \"\" > \"\${SUMMARY_FILE}\" +# Signal the orchestrator to create the PR and watch for CI: +echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" +\`\`\` +Then STOP and wait. The orchestrator will inject CI results. + +**When you receive a \"CI passed\" injection:** +\`\`\`bash +echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\" +\`\`\` +Then STOP and wait. The orchestrator will inject review feedback. + +**When you receive a \"CI failed:\" injection:** +Fix the CI issue, commit, push, then: +\`\`\`bash +echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" +\`\`\` +Then STOP and wait. + +**When you receive a \"Review: REQUEST_CHANGES\" injection:** +Address ALL review feedback, commit, push, then: +\`\`\`bash +echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" +\`\`\` +(CI runs again after each push — always write awaiting_ci, not awaiting_review) + +**When you receive an \"Approved\" injection:** +\`\`\`bash +echo \"PHASE:done\" > \"${PHASE_FILE}\" +\`\`\` +The orchestrator handles the merge. You are done. + +**If refusing (too large, unmet dep, already done):** +\`\`\`bash +printf '%s' '{\"status\":\"too_large\",\"reason\":\"...\"}' > \"\${SUMMARY_FILE}\" +printf 'PHASE:failed\nReason: refused\n' > \"${PHASE_FILE}\" +\`\`\` + +**On unrecoverable failure:** +\`\`\`bash +printf 'PHASE:failed\nReason: %s\n' \"describe what failed\" > \"${PHASE_FILE}\" +\`\`\`" + +if [ "$RECOVERY_MODE" = true ]; then + # Build recovery context + GIT_DIFF_STAT=$(git -C "$WORKTREE" diff "origin/${PRIMARY_BRANCH}..HEAD" --stat 2>/dev/null | head -20 || echo "(no diff)") + LAST_PHASE=$(read_phase) + CI_RESULT=$(cat "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || echo "") + REVIEW_COMMENTS=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${PR_NUMBER}/comments?limit=10" | \ + jq -r '.[-3:] | .[] | "[\(.user.login)] \(.body[:500])"' 2>/dev/null || echo "(none)") + + INITIAL_PROMPT="You are working in a git worktree at ${WORKTREE} on branch ${BRANCH}. +This is issue #${ISSUE} for the ${CODEBERG_REPO} project. + +## Issue: ${ISSUE_TITLE} + +${ISSUE_BODY} + +## CRASH RECOVERY + +Your previous session for this issue was interrupted. Resume from where you left off. +Git is the checkpoint — your code changes survived. + +### Work completed before crash: +\`\`\` +${GIT_DIFF_STAT} +\`\`\` + +### Last known phase: ${LAST_PHASE:-unknown} + +### PR: #${PR_NUMBER} (${BRANCH}) + +### Recent PR comments: +${REVIEW_COMMENTS} +$(if [ -n "$CI_RESULT" ]; then printf '\n### Last CI result:\n%s\n' "$CI_RESULT"; fi) + +### Next steps +1. Run \`git log --oneline -5\` and \`git status\` to understand current state. +2. Resume from the last known phase. +3. Follow the phase protocol below. + +${PHASE_PROTOCOL_INSTRUCTIONS}" +else + # Normal mode: initial implementation prompt + INITIAL_PROMPT="You are working in a git worktree at ${WORKTREE} on branch ${BRANCH}. You have been assigned issue #${ISSUE} for the ${CODEBERG_REPO} project. ## Issue: ${ISSUE_TITLE} @@ -651,13 +746,9 @@ ${ISSUE_BODY} ## Other open issues labeled 'backlog' (for context if you need to suggest alternatives): ${OPEN_ISSUES_SUMMARY} -$(if [ -n "$PRIOR_ART_DIFF" ]; then echo "## Prior Art (closed PR — DO NOT start from scratch) - -A previous PR attempted this issue but was closed without merging. Review the diff below and reuse as much as possible. Fix whatever caused it to fail (merge conflicts, CI errors, review findings). - -\`\`\`diff -${PRIOR_ART_DIFF} -\`\`\`"; fi) +$(if [ -n "$PRIOR_ART_DIFF" ]; then + printf '## Prior Art (closed PR — DO NOT start from scratch)\n\nA previous PR attempted this issue but was closed without merging. Review the diff below and reuse as much as possible. Fix whatever caused it to fail (merge conflicts, CI errors, review findings).\n\n```diff\n%s\n```\n' "$PRIOR_ART_DIFF" +fi) ## Instructions @@ -669,11 +760,10 @@ If the issue is clear, dependencies are met, and scope is reasonable: 2. Implement the changes described in the issue. 3. Run lint and tests before you're done (see AGENTS.md for commands). 4. Commit your changes with message: fix: ${ISSUE_TITLE} (#${ISSUE}) -5. Do NOT push or create PRs — the orchestrator handles that. -6. When finished, output a summary of what you changed and why. +5. Follow the phase protocol below to signal progress. -### Option B: Refuse (output JSON only) -If you cannot or should not implement this issue, output ONLY a JSON object (no other text) with one of these structures: +### Option B: Refuse (write JSON to SUMMARY_FILE, then write PHASE:failed) +If you cannot or should not implement this issue, write ONLY a JSON object to \$SUMMARY_FILE: **Unmet dependency** — required code/infrastructure doesn't exist in the repo yet: \`\`\` @@ -690,6 +780,11 @@ If you cannot or should not implement this issue, output ONLY a JSON object (no {\"status\": \"already_done\", \"reason\": \"where the existing implementation is\"} \`\`\` +Then write: +\`\`\`bash +printf 'PHASE:failed\nReason: refused\n' > \"${PHASE_FILE}\" +\`\`\` + ### How to decide - Read the issue carefully. Check if files/functions it references actually exist in the repo. - If it depends on other issues, check if those issues' deliverables are present in the codebase. @@ -697,678 +792,616 @@ If you cannot or should not implement this issue, output ONLY a JSON object (no - If another open issue should be done first, suggest it. - When in doubt, implement. Only refuse if there's a clear, specific reason. -**Do NOT invent dependencies that aren't real.** If the code compiles and tests pass, that's ready." +**Do NOT invent dependencies that aren't real.** If the code compiles and tests pass, that's ready. - status "claude assessing + implementing" - IMPL_OUTPUT=$(cd "$WORKTREE" && timeout "$CLAUDE_TIMEOUT" \ - claude -p --model sonnet --dangerously-skip-permissions "$PROMPT" 2>&1) || { - EXIT_CODE=$? - if [ "$EXIT_CODE" -eq 124 ]; then - log "TIMEOUT: claude took longer than ${CLAUDE_TIMEOUT}s" - notify "timed out during implementation" - else - log "ERROR: claude exited with code ${EXIT_CODE}" - notify "claude failed (exit ${EXIT_CODE})" - fi +${PHASE_PROTOCOL_INSTRUCTIONS}" +fi + +# ============================================================================= +# CREATE TMUX SESSION +# ============================================================================= +status "creating tmux session: ${SESSION_NAME}" + +# Reuse existing session if still alive (orchestrator may have been restarted) +if ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then + # Kill any stale entry + tmux kill-session -t "${SESSION_NAME}" 2>/dev/null || true + + # Create new detached session running interactive claude in the worktree + tmux new-session -d -s "${SESSION_NAME}" -c "${WORKTREE}" \ + "claude --dangerously-skip-permissions" + + # Wait for Claude to initialize + sleep 3 + + if ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then + log "ERROR: failed to create tmux session ${SESSION_NAME}" cleanup_labels cleanup_worktree exit 1 - } + fi + log "tmux session created: ${SESSION_NAME}" +else + log "reusing existing tmux session: ${SESSION_NAME}" +fi - log "claude finished ($(printf '%s' "$IMPL_OUTPUT" | wc -c) bytes)" - printf '%s' "$IMPL_OUTPUT" > /tmp/dev-agent-last-output.txt +# Send initial prompt via paste buffer (handles long text and special chars) +PROMPT_TMPFILE=$(mktemp /tmp/dev-prompt-XXXXXX) +printf '%s' "$INITIAL_PROMPT" > "$PROMPT_TMPFILE" +tmux load-buffer -b "prompt-${ISSUE}" "$PROMPT_TMPFILE" +tmux paste-buffer -t "${SESSION_NAME}" -b "prompt-${ISSUE}" +sleep 1 +tmux send-keys -t "${SESSION_NAME}" "" Enter +tmux delete-buffer -b "prompt-${ISSUE}" 2>/dev/null || true +rm -f "$PROMPT_TMPFILE" - # --- Check if claude refused (JSON response) vs implemented (commits) --- - REFUSAL_JSON="" +log "initial prompt sent to tmux session" - # Check for refusal: try to parse output as JSON with a status field - # First try raw output - if printf '%s' "$IMPL_OUTPUT" | jq -e '.status' > /dev/null 2>&1; then - REFUSAL_JSON="$IMPL_OUTPUT" - else - # Try extracting from code fence - EXTRACTED=$(printf '%s' "$IMPL_OUTPUT" | sed -n '/^```/,/^```$/p' | sed '1d;$d') - if [ -n "$EXTRACTED" ] && printf '%s' "$EXTRACTED" | jq -e '.status' > /dev/null 2>&1; then - REFUSAL_JSON="$EXTRACTED" - else - # Try extracting first { ... } block (handles preamble text before JSON) - EXTRACTED=$(printf '%s' "$IMPL_OUTPUT" | grep -Pzo '\{[^{}]*"status"[^{}]*\}' 2>/dev/null | tr '\0' '\n' | head -1 || true) - if [ -n "$EXTRACTED" ] && printf '%s' "$EXTRACTED" | jq -e '.status' > /dev/null 2>&1; then - REFUSAL_JSON="$EXTRACTED" - fi - fi +# Signal to dev-poll.sh that we're running (session is up) +echo '{"status":"ready"}' > "$PREFLIGHT_RESULT" +notify "tmux session ${SESSION_NAME} started for issue #${ISSUE}: ${ISSUE_TITLE}" + +# ============================================================================= +# PHASE MONITORING LOOP +# ============================================================================= +status "monitoring phase: ${PHASE_FILE}" + +LAST_PHASE_MTIME=0 +IDLE_ELAPSED=0 + +while true; do + sleep "$PHASE_POLL_INTERVAL" + IDLE_ELAPSED=$(( IDLE_ELAPSED + PHASE_POLL_INTERVAL )) + + # --- Session health check --- + if ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then + CURRENT_PHASE=$(read_phase) + case "$CURRENT_PHASE" in + PHASE:done|PHASE:failed) + # Expected terminal phases — fall through to phase handler below + ;; + *) + log "WARNING: tmux session died unexpectedly (phase: ${CURRENT_PHASE:-none})" + notify "session crashed (phase: ${CURRENT_PHASE:-none}), attempting recovery" + + # Attempt crash recovery: restart session with recovery context + CRASH_DIFF=$(git -C "${WORKTREE}" diff "origin/${PRIMARY_BRANCH}..HEAD" --stat 2>/dev/null | head -20 || echo "(no diff)") + RECOVERY_MSG="## Session Recovery + +Your Claude Code session for issue #${ISSUE} was interrupted unexpectedly. +The git worktree at ${WORKTREE} is intact — your changes survived. + +Last known phase: ${CURRENT_PHASE:-unknown} + +Work so far: +${CRASH_DIFF} + +Run: git log --oneline -5 && git status +Then resume from the last phase following the original phase protocol. +Phase file: ${PHASE_FILE}" + + if tmux new-session -d -s "${SESSION_NAME}" -c "${WORKTREE}" \ + "claude --dangerously-skip-permissions" 2>/dev/null; then + sleep 3 + inject_into_session "$RECOVERY_MSG" + log "recovery session started" + IDLE_ELAPSED=0 + else + log "ERROR: could not restart session after crash" + notify "session crashed and could not recover — needs human attention" + cleanup_labels + break + fi + continue + ;; + esac fi - # But only treat as refusal if there are NO commits (claude might output JSON-like text AND commit) - cd "$WORKTREE" - AHEAD=$(git rev-list "origin/${PRIMARY_BRANCH}..HEAD" --count 2>/dev/null || echo "0") - HAS_CHANGES=$(git status --porcelain) + # --- Check phase file for changes --- + PHASE_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) + CURRENT_PHASE=$(read_phase) - if [ -n "$REFUSAL_JSON" ] && [ "$AHEAD" -eq 0 ] && [ -z "$HAS_CHANGES" ]; then - # Claude refused — parse and handle - REFUSAL_STATUS=$(printf '%s' "$REFUSAL_JSON" | jq -r '.status') - log "claude refused: ${REFUSAL_STATUS}" + if [ -z "$CURRENT_PHASE" ] || [ "$PHASE_MTIME" -le "$LAST_PHASE_MTIME" ]; then + # No phase change — check idle timeout + if [ "$IDLE_ELAPSED" -ge "$IDLE_TIMEOUT" ]; then + log "TIMEOUT: no phase update for ${IDLE_TIMEOUT}s — killing session" + notify "session idle for 2h — killed" + kill_tmux_session + cleanup_labels + if [ -n "${PR_NUMBER:-}" ]; then + log "keeping worktree (PR #${PR_NUMBER} still open)" + else + cleanup_worktree + fi + break + fi + continue + fi - # Write preflight result for dev-poll.sh - printf '%s' "$REFUSAL_JSON" > "$PREFLIGHT_RESULT" + # Phase changed — handle it + LAST_PHASE_MTIME="$PHASE_MTIME" + IDLE_ELAPSED=0 + log "phase: ${CURRENT_PHASE}" + status "${CURRENT_PHASE}" - # Unclaim issue (restore backlog label, remove in-progress) - cleanup_labels - curl -sf -X POST \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}/labels" \ - -d '{"labels":["backlog"]}' >/dev/null 2>&1 || true + # ── PHASE: awaiting_ci ────────────────────────────────────────────────────── + if [ "$CURRENT_PHASE" = "PHASE:awaiting_ci" ]; then - # --- Post refusal comment on the issue (deduplicated) --- - post_refusal_comment() { - local emoji="$1" title="$2" body="$3" - - # Skip if last comment already has same title (prevent spam) - local last_has_title - last_has_title=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ - "${API}/issues/${ISSUE}/comments?limit=1" | \ - jq -r --arg t "Dev-agent: ${title}" '.[0].body // "" | contains($t)') || true - if [ "$last_has_title" = "true" ]; then - log "skipping duplicate refusal comment: ${title}" - return 0 + # Create PR if not yet created + if [ -z "${PR_NUMBER:-}" ]; then + status "creating PR for issue #${ISSUE}" + IMPL_SUMMARY="" + if [ -f "$IMPL_SUMMARY_FILE" ]; then + # Don't treat refusal JSON as a PR summary + if ! jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then + IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE") + fi fi - local comment="${emoji} **Dev-agent: ${title}** + printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt" + jq -n \ + --arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \ + --rawfile body "/tmp/pr-body-${ISSUE}.txt" \ + --arg head "$BRANCH" \ + --arg base "${PRIMARY_BRANCH}" \ + '{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json" -${body} - ---- -*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*" - - printf '%s' "$comment" > "/tmp/refusal-comment.txt" - jq -Rs '{body: .}' < "/tmp/refusal-comment.txt" > "/tmp/refusal-comment.json" - curl -sf -o /dev/null -X POST \ + PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \ -H "Authorization: token ${CODEBERG_TOKEN}" \ -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}/comments" \ - --data-binary @"/tmp/refusal-comment.json" 2>/dev/null || \ - log "WARNING: failed to post refusal comment" - rm -f "/tmp/refusal-comment.txt" "/tmp/refusal-comment.json" - } + "${API}/pulls" \ + --data-binary @"/tmp/pr-request-${ISSUE}.json") - case "$REFUSAL_STATUS" in - unmet_dependency) - BLOCKED_BY=$(printf '%s' "$REFUSAL_JSON" | jq -r '.blocked_by // "unknown"') - SUGGESTION=$(printf '%s' "$REFUSAL_JSON" | jq -r '.suggestion // empty') - log "unmet dependency: ${BLOCKED_BY}. suggestion: ${SUGGESTION:-none}" - notify "refused #${ISSUE}: unmet dependency — ${BLOCKED_BY}" + PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1) + PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d') + rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json" - COMMENT_BODY="### Blocked by unmet dependency + if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then + PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number') + log "created PR #${PR_NUMBER}" + notify "PR #${PR_NUMBER} created for issue #${ISSUE}: ${ISSUE_TITLE}" + elif [ "$PR_HTTP_CODE" = "409" ]; then + # PR already exists (race condition) — find it + FOUND_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "$BRANCH" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + if [ -n "$FOUND_PR" ]; then + PR_NUMBER="$FOUND_PR" + log "PR already exists: #${PR_NUMBER}" + else + log "ERROR: PR creation got 409 but no existing PR found" + inject_into_session "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the Codeberg API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed." + continue + fi + else + log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})" + notify "failed to create PR (HTTP ${PR_HTTP_CODE})" + inject_into_session "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push origin ${BRANCH}. Then write PHASE:awaiting_ci again." + continue + fi + fi -${BLOCKED_BY}" - if [ -n "$SUGGESTION" ] && [ "$SUGGESTION" != "null" ]; then - COMMENT_BODY="${COMMENT_BODY} + # No CI configured? Treat as success immediately + if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then + log "no CI configured — treating as passed" + inject_into_session "CI passed on PR #${PR_NUMBER} (no CI configured for this project). +Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback." + continue + fi + + # Poll CI until done or timeout + status "waiting for CI on PR #${PR_NUMBER}" + CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \ + curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') + + CI_DONE=false + CI_STATE="unknown" + CI_POLL_ELAPSED=0 + while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do + sleep 30 + CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 )) + + # Check session still alive during CI wait + if ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then + log "session died during CI wait" + break + fi + + CI_STATE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/commits/${CI_CURRENT_SHA}/status" | jq -r '.state // "unknown"') + if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then + CI_DONE=true + [ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0 + break + fi + done + + if ! $CI_DONE; then + log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s" + notify "CI timeout on PR #${PR_NUMBER}" + inject_into_session "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:needs_human if you cannot proceed." + continue + fi + + log "CI: ${CI_STATE}" + + if [ "$CI_STATE" = "success" ]; then + inject_into_session "CI passed on PR #${PR_NUMBER}. +Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback: + echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" + else + # Fetch CI error details + PIPELINE_NUM=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/commits/${CI_CURRENT_SHA}/status" | \ + jq -r '.statuses[0].target_url // ""' | grep -oP 'pipeline/\K[0-9]+' | head -1 || true) + + FAILED_STEP="" + FAILED_EXIT="" + IS_INFRA=false + if [ -n "$PIPELINE_NUM" ]; then + FAILED_INFO=$(curl -sf \ + -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ + "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \ + jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true) + FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1) + FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2) + fi + + log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}" + + case "${FAILED_STEP}" in git*) IS_INFRA=true ;; esac + case "${FAILED_EXIT}" in 128|137) IS_INFRA=true ;; esac + + if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then + CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 )) + log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})" + (cd "$WORKTREE" && git commit --allow-empty \ + -m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1) + (cd "$WORKTREE" && git push origin "$BRANCH" --force 2>&1 | tail -3) + # Touch phase file so we recheck CI on the new SHA + touch "$PHASE_FILE" + LAST_PHASE_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) + CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true) + continue + fi + + CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 )) + if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then + log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating" + echo "{\"issue\":${ISSUE},\"pr\":${PR_NUMBER},\"reason\":\"ci_exhausted\",\"step\":\"${FAILED_STEP:-unknown}\",\"attempts\":${CI_FIX_COUNT},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ + >> "${FACTORY_ROOT}/supervisor/escalations.jsonl" + notify "CI exhausted after ${CI_FIX_COUNT} attempts — escalated to supervisor" + printf 'PHASE:failed\nReason: ci_exhausted after %d attempts\n' "$CI_FIX_COUNT" > "$PHASE_FILE" + LAST_PHASE_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) + continue + fi + + CI_ERROR_LOG="" + if [ -n "$PIPELINE_NUM" ]; then + CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "") + fi + + # Save CI result for crash recovery + printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \ + "$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \ + > "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true + + inject_into_session "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}). + +Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?}) + +CI debug tool: + bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0} + bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} + +Error snippet: +${CI_ERROR_LOG:-No logs available. Use ci-debug.sh to query the pipeline.} + +Instructions: +1. Run ci-debug.sh failures to get the full error output. +2. Read the failing test file(s) — understand what the tests EXPECT. +3. Fix the root cause — do NOT weaken tests. +4. Commit your fix and push: git push origin ${BRANCH} +5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" +6. Stop and wait." + fi + + # ── PHASE: awaiting_review ────────────────────────────────────────────────── + elif [ "$CURRENT_PHASE" = "PHASE:awaiting_review" ]; then + status "waiting for review on PR #${PR_NUMBER:-?}" + CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle + + if [ -z "${PR_NUMBER:-}" ]; then + log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR" + FOUND_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "$BRANCH" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + if [ -n "$FOUND_PR" ]; then + PR_NUMBER="$FOUND_PR" + log "found PR #${PR_NUMBER}" + else + inject_into_session "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push origin ${BRANCH}, then write PHASE:awaiting_ci." + continue + fi + fi + + REVIEW_POLL_ELAPSED=0 + REVIEW_FOUND=false + while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do + sleep 300 # 5 min between review checks + REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 )) + + # Check session still alive + if ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then + log "session died during review wait" + REVIEW_FOUND=false + break + fi + + # Check if phase was updated while we wait (e.g., Claude reacted to something) + NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) + if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then + log "phase file updated during review wait — re-entering main loop" + # Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer + # loop detects the change on its next tick and dispatches the new phase. + REVIEW_FOUND=true # Prevent timeout injection + break + fi + + REVIEW_SHA=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true + REVIEW_COMMENT=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${PR_NUMBER}/comments?limit=50" | \ + jq -r --arg sha "$REVIEW_SHA" \ + '[.[] | select(.body | contains(" - -### Changes made: -${CHANGE_SUMMARY} - ---- -*Addressed at \`$(git rev-parse HEAD | head -c 7)\` · automated by dev-agent*" - - curl -sf -o /dev/null -X POST \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${PR_NUMBER}/comments" \ - -d "$(jq -n --arg body "$DEV_COMMENT" '{body: $body}')" 2>/dev/null || \ - log "WARNING: failed to post dev-response comment" + log "WARNING: unknown phase value: ${CURRENT_PHASE}" fi done -if [ "$REVIEW_ROUND" -ge "$MAX_REVIEW_ROUNDS" ]; then - log "hit max review rounds (${MAX_REVIEW_ROUNDS})" - notify "PR #${PR_NUMBER}: hit ${MAX_REVIEW_ROUNDS} review rounds, needs human attention" -fi - -cleanup_labels -# Keep worktree if PR is still open (recovery can reuse session context) -if [ -n "${PR_NUMBER:-}" ]; then - PR_STATE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ - "${API}/pulls/${PR_NUMBER}" | jq -r '.state // "unknown"') || true - if [ "$PR_STATE" = "open" ]; then - log "keeping worktree (PR #${PR_NUMBER} still open, session preserved for recovery)" - else - cleanup_worktree - fi -else - cleanup_worktree -fi log "dev-agent finished for issue #${ISSUE}"