#!/usr/bin/env bash
# dev/phase-handler.sh — Phase callback functions for dev-agent.sh
#
# Source this file from agent orchestrators after lib/agent-session.sh is loaded.
# Defines: post_blocked_diagnostic(), build_phase_protocol_prompt(), do_merge(),
#          post_refusal_comment(), _on_phase_change()
#
# Required globals (set by calling agent before or after sourcing):
#   ISSUE, FORGE_TOKEN, API, FORGE_WEB, PROJECT_NAME, FACTORY_ROOT
#   BRANCH, PHASE_FILE, WORKTREE, IMPL_SUMMARY_FILE, THREAD_FILE
#   PRIMARY_BRANCH, SESSION_NAME, LOGFILE, ISSUE_TITLE
#   WOODPECKER_REPO_ID, WOODPECKER_TOKEN, WOODPECKER_SERVER
#
# Globals with defaults (agents can override after sourcing):
#   PR_NUMBER, CI_POLL_TIMEOUT, MAX_CI_FIXES, MAX_REVIEW_ROUNDS,
#   REVIEW_POLL_TIMEOUT, CI_RETRY_COUNT, CI_FIX_COUNT, REVIEW_ROUND,
#   CLAIMED, PHASE_POLL_INTERVAL
#
# Calls back to agent-defined helpers:
#   cleanup_worktree(), cleanup_labels(), notify(), notify_ctx(), status(), log()
#
# shellcheck shell=bash
# shellcheck disable=SC2154  # globals are set in dev-agent.sh before calling
# shellcheck disable=SC2034  # CLAIMED is read by cleanup() in dev-agent.sh

# Load secret scanner for redacting tmux output before posting to issues
# shellcheck source=../lib/secret-scan.sh
source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh"
# Load shared CI helpers (is_infra_step, classify_pipeline_failure, etc.)
# shellcheck source=../lib/ci-helpers.sh
source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh"
# Load mirror push helper
# shellcheck source=../lib/mirrors.sh
source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh"

# --- Default globals (agents can override after sourcing) ---
# ':=' only assigns when the variable is unset or empty, so values exported by
# the caller before sourcing are preserved.
: "${CI_POLL_TIMEOUT:=1800}"
: "${REVIEW_POLL_TIMEOUT:=10800}"
: "${MAX_CI_FIXES:=3}"
: "${MAX_REVIEW_ROUNDS:=5}"
: "${CI_RETRY_COUNT:=0}"
: "${CI_FIX_COUNT:=0}"
: "${REVIEW_ROUND:=0}"
: "${PR_NUMBER:=}"
: "${CLAIMED:=false}"
: "${PHASE_POLL_INTERVAL:=30}"

# --- Post diagnostic comment + label issue as blocked ---
# Captures tmux pane output, posts a structured comment on the issue, removes
# in-progress label, and adds the "blocked" label.
#
# Args: reason [session_name]
# Uses globals: ISSUE, SESSION_NAME, PR_NUMBER, FORGE_TOKEN, API
# Sets globals: CLAIMED=false, _BLOCKED_POSTED=true
# Calls: redact_secrets, cleanup_labels, ensure_blocked_label_id (all defined
#        outside this function)
# All forge API calls are best-effort: failures are silenced so that the
# caller's shutdown path is never interrupted.
post_blocked_diagnostic() {
  local reason="$1"
  local session="${2:-${SESSION_NAME:-}}"

  # Capture last 50 lines from tmux pane (before kill)
  local tmux_output=""
  if [ -n "$session" ] && tmux has-session -t "$session" 2>/dev/null; then
    tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true)
  fi

  # Redact any secrets from tmux output before posting to issue
  if [ -n "$tmux_output" ]; then
    tmux_output=$(redact_secrets "$tmux_output")
  fi

  # Build diagnostic comment body
  local comment
  comment="### Session failure diagnostic

| Field | Value |
|---|---|
| Exit reason | \`${reason}\` |
| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` |"

  # Only mention the PR when one is actually known ("" and "0" mean "none").
  if [ -n "${PR_NUMBER:-}" ] && [ "${PR_NUMBER:-0}" != "0" ]; then
    comment="${comment}
| PR | #${PR_NUMBER} |"
  fi

  if [ -n "$tmux_output" ]; then
    comment="${comment}

Last 50 lines from tmux pane

\`\`\`
${tmux_output}
\`\`\`
"
  fi

  # Post comment to issue (jq handles all JSON escaping of the body)
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${API}/issues/${ISSUE}/comments" \
    -d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true

  # Remove in-progress, add blocked
  cleanup_labels
  local blocked_id
  blocked_id=$(ensure_blocked_label_id)
  if [ -n "$blocked_id" ]; then
    curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${API}/issues/${ISSUE}/labels" \
      -d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true
  fi

  CLAIMED=false
  _BLOCKED_POSTED=true
}

# --- Build phase protocol prompt (shared across agents) ---
# Generates the phase-signaling instructions for Claude prompts.
# Args: phase_file summary_file branch
# Output: The protocol text (stdout)
#
# The here-document delimiter is unquoted on purpose: ${_pf}, ${_sf} and
# ${_br} are expanded into the prompt, while \${SUMMARY_FILE} and the escaped
# backticks are emitted literally for the agent to read.
build_phase_protocol_prompt() {
  local _pf="$1" _sf="$2" _br="$3"
  cat <<_PHASE_PROTOCOL_EOF_
## Phase-Signaling Protocol (REQUIRED)

You are running in a persistent tmux session managed by an orchestrator.
Communicate progress by writing to the phase file. The orchestrator watches
this file and injects events (CI results, review feedback) back into this session.

### Key files
\`\`\`
PHASE_FILE="${_pf}"
SUMMARY_FILE="${_sf}"
\`\`\`

### Phase transitions — write these exactly:

**After committing and pushing your branch:**
\`\`\`bash
git push origin ${_br}
# Write a short summary of what you implemented:
printf '%s' "describe what you implemented" > "\${SUMMARY_FILE}"
# Signal the orchestrator to create the PR and watch for CI:
echo "PHASE:awaiting_ci" > "${_pf}"
\`\`\`
Then STOP and wait. The orchestrator will inject CI results.

**When you receive a "CI passed" injection:**
\`\`\`bash
echo "PHASE:awaiting_review" > "${_pf}"
\`\`\`
Then STOP and wait. The orchestrator will inject review feedback.

**When you receive a "CI failed:" injection:**
Fix the CI issue, commit, push, then:
\`\`\`bash
echo "PHASE:awaiting_ci" > "${_pf}"
\`\`\`
Then STOP and wait.

**When you receive a "Review: REQUEST_CHANGES" injection:**
Address ALL review feedback, commit, push, then:
\`\`\`bash
echo "PHASE:awaiting_ci" > "${_pf}"
\`\`\`
(CI runs again after each push — always write awaiting_ci, not awaiting_review)

**When you need human help (CI exhausted, merge blocked, stuck on a decision):**
\`\`\`bash
printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}"
\`\`\`
Then STOP and wait. A human will reply via Matrix and the response will be injected.

**On unrecoverable failure:**
\`\`\`bash
printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}"
\`\`\`
_PHASE_PROTOCOL_EOF_
}

# --- Merge helper ---
# do_merge — attempt to merge PR via forge API.
# Args: pr_num
# Uses globals: FORGE_TOKEN, API, PHASE_FILE
# Returns:
#   0 = merged successfully (or found already merged after an HTTP 405)
#   1 = other failure (conflict, network error, etc.)
#   2 = not enough approvals (HTTP 405) — PHASE:escalate already written
do_merge() {
  local pr_num="$1"
  local merge_response merge_http_code merge_body

  # -w appends the status code on its own final line so body and code can be
  # separated afterwards; '|| true' keeps a transport error from aborting.
  merge_response=$(curl -s -w "\n%{http_code}" -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H 'Content-Type: application/json' \
    "${API}/pulls/${pr_num}/merge" \
    -d '{"Do":"merge","delete_branch_after_merge":true}') || true
  merge_http_code=$(printf '%s\n' "$merge_response" | tail -n 1)
  merge_body=$(printf '%s\n' "$merge_response" | sed '$d')

  if [ "$merge_http_code" = "200" ] || [ "$merge_http_code" = "204" ]; then
    log "do_merge: PR #${pr_num} merged (HTTP ${merge_http_code})"
    return 0
  fi

  # HTTP 405 — could be "merge requirements not met" OR "already merged" (race with dev-poll).
  # Before escalating, check whether the PR was already merged by another agent.
  if [ "$merge_http_code" = "405" ]; then
    local pr_state
    pr_state=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
      "${API}/pulls/${pr_num}" | jq -r '.merged // false') || pr_state="false"
    if [ "$pr_state" = "true" ]; then
      log "do_merge: PR #${pr_num} already merged (detected after HTTP 405) — treating as success"
      return 0
    fi
    log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}"
    printf 'PHASE:escalate\nReason: %s\n' \
      "PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \
      > "$PHASE_FILE"
    return 2
  fi

  log "do_merge: PR #${pr_num} merge failed (HTTP ${merge_http_code}): ${merge_body:0:200}"
  return 1
}

# --- Refusal comment helper ---
# post_refusal_comment — post a "Dev-agent: <title>" comment on the issue,
# unless one of the 5 comments returned by the API already contains that title.
# Args: emoji title body
# Uses globals: ISSUE, FORGE_TOKEN, API
# Returns: 0 (a failed POST is logged as a warning, not propagated)
post_refusal_comment() {
  local emoji="$1" title="$2" body="$3"

  # De-duplicate: skip when a fetched comment already carries this title.
  local last_has_title
  last_has_title=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/issues/${ISSUE}/comments?limit=5" | \
    jq -r --arg t "Dev-agent: ${title}" '[.[] | .body // ""] | any(contains($t)) | tostring') || true
  if [ "$last_has_title" = "true" ]; then
    log "skipping duplicate refusal comment: ${title}"
    return 0
  fi

  local comment
  comment="${emoji} **Dev-agent: ${title}**

${body}

---
*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*"

  # Stream the JSON payload to curl on stdin instead of staging it in fixed
  # /tmp paths: predictable names race between concurrent agents and can be
  # hijacked via symlinks.
  jq -nc --arg b "$comment" '{body:$b}' | \
    curl -sf -o /dev/null -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${API}/issues/${ISSUE}/comments" \
      --data-binary @- 2>/dev/null || \
    log "WARNING: failed to post refusal comment"
}

# =============================================================================
# PHASE DISPATCH CALLBACK
# =============================================================================

# _on_phase_change — Phase dispatch callback for monitor_phase_loop
# Receives the current phase as
$1. # Returns 0 to continue the loop, 1 to break (terminal phase reached). _on_phase_change() { local phase="$1" # ── PHASE: awaiting_ci ────────────────────────────────────────────────────── if [ "$phase" = "PHASE:awaiting_ci" ]; then # Create PR if not yet created if [ -z "${PR_NUMBER:-}" ]; then status "creating PR for issue #${ISSUE}" IMPL_SUMMARY="" if [ -f "$IMPL_SUMMARY_FILE" ]; then # Don't treat refusal JSON as a PR summary if ! jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE") fi fi printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt" jq -n \ --arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \ --rawfile body "/tmp/pr-body-${ISSUE}.txt" \ --arg head "$BRANCH" \ --arg base "${PRIMARY_BRANCH}" \ '{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json" PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ "${API}/pulls" \ --data-binary @"/tmp/pr-request-${ISSUE}.json") PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1) PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d') rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json" if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number') log "created PR #${PR_NUMBER}" PR_URL="${FORGE_WEB}/pulls/${PR_NUMBER}" notify_ctx \ "PR #${PR_NUMBER} created: ${ISSUE_TITLE}" \ "PR #${PR_NUMBER} created: ${ISSUE_TITLE}" elif [ "$PR_HTTP_CODE" = "409" ]; then # PR already exists (race condition) — find it FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/pulls?state=open&limit=20" | \ jq -r --arg branch "$BRANCH" \ '.[] | select(.head.ref == $branch) | .number' | head -1) || true if [ -n "$FOUND_PR" ]; then PR_NUMBER="$FOUND_PR" log "PR already exists: #${PR_NUMBER}" else log "ERROR: PR creation got 409 but no existing PR found" 
agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the forge API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed." return 0 fi else log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})" notify "failed to create PR (HTTP ${PR_HTTP_CODE})" agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push origin ${BRANCH}. Then write PHASE:awaiting_ci again." return 0 fi fi # No CI configured? Treat as success immediately if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then log "no CI configured — treating as passed" agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER} (no CI configured for this project). Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback." return 0 fi # Poll CI until done or timeout status "waiting for CI on PR #${PR_NUMBER}" CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \ curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') CI_DONE=false CI_STATE="unknown" CI_POLL_ELAPSED=0 while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do sleep 30 CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 )) # Check session still alive during CI wait (exit_marker + tmux fallback) if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then log "session died during CI wait" break fi # Re-fetch HEAD — Claude may have pushed new commits since loop started CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || echo "$CI_CURRENT_SHA") CI_STATE=$(ci_commit_status "$CI_CURRENT_SHA") if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then CI_DONE=true [ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0 break fi done if ! 
$CI_DONE; then log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s" notify "CI timeout on PR #${PR_NUMBER}" agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed." return 0 fi log "CI: ${CI_STATE}" if [ "$CI_STATE" = "success" ]; then agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER}. Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback: echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" else # Fetch CI error details PIPELINE_NUM=$(ci_pipeline_number "$CI_CURRENT_SHA") FAILED_STEP="" FAILED_EXIT="" IS_INFRA=false if [ -n "$PIPELINE_NUM" ]; then FAILED_INFO=$(curl -sf \ -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \ jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true) FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1) FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2) fi log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}" if [ -n "$FAILED_STEP" ] && is_infra_step "$FAILED_STEP" "${FAILED_EXIT:-0}" >/dev/null 2>&1; then IS_INFRA=true fi if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 )) log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})" (cd "$WORKTREE" && git commit --allow-empty \ -m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1) (cd "$WORKTREE" && git push origin "$BRANCH" --force 2>&1 | tail -3) # Touch phase file so we recheck CI on the new SHA # Do NOT update LAST_PHASE_MTIME here — let the main loop detect the fresh mtime touch "$PHASE_FILE" CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true) return 0 fi CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 )) 
_ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}" if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating" local _mention_html="" [ -n "${MATRIX_MENTION_USER:-}" ] && _mention_html="${MATRIX_MENTION_USER} " notify_ctx \ "CI exhausted after ${CI_FIX_COUNT} attempts — escalating for human help" \ "${_mention_html}CI exhausted after ${CI_FIX_COUNT} attempts on PR #${PR_NUMBER} | Pipeline
Step: ${FAILED_STEP:-unknown} — escalating for human help" printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE" # Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate return 0 fi CI_ERROR_LOG="" if [ -n "$PIPELINE_NUM" ]; then CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "") fi # Save CI result for crash recovery printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \ "$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \ > "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true # Notify Matrix with rich CI failure context _ci_snippet=$(printf '%s' "${CI_ERROR_LOG:-}" | tail -5 | head -c 500 | sed 's/&/\&/g; s//\>/g') notify_ctx \ "CI failed on PR #${PR_NUMBER}: step=${FAILED_STEP:-unknown} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES})" \ "CI failed on PR #${PR_NUMBER} | Pipeline #${PIPELINE_NUM:-?}
Step: ${FAILED_STEP:-unknown} (exit ${FAILED_EXIT:-?})
Attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}
${_ci_snippet:-no logs}
" agent_inject_into_session "$SESSION_NAME" "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}). Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?}) CI debug tool: bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0} bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} Error snippet: ${CI_ERROR_LOG:-No logs available. Use ci-debug.sh to query the pipeline.} Instructions: 1. Run ci-debug.sh failures to get the full error output. 2. Read the failing test file(s) — understand what the tests EXPECT. 3. Fix the root cause — do NOT weaken tests. 4. Commit your fix and push: git push origin ${BRANCH} 5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" 6. Stop and wait." fi # ── PHASE: awaiting_review ────────────────────────────────────────────────── elif [ "$phase" = "PHASE:awaiting_review" ]; then status "waiting for review on PR #${PR_NUMBER:-?}" CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle if [ -z "${PR_NUMBER:-}" ]; then log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR" FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/pulls?state=open&limit=20" | \ jq -r --arg branch "$BRANCH" \ '.[] | select(.head.ref == $branch) | .number' | head -1) || true if [ -n "$FOUND_PR" ]; then PR_NUMBER="$FOUND_PR" log "found PR #${PR_NUMBER}" else agent_inject_into_session "$SESSION_NAME" "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push origin ${BRANCH}, then write PHASE:awaiting_ci." return 0 fi fi REVIEW_POLL_ELAPSED=0 REVIEW_FOUND=false while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do sleep 300 # 5 min between review checks REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 )) # Check session still alive (exit_marker + tmux fallback) if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! 
tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then log "session died during review wait" REVIEW_FOUND=false break fi # Check if phase was updated while we wait (e.g., Claude reacted to something) NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then log "phase file updated during review wait — re-entering main loop" # Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer # loop detects the change on its next tick and dispatches the new phase. REVIEW_FOUND=true # Prevent timeout injection # Clean up review-poll sentinel if it exists (session already advanced) rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}" break fi REVIEW_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true REVIEW_COMMENT=$(forge_api_all "/issues/${PR_NUMBER}/comments" | \ jq -r --arg sha "$REVIEW_SHA" \ '[.[] | select(.body | contains("