#!/usr/bin/env bash
# dev/phase-handler.sh — Phase callback functions for dev-agent.sh
#
# Source this file from dev-agent.sh after lib/agent-session.sh is loaded.
# Defines: do_merge(), post_refusal_comment(), _on_phase_change()
#
# Required globals from dev-agent.sh:
#   ISSUE, CODEBERG_TOKEN, API, CODEBERG_WEB, PROJECT_NAME, FACTORY_ROOT
#   PR_NUMBER, BRANCH, PHASE_FILE, WORKTREE, IMPL_SUMMARY_FILE, THREAD_FILE
#   PRIMARY_BRANCH, SESSION_NAME, LOGFILE, ISSUE_TITLE
#   CI_POLL_TIMEOUT, MAX_CI_FIXES, MAX_REVIEW_ROUNDS, REVIEW_POLL_TIMEOUT
#   CI_RETRY_COUNT, CI_FIX_COUNT, REVIEW_ROUND, CLAIMED
#   WOODPECKER_REPO_ID, WOODPECKER_TOKEN, WOODPECKER_SERVER
#
# Calls back to dev-agent.sh-defined helpers:
#   cleanup_worktree(), cleanup_labels()
#
# shellcheck shell=bash
# shellcheck disable=SC2154  # globals are set in dev-agent.sh before calling
# shellcheck disable=SC2034  # CLAIMED is read by cleanup() in dev-agent.sh

# --- Merge helper ---

# do_merge — attempt to merge PR via Codeberg API.
# Args: pr_num
# Returns:
#   0 = merged successfully
#   1 = other failure (conflict, network error, etc.)
#   2 = not enough approvals (HTTP 405) — PHASE:needs_human already written
# Globals read: CODEBERG_TOKEN, API, PHASE_FILE. Logs through log().
do_merge() {
  local pr_num="${1:-}"
  local merge_response merge_http_code merge_body

  # An empty PR number would POST to ".../pulls//merge"; refuse up front.
  if [[ -z "$pr_num" ]]; then
    log "do_merge: missing PR number"
    return 1
  fi

  # The curl exit status is deliberately ignored; the HTTP code appended by
  # -w decides the outcome below.
  merge_response=$(curl -s -w "\n%{http_code}" -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H 'Content-Type: application/json' \
    "${API}/pulls/${pr_num}/merge" \
    -d '{"Do":"merge","delete_branch_after_merge":true}') || true

  # -w puts the status code on its own final line; everything before that
  # line is the response body. A response without any newline is treated as
  # "code only, empty body".
  if [[ "$merge_response" == *$'\n'* ]]; then
    merge_http_code=${merge_response##*$'\n'}
    merge_body=${merge_response%$'\n'*}
  else
    merge_http_code=$merge_response
    merge_body=""
  fi

  if [[ "$merge_http_code" == "200" || "$merge_http_code" == "204" ]]; then
    log "do_merge: PR #${pr_num} merged (HTTP ${merge_http_code})"
    return 0
  fi

  # HTTP 405 — merge requirements not met (approvals, branch protection);
  # structural, not transient, so hand over to a human via the phase file.
  if [[ "$merge_http_code" == "405" ]]; then
    log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}"
    printf 'PHASE:needs_human\nReason: %s\n' \
      "PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \
      > "$PHASE_FILE"
    return 2
  fi

  log "do_merge: PR #${pr_num} merge failed (HTTP ${merge_http_code}): ${merge_body:0:200}"
  return 1
}

# --- Refusal comment helper ---

# post_refusal_comment — post a "Dev-agent: <title>" comment on issue $ISSUE.
# Args: emoji title body
# Skips posting when a comment returned by the "comments?limit=5" query
# already contains "Dev-agent: <title>".
# Globals read: CODEBERG_TOKEN, API, ISSUE. Logs through log().
# Returns: 0 always — posting is best-effort and failures are only logged.
post_refusal_comment() {
  local emoji="${1:-}" title="${2:-}" body="${3:-}"
  local last_has_title comment

  # Duplicate check; any curl/jq failure leaves last_has_title empty and we
  # fall through to posting.
  last_has_title=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \
    "${API}/issues/${ISSUE}/comments?limit=5" \
    | jq -r --arg t "Dev-agent: ${title}" \
      '[.[] | .body // ""] | any(contains($t)) | tostring') || true
  if [[ "$last_has_title" == "true" ]]; then
    log "skipping duplicate refusal comment: ${title}"
    return 0
  fi

  # Markdown layout: heading, blank line, body, blank line, rule, footer.
  # The rule ("---") has to sit on its own line to render.
  printf -v comment \
    '%s **Dev-agent: %s**\n\n%s\n\n---\n*Automated assessment by dev-agent · %s*' \
    "$emoji" "$title" "$body" "$(date -u '+%Y-%m-%d %H:%M UTC')"

  # Stream the JSON payload straight into curl instead of staging it in
  # fixed /tmp paths, which concurrent agents would overwrite.
  printf '%s' "$comment" \
    | jq -Rs '{body: .}' \
    | curl -sf -o /dev/null -X POST \
      -H "Authorization: token ${CODEBERG_TOKEN}" \
      -H "Content-Type: application/json" \
      "${API}/issues/${ISSUE}/comments" \
      --data-binary @- 2>/dev/null \
    || log "WARNING: failed to post refusal comment"
  return 0
}

# ============================================================================= # PHASE DISPATCH CALLBACK # ============================================================================= # _on_phase_change — Phase dispatch callback for monitor_phase_loop # Receives the current phase as $1. # Returns 0 to continue the loop, 1 to break (terminal phase reached). _on_phase_change() { local phase="$1" # ── PHASE: awaiting_ci ────────────────────────────────────────────────────── if [ "$phase" = "PHASE:awaiting_ci" ]; then # Create PR if not yet created if [ -z "${PR_NUMBER:-}" ]; then status "creating PR for issue #${ISSUE}" IMPL_SUMMARY="" if [ -f "$IMPL_SUMMARY_FILE" ]; then # Don't treat refusal JSON as a PR summary if ! jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE") fi fi printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt" jq -n \ --arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \ --rawfile body "/tmp/pr-body-${ISSUE}.txt" \ --arg head "$BRANCH" \ --arg base "${PRIMARY_BRANCH}" \ '{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json" PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \ -H "Authorization: token ${CODEBERG_TOKEN}" \ -H "Content-Type: application/json" \ "${API}/pulls" \ --data-binary @"/tmp/pr-request-${ISSUE}.json") PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1) PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d') rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json" if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number') log "created PR #${PR_NUMBER}" PR_URL="${CODEBERG_WEB}/pulls/${PR_NUMBER}" notify_ctx \ "PR #${PR_NUMBER} created: ${ISSUE_TITLE}" \ "PR #${PR_NUMBER} 
created: ${ISSUE_TITLE}" elif [ "$PR_HTTP_CODE" = "409" ]; then # PR already exists (race condition) — find it FOUND_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ "${API}/pulls?state=open&limit=20" | \ jq -r --arg branch "$BRANCH" \ '.[] | select(.head.ref == $branch) | .number' | head -1) || true if [ -n "$FOUND_PR" ]; then PR_NUMBER="$FOUND_PR" log "PR already exists: #${PR_NUMBER}" else log "ERROR: PR creation got 409 but no existing PR found" agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the Codeberg API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed." return 0 fi else log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})" notify "failed to create PR (HTTP ${PR_HTTP_CODE})" agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push origin ${BRANCH}. Then write PHASE:awaiting_ci again." return 0 fi fi # No CI configured? Treat as success immediately if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then log "no CI configured — treating as passed" agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER} (no CI configured for this project). Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback." return 0 fi # Poll CI until done or timeout status "waiting for CI on PR #${PR_NUMBER}" CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \ curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') CI_DONE=false CI_STATE="unknown" CI_POLL_ELAPSED=0 while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do sleep 30 CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 )) # Check session still alive during CI wait if ! 
tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then log "session died during CI wait" break fi CI_STATE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ "${API}/commits/${CI_CURRENT_SHA}/status" | jq -r '.state // "unknown"') if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then CI_DONE=true [ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0 break fi done if ! $CI_DONE; then log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s" notify "CI timeout on PR #${PR_NUMBER}" agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:needs_human if you cannot proceed." return 0 fi log "CI: ${CI_STATE}" if [ "$CI_STATE" = "success" ]; then agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER}. Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback: echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" else # Fetch CI error details PIPELINE_NUM=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ "${API}/commits/${CI_CURRENT_SHA}/status" | \ jq -r '.statuses[0].target_url // ""' | grep -oP 'pipeline/\K[0-9]+' | head -1 || true) FAILED_STEP="" FAILED_EXIT="" IS_INFRA=false if [ -n "$PIPELINE_NUM" ]; then FAILED_INFO=$(curl -sf \ -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \ jq -r '.workflows[]?.children[]? 
| select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true) FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1) FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2) fi log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}" case "${FAILED_STEP}" in git*) IS_INFRA=true ;; esac case "${FAILED_EXIT}" in 128|137) IS_INFRA=true ;; esac if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 )) log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})" (cd "$WORKTREE" && git commit --allow-empty \ -m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1) (cd "$WORKTREE" && git push origin "$BRANCH" --force 2>&1 | tail -3) # Touch phase file so we recheck CI on the new SHA # Do NOT update LAST_PHASE_MTIME here — let the main loop detect the fresh mtime touch "$PHASE_FILE" CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true) return 0 fi CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 )) _ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}" if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating" echo "{\"issue\":${ISSUE},\"pr\":${PR_NUMBER},\"reason\":\"ci_exhausted\",\"step\":\"${FAILED_STEP:-unknown}\",\"attempts\":${CI_FIX_COUNT},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ >> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl" notify_ctx \ "CI exhausted after ${CI_FIX_COUNT} attempts — escalated to supervisor" \ "CI exhausted after ${CI_FIX_COUNT} attempts on PR #${PR_NUMBER} | Pipeline
Step: ${FAILED_STEP:-unknown} — escalated to supervisor" printf 'PHASE:failed\nReason: ci_exhausted after %d attempts\n' "$CI_FIX_COUNT" > "$PHASE_FILE" # Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:failed return 0 fi CI_ERROR_LOG="" if [ -n "$PIPELINE_NUM" ]; then CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "") fi # Save CI result for crash recovery printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \ "$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \ > "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true # Notify Matrix with rich CI failure context _ci_snippet=$(printf '%s' "${CI_ERROR_LOG:-}" | tail -5 | head -c 500 | sed 's/&/\&/g; s//\>/g') notify_ctx \ "CI failed on PR #${PR_NUMBER}: step=${FAILED_STEP:-unknown} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES})" \ "CI failed on PR #${PR_NUMBER} | Pipeline #${PIPELINE_NUM:-?}
Step: ${FAILED_STEP:-unknown} (exit ${FAILED_EXIT:-?})
Attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}
${_ci_snippet:-no logs}
" agent_inject_into_session "$SESSION_NAME" "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}). Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?}) CI debug tool: bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0} bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} Error snippet: ${CI_ERROR_LOG:-No logs available. Use ci-debug.sh to query the pipeline.} Instructions: 1. Run ci-debug.sh failures to get the full error output. 2. Read the failing test file(s) — understand what the tests EXPECT. 3. Fix the root cause — do NOT weaken tests. 4. Commit your fix and push: git push origin ${BRANCH} 5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" 6. Stop and wait." fi # ── PHASE: awaiting_review ────────────────────────────────────────────────── elif [ "$phase" = "PHASE:awaiting_review" ]; then status "waiting for review on PR #${PR_NUMBER:-?}" CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle if [ -z "${PR_NUMBER:-}" ]; then log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR" FOUND_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ "${API}/pulls?state=open&limit=20" | \ jq -r --arg branch "$BRANCH" \ '.[] | select(.head.ref == $branch) | .number' | head -1) || true if [ -n "$FOUND_PR" ]; then PR_NUMBER="$FOUND_PR" log "found PR #${PR_NUMBER}" else agent_inject_into_session "$SESSION_NAME" "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push origin ${BRANCH}, then write PHASE:awaiting_ci." return 0 fi fi REVIEW_POLL_ELAPSED=0 REVIEW_FOUND=false while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do sleep 300 # 5 min between review checks REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 )) # Check session still alive if ! 
tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then log "session died during review wait" REVIEW_FOUND=false break fi # Check if phase was updated while we wait (e.g., Claude reacted to something) NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then log "phase file updated during review wait — re-entering main loop" # Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer # loop detects the change on its next tick and dispatches the new phase. REVIEW_FOUND=true # Prevent timeout injection # Clean up review-poll sentinel if it exists (session already advanced) rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}" break fi REVIEW_SHA=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true REVIEW_COMMENT=$(codeberg_api_all "/issues/${PR_NUMBER}/comments" | \ jq -r --arg sha "$REVIEW_SHA" \ '[.[] | select(.body | contains("