Review revision of the supervisor-escalation patch.

Source arrived with newlines/indentation collapsed; leading whitespace of the
context lines below is reconstructed, so apply with
`git apply --ignore-whitespace`. The post-image blob ids on the `index` lines
are those of the original submission and are stale after this revision.

Changes versus the submitted patch:
  * is_escalated: pass issue/PR/paths via argv (no code splicing), skip
    malformed JSONL lines, also look at the supervisor's in-flight batch.
  * supervisor: claim the queue with an atomic rename instead of truncating it
    after the loop; re-queue entries when no sub-issue could be created;
    take ShellCheck codes from the per-file excerpt; match file names literally.

diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh
index 6c2336c..924c78e 100755
--- a/dev/dev-poll.sh
+++ b/dev/dev-poll.sh
@@ -57,6 +57,41 @@ json.dump(d,open(f,'w'))
 " 2>/dev/null || true
 }
 
+# Check whether an issue/PR pair has been escalated to the supervisor
+# (still queued, currently being processed, or already processed).
+# Arguments: $1 - issue number, $2 - PR number
+# Returns:   0 if a matching escalation entry exists, 1 otherwise
+is_escalated() {
+  local issue="$1" pr="$2"
+  local esc_file="${FACTORY_ROOT}/supervisor/escalations.jsonl"
+  local done_file="${FACTORY_ROOT}/supervisor/escalations.done.jsonl"
+  # Numbers and paths are handed over as argv instead of being spliced into
+  # the Python source, so odd values cannot change the program being run.
+  python3 -c '
+import json, sys
+try:
+    issue, pr = int(sys.argv[1]), int(sys.argv[2])
+except ValueError:
+    sys.exit(1)
+for path in sys.argv[3:]:
+    try:
+        with open(path) as fh:
+            for line in fh:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    d = json.loads(line)
+                except ValueError:
+                    continue  # one corrupt line must not hide later matches
+                if isinstance(d, dict) and d.get("issue") == issue and d.get("pr") == pr:
+                    sys.exit(0)
+    except OSError:
+        pass
+sys.exit(1)
+' "$issue" "$pr" "$esc_file" "${esc_file}.processing" "$done_file" 2>/dev/null || return 1
+}
+
 REPO="${CODEBERG_REPO}"
 API="${CODEBERG_API}"
 
@@ -232,9 +267,9 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
 
     elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then
       FIX_ATTEMPTS=$(ci_fix_count "$HAS_PR")
-      if [ "$FIX_ATTEMPTS" -ge 3 ]; then
+      if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$HAS_PR"; then
         # Already escalated — skip silently, let pipeline continue to backlog
-        log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — skipping"
+        log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping"
         # Only write escalation + alert once (first time hitting 3)
         if [ "$FIX_ATTEMPTS" -eq 3 ]; then
           echo "{\"issue\":${ISSUE_NUM},\"pr\":${HAS_PR},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
@@ -313,9 +348,9 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do
     exit 0
   elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then
     FIX_ATTEMPTS=$(ci_fix_count "$PR_NUM")
-    if [ "$FIX_ATTEMPTS" -ge 3 ]; then
+    if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$STUCK_ISSUE" "$PR_NUM"; then
       # Already escalated — skip to let pipeline continue
-      log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — skipping"
+      log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping"
       if [ "$FIX_ATTEMPTS" -eq 3 ]; then
         echo "{\"issue\":${STUCK_ISSUE},\"pr\":${PR_NUM},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
           >> "${FACTORY_ROOT}/supervisor/escalations.jsonl"
@@ -390,11 +425,14 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
 
     elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then
       FIX_ATTEMPTS=$(ci_fix_count "$EXISTING_PR")
-      if [ "$FIX_ATTEMPTS" -ge 3 ]; then
-        log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — exhausted ${FIX_ATTEMPTS} attempts, escalated (not blocking pipeline)"
-        echo "{\"issue\":${ISSUE_NUM},\"pr\":${EXISTING_PR},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
-          >> "${FACTORY_ROOT}/supervisor/escalations.jsonl"
-        matrix_send "dev" "🚨 PR #${EXISTING_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated" 2>/dev/null || true
+      if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$EXISTING_PR"; then
+        log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — escalated to supervisor, skipping (not blocking pipeline)"
+        if [ "$FIX_ATTEMPTS" -eq 3 ]; then
+          echo "{\"issue\":${ISSUE_NUM},\"pr\":${EXISTING_PR},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
+            >> "${FACTORY_ROOT}/supervisor/escalations.jsonl"
+          matrix_send "dev" "🚨 PR #${EXISTING_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated" 2>/dev/null || true
+          ci_fix_increment "$EXISTING_PR"  # bump to 4 to prevent re-alert
+        fi
         # Don't add to WAITING_PRS — escalated PRs should not block new work
         continue
       fi
diff --git a/supervisor/supervisor-poll.sh b/supervisor/supervisor-poll.sh
index 6e9169c..8d6e7f9 100755
--- a/supervisor/supervisor-poll.sh
+++ b/supervisor/supervisor-poll.sh
@@ -216,11 +216,180 @@ for logfile in "${FACTORY_ROOT}"/{dev,review,supervisor}/*.log; do
   fi
 done
 
-# Check for dev-agent escalations
+# Process dev-agent escalations — create sub-issues for each CI failure
 ESCALATION_FILE="${FACTORY_ROOT}/supervisor/escalations.jsonl"
-if [ -s "$ESCALATION_FILE" ]; then
-  ESCALATION_COUNT=$(wc -l < "$ESCALATION_FILE")
-  p3 "Dev-agent escalated ${ESCALATION_COUNT} issue(s) — see ${ESCALATION_FILE}"
+ESCALATION_WORK="${ESCALATION_FILE}.processing"
+ESCALATION_DONE="${FACTORY_ROOT}/supervisor/escalations.done.jsonl"
+
+# Claim the queue with an atomic rename: entries dev-poll appends while this
+# run is busy land in a fresh escalations.jsonl instead of being wiped when the
+# batch is cleared. A work file left by an interrupted run is finished first.
+if [ ! -s "$ESCALATION_WORK" ] && [ -s "$ESCALATION_FILE" ]; then
+  mv -f "$ESCALATION_FILE" "$ESCALATION_WORK" || flog "Could not claim ${ESCALATION_FILE} — retrying next poll"
+fi
+
+if [ -s "$ESCALATION_WORK" ]; then
+  ESCALATION_COUNT=$(wc -l < "$ESCALATION_WORK")
+  flog "Processing ${ESCALATION_COUNT} escalation(s) from dev-agent"
+
+  while IFS= read -r esc_entry; do
+    [ -z "$esc_entry" ] && continue
+
+    ESC_ISSUE=$(echo "$esc_entry" | jq -r '.issue // empty')
+    ESC_PR=$(echo "$esc_entry" | jq -r '.pr // empty')
+    ESC_ATTEMPTS=$(echo "$esc_entry" | jq -r '.attempts // 3')
+
+    if [ -z "$ESC_ISSUE" ] || [ -z "$ESC_PR" ]; then
+      echo "$esc_entry" >> "$ESCALATION_DONE"
+      continue
+    fi
+
+    flog "Escalation: issue #${ESC_ISSUE} PR #${ESC_PR} (${ESC_ATTEMPTS} CI attempt(s))"
+
+    # Fetch the failing pipeline for this PR
+    ESC_PR_SHA=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \
+      "${CODEBERG_API}/pulls/${ESC_PR}" 2>/dev/null | jq -r '.head.sha // ""') || true
+
+    ESC_PIPELINE=""
+    ESC_SUB_ISSUES_CREATED=0
+    ESC_GENERIC_FAIL=""
+
+    if [ -n "$ESC_PR_SHA" ]; then
+      ESC_PIPELINE=$(wpdb -c "SELECT number FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND commit='${ESC_PR_SHA}' ORDER BY created DESC LIMIT 1;" 2>/dev/null | xargs || true)
+    fi
+
+    if [ -n "$ESC_PIPELINE" ]; then
+      FAILED_STEPS=$(curl -sf \
+        -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \
+        "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${ESC_PIPELINE}" 2>/dev/null | \
+        jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.pid)\t\(.name)"' 2>/dev/null || true)
+
+      while IFS=$'\t' read -r step_pid step_name; do
+        [ -z "$step_pid" ] && continue
+        step_logs=$(woodpecker-cli pipeline log show "${CODEBERG_REPO}" "${ESC_PIPELINE}" "${step_pid}" 2>/dev/null | tail -150 || true)
+        [ -z "$step_logs" ] && continue
+
+        if echo "$step_name" | grep -qi "shellcheck"; then
+          # Create one sub-issue per file with ShellCheck errors
+          sc_files=$(echo "$step_logs" | grep -oP '(?<=In )\S+(?= line \d+:)' | sort -u || true)
+
+          while IFS= read -r sc_file; do
+            [ -z "$sc_file" ] && continue
+            # -F: the file name is literal text, not a regex
+            file_errors=$(echo "$step_logs" | grep -F -A3 -- "In ${sc_file} line" | head -30 || true)
+            # Codes come from this file's excerpt only, so the sub-issue
+            # does not list codes that belong to other files
+            sc_codes=$(echo "$file_errors" | grep -oP 'SC\d+' | sort -u | tr '\n' ' ' | sed 's/ $//' || true)
+
+            sub_title="fix: ShellCheck errors in ${sc_file} (from PR #${ESC_PR})"
+            sub_body="## ShellCheck CI failure — \`${sc_file}\`
+
+Spawned by supervisor from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)).
+
+### Errors
+\`\`\`
+${file_errors}
+\`\`\`
+
+Fix all ShellCheck errors${sc_codes:+ (${sc_codes})} in \`${sc_file}\` so PR #${ESC_PR} CI passes.
+
+### Context
+- Parent issue: #${ESC_ISSUE}
+- PR: #${ESC_PR}
+- Pipeline: #${ESC_PIPELINE} (step: ${step_name})"
+
+            new_issue=$(curl -sf -X POST \
+              -H "Authorization: token ${CODEBERG_TOKEN}" \
+              -H "Content-Type: application/json" \
+              "${CODEBERG_API}/issues" \
+              -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
+                '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true
+
+            if [ -n "$new_issue" ]; then
+              flog "Created sub-issue #${new_issue}: ShellCheck in ${sc_file} (from #${ESC_ISSUE})"
+              fixed "Sub-issue #${new_issue}: ShellCheck errors in ${sc_file} (escalated from #${ESC_ISSUE})"
+              ESC_SUB_ISSUES_CREATED=$((ESC_SUB_ISSUES_CREATED + 1))
+              matrix_send "supervisor" "📋 Created sub-issue #${new_issue}: ShellCheck in ${sc_file} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
+            fi
+          done <<< "$sc_files"
+
+        else
+          # Accumulate non-ShellCheck failures for one combined issue
+          ESC_GENERIC_FAIL="${ESC_GENERIC_FAIL}
+=== ${step_name} ===
+$(echo "$step_logs" | tail -50)"
+        fi
+      done <<< "$FAILED_STEPS"
+    fi
+
+    # Create one sub-issue for all non-ShellCheck CI failures
+    if [ -n "$ESC_GENERIC_FAIL" ]; then
+      sub_title="fix: CI failures in PR #${ESC_PR} (from issue #${ESC_ISSUE})"
+      sub_body="## CI failure — fix required
+
+Spawned by supervisor from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)).
+
+### Failed step output
+\`\`\`${ESC_GENERIC_FAIL}
+\`\`\`
+
+### Context
+- Parent issue: #${ESC_ISSUE}
+- PR: #${ESC_PR}${ESC_PIPELINE:+
+- Pipeline: #${ESC_PIPELINE}}"
+
+      new_issue=$(curl -sf -X POST \
+        -H "Authorization: token ${CODEBERG_TOKEN}" \
+        -H "Content-Type: application/json" \
+        "${CODEBERG_API}/issues" \
+        -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
+          '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true
+
+      if [ -n "$new_issue" ]; then
+        flog "Created sub-issue #${new_issue}: CI failures for PR #${ESC_PR} (from #${ESC_ISSUE})"
+        fixed "Sub-issue #${new_issue}: CI failures for PR #${ESC_PR} (escalated from #${ESC_ISSUE})"
+        ESC_SUB_ISSUES_CREATED=$((ESC_SUB_ISSUES_CREATED + 1))
+        matrix_send "supervisor" "📋 Created sub-issue #${new_issue}: CI failures for PR #${ESC_PR} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
+      fi
+    fi
+
+    # Fallback: no CI logs available — create a generic investigation issue
+    if [ "$ESC_SUB_ISSUES_CREATED" -eq 0 ]; then
+      sub_title="fix: investigate CI failure for PR #${ESC_PR} (from issue #${ESC_ISSUE})"
+      sub_body="## CI failure — investigation required
+
+Spawned by supervisor from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)). CI logs were unavailable at escalation time.
+
+Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the PR can merge."
+
+      new_issue=$(curl -sf -X POST \
+        -H "Authorization: token ${CODEBERG_TOKEN}" \
+        -H "Content-Type: application/json" \
+        "${CODEBERG_API}/issues" \
+        -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
+          '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true
+
+      if [ -n "$new_issue" ]; then
+        flog "Created fallback sub-issue #${new_issue} for escalated #${ESC_ISSUE}"
+        fixed "Fallback sub-issue #${new_issue}: investigate CI for PR #${ESC_PR} (escalated from #${ESC_ISSUE})"
+        ESC_SUB_ISSUES_CREATED=$((ESC_SUB_ISSUES_CREATED + 1))
+        matrix_send "supervisor" "📋 Created sub-issue #${new_issue}: investigate CI for PR #${ESC_PR} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
+      fi
+    fi
+
+    # Archive the entry only once a sub-issue exists; otherwise put it back
+    # on the queue so a failed issue creation is retried, not silently dropped.
+    if [ "$ESC_SUB_ISSUES_CREATED" -gt 0 ]; then
+      printf '%s\n' "$esc_entry" >> "$ESCALATION_DONE"
+    else
+      flog "No sub-issue could be created for issue #${ESC_ISSUE} PR #${ESC_PR} — re-queued for next poll"
+      printf '%s\n' "$esc_entry" >> "$ESCALATION_FILE"
+    fi
+  done < "$ESCALATION_WORK"
+
+  # Drop the claimed batch — every entry is now archived or re-queued
+  rm -f "$ESCALATION_WORK"
+  flog "Escalations processed — moved to $(basename "$ESCALATION_DONE")"
 fi
 
 # #############################################################################