From 531ae5cf718e597a3a7054b6ade4d0666fc6cea2 Mon Sep 17 00:00:00 2001 From: johba Date: Tue, 17 Mar 2026 15:14:48 +0100 Subject: [PATCH] fix: escalate once then continue to backlog (#59) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs after #53 merged: 1. Escalation written every poll cycle (4 entries in 30min) — now writes once, bumps counter to 4 to skip 2. Exit after escalation blocked backlog work — now falls through to pick up next issue Co-authored-by: openhands Reviewed-on: https://codeberg.org/johba/disinto/pulls/59 Reviewed-by: review_bot --- dev/dev-poll.sh | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh index 7408ec5..6c2336c 100755 --- a/dev/dev-poll.sh +++ b/dev/dev-poll.sh @@ -233,17 +233,23 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then FIX_ATTEMPTS=$(ci_fix_count "$HAS_PR") if [ "$FIX_ATTEMPTS" -ge 3 ]; then - log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — exhausted ${FIX_ATTEMPTS} fix attempts, escalating" - echo "{\"issue\":${ISSUE_NUM},\"pr\":${HAS_PR},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ - >> "${FACTORY_ROOT}/supervisor/escalations.jsonl" - matrix_send "dev" "🚨 PR #${HAS_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated to supervisor" 2>/dev/null || true + # Already escalated — skip silently, let pipeline continue to backlog + log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — skipping" + # Only write escalation + alert once (first time hitting 3) + if [ "$FIX_ATTEMPTS" -eq 3 ]; then + echo "{\"issue\":${ISSUE_NUM},\"pr\":${HAS_PR},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ + >> "${FACTORY_ROOT}/supervisor/escalations.jsonl" + matrix_send "dev" "🚨 PR #${HAS_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated to supervisor" 2>/dev/null || true + ci_fix_increment "$HAS_PR" # bump to 4 so we don't re-alert + fi + # Fall through to backlog scan instead of exit else ci_fix_increment "$HAS_PR" log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt $((FIX_ATTEMPTS+1))/3)" nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)" + exit 0 fi - exit 0 else log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" @@ -308,17 +314,22 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then FIX_ATTEMPTS=$(ci_fix_count "$PR_NUM") if [ "$FIX_ATTEMPTS" -ge 3 ]; then - log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI failed — exhausted ${FIX_ATTEMPTS} attempts, escalating" - echo "{\"issue\":${STUCK_ISSUE},\"pr\":${PR_NUM},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ - >> "${FACTORY_ROOT}/supervisor/escalations.jsonl" - matrix_send "dev" "🚨 PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI failed after ${FIX_ATTEMPTS} attempts — escalated" 2>/dev/null || true + # Already escalated — skip to let pipeline continue + log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — skipping" + if [ "$FIX_ATTEMPTS" -eq 3 ]; then + echo "{\"issue\":${STUCK_ISSUE},\"pr\":${PR_NUM},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ + >> "${FACTORY_ROOT}/supervisor/escalations.jsonl" + matrix_send "dev" "🚨 PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI failed after ${FIX_ATTEMPTS} attempts — escalated" 2>/dev/null || true + ci_fix_increment "$PR_NUM" # bump to 4 to prevent re-alert + fi + continue # skip this PR, check next stuck PR or fall through to backlog else ci_fix_increment "$PR_NUM" log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI failed — fixing (attempt $((FIX_ATTEMPTS+1))/3)" nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 & log "started dev-agent PID $! for stuck PR #${PR_NUM}" + exit 0 fi - exit 0 fi done