fix: address review findings from issue #67 escalation refactor

- supervisor: skip *.done.jsonl in escalation glob (bug: wildcard matched
  harb.done.jsonl producing spurious 'pending' log noise every cycle)
- supervisor: use wc -l instead of grep -c . for line counting (style nit)
- supervisor: consume gardener-esc-resolved.log via fixed() so escalation
  resolutions appear in end-of-cycle supervisor reporting
- dev-poll: update all 'escalated to supervisor' log/matrix strings to
  'escalated to gardener' (lines 263, 268, 344, 420)
- gardener: track _esc_total_created across all escalation entries and
  write count to supervisor/gardener-esc-resolved.log after processing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-17 18:30:57 +00:00
parent 150ede5605
commit df2522a7cb
3 changed files with 25 additions and 5 deletions

View file

@ -260,12 +260,12 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
FIX_ATTEMPTS=$(ci_fix_count "$HAS_PR")
if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$HAS_PR"; then
# Already escalated — skip silently, let pipeline continue to backlog
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping"
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to gardener, skipping"
# Only write escalation + alert once (first time hitting 3)
if [ "$FIX_ATTEMPTS" -eq 3 ]; then
echo "{\"issue\":${ISSUE_NUM},\"pr\":${HAS_PR},\"project\":\"${PROJECT_NAME}\",\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
matrix_send "dev" "🚨 PR #${HAS_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated to supervisor" 2>/dev/null || true
matrix_send "dev" "🚨 PR #${HAS_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated to gardener" 2>/dev/null || true
ci_fix_increment "$HAS_PR" # bump to 4 so we don't re-alert
fi
# Fall through to backlog scan instead of exit
@ -341,7 +341,7 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do
FIX_ATTEMPTS=$(ci_fix_count "$PR_NUM")
if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$STUCK_ISSUE" "$PR_NUM"; then
# Already escalated — skip to let pipeline continue
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping"
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to gardener, skipping"
if [ "$FIX_ATTEMPTS" -eq 3 ]; then
echo "{\"issue\":${STUCK_ISSUE},\"pr\":${PR_NUM},\"project\":\"${PROJECT_NAME}\",\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
@ -417,7 +417,7 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then
FIX_ATTEMPTS=$(ci_fix_count "$EXISTING_PR")
if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$EXISTING_PR"; then
log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — escalated to supervisor, skipping (not blocking pipeline)"
log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — escalated to gardener, skipping (not blocking pipeline)"
if [ "$FIX_ATTEMPTS" -eq 3 ]; then
echo "{\"issue\":${ISSUE_NUM},\"pr\":${EXISTING_PR},\"project\":\"${PROJECT_NAME}\",\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"