From df2522a7cbb4cd903f627f5fd00feb25197f56b9 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 17 Mar 2026 18:30:57 +0000 Subject: [PATCH] fix: address review findings from issue #67 escalation refactor - supervisor: skip *.done.jsonl in escalation glob (bug: wildcard matched harb.done.jsonl producing spurious 'pending' log noise every cycle) - supervisor: use wc -l instead of grep -c . for line counting (style nit) - supervisor: consume gardener-esc-resolved.log via fixed() so escalation resolutions appear in end-of-cycle supervisor reporting - dev-poll: update all 'escalated to supervisor' log/matrix strings to 'escalated to gardener' (lines 263, 268, 344, 420) - gardener: track _esc_total_created across all escalation entries and write count to supervisor/gardener-esc-resolved.log after processing Co-Authored-By: Claude Sonnet 4.6 --- dev/dev-poll.sh | 8 ++++---- gardener/gardener-poll.sh | 10 ++++++++++ supervisor/supervisor-poll.sh | 12 +++++++++++- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh index d3b7b72..03302d7 100755 --- a/dev/dev-poll.sh +++ b/dev/dev-poll.sh @@ -260,12 +260,12 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then FIX_ATTEMPTS=$(ci_fix_count "$HAS_PR") if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$HAS_PR"; then # Already escalated — skip silently, let pipeline continue to backlog - log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping" + log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to gardener, skipping" # Only write escalation + alert once (first time hitting 3) if [ "$FIX_ATTEMPTS" -eq 3 ]; then echo "{\"issue\":${ISSUE_NUM},\"pr\":${HAS_PR},\"project\":\"${PROJECT_NAME}\",\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ >> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl" - matrix_send "dev" "🚨 PR #${HAS_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated to supervisor" 2>/dev/null || true + matrix_send "dev" "🚨 PR #${HAS_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated to gardener" 2>/dev/null || true ci_fix_increment "$HAS_PR" # bump to 4 so we don't re-alert fi # Fall through to backlog scan instead of exit @@ -341,7 +341,7 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do FIX_ATTEMPTS=$(ci_fix_count "$PR_NUM") if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$STUCK_ISSUE" "$PR_NUM"; then # Already escalated — skip to let pipeline continue - log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping" + log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to gardener, skipping" if [ "$FIX_ATTEMPTS" -eq 3 ]; then echo "{\"issue\":${STUCK_ISSUE},\"pr\":${PR_NUM},\"project\":\"${PROJECT_NAME}\",\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ >> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl" @@ -417,7 +417,7 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then FIX_ATTEMPTS=$(ci_fix_count "$EXISTING_PR") if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$EXISTING_PR"; then - log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — escalated to supervisor, skipping (not blocking pipeline)" + log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — escalated to gardener, skipping (not blocking pipeline)" if [ "$FIX_ATTEMPTS" -eq 3 ]; then echo "{\"issue\":${ISSUE_NUM},\"pr\":${EXISTING_PR},\"project\":\"${PROJECT_NAME}\",\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ >> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl" diff --git a/gardener/gardener-poll.sh b/gardener/gardener-poll.sh index 75d9521..f363257 100755 --- a/gardener/gardener-poll.sh +++ b/gardener/gardener-poll.sh @@ -318,6 +318,7 @@ if [ -s "$ESCALATION_FILE" ]; then ESCALATION_COUNT=$(wc -l < "$ESCALATION_SNAP") log "Processing ${ESCALATION_COUNT} escalation(s) for ${PROJECT_NAME}" + _esc_total_created=0 while IFS= read -r esc_entry; do [ -z "$esc_entry" ] && continue @@ -402,6 +403,7 @@ Fix all ShellCheck errors${sc_codes:+ (${sc_codes})} in \`${sc_file}\` so PR #${ if [ -n "$new_issue" ]; then log "Created sub-issue #${new_issue}: ShellCheck in ${sc_file} (from #${ESC_ISSUE})" ESC_SUB_ISSUES_CREATED=$((ESC_SUB_ISSUES_CREATED + 1)) + _esc_total_created=$((_esc_total_created + 1)) matrix_send "gardener" "📋 Created sub-issue #${new_issue}: ShellCheck in ${sc_file} (from escalated #${ESC_ISSUE})" 2>/dev/null || true fi done <<< "$sc_files" @@ -447,6 +449,7 @@ ${ESC_GENERIC_FAIL} if [ -n "$new_issue" ]; then log "Created sub-issue #${new_issue}: CI failures for PR #${ESC_PR} (from #${ESC_ISSUE})" ESC_SUB_ISSUES_CREATED=$((ESC_SUB_ISSUES_CREATED + 1)) + _esc_total_created=$((_esc_total_created + 1)) matrix_send "gardener" "📋 Created sub-issue #${new_issue}: CI failures for PR #${ESC_PR} (from escalated #${ESC_ISSUE})" 2>/dev/null || true fi fi @@ -479,6 +482,7 @@ Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the if [ -n "$new_issue" ]; then log "Created fallback sub-issue #${new_issue} for escalated #${ESC_ISSUE}" + _esc_total_created=$((_esc_total_created + 1)) matrix_send "gardener" "📋 Created sub-issue #${new_issue}: investigate CI for PR #${ESC_PR} (from escalated #${ESC_ISSUE})" 2>/dev/null || true fi fi @@ -489,6 +493,12 @@ Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the rm -f "$ESCALATION_SNAP" log "Escalations processed — moved to $(basename "$ESCALATION_DONE")" + + # Report resolution count to supervisor for its fixed() summary + if [ "${_esc_total_created:-0}" -gt 0 ]; then + printf '%d %s\n' "$_esc_total_created" "$PROJECT_NAME" \ + >> "${FACTORY_ROOT}/supervisor/gardener-esc-resolved.log" + fi fi log "--- Gardener poll done ---" diff --git a/supervisor/supervisor-poll.sh b/supervisor/supervisor-poll.sh index 8268d74..a62a1a1 100755 --- a/supervisor/supervisor-poll.sh +++ b/supervisor/supervisor-poll.sh @@ -219,13 +219,23 @@ done # Report pending escalations (processing has moved to gardener-poll.sh per-project) for _esc_file in "${FACTORY_ROOT}/supervisor/escalations-"*.jsonl; do [ -f "$_esc_file" ] || continue - _esc_count=$(grep -c . "$_esc_file" 2>/dev/null || true) + [[ "$_esc_file" == *.done.jsonl ]] && continue + _esc_count=$(wc -l < "$_esc_file" 2>/dev/null || true) [ "${_esc_count:-0}" -gt 0 ] || continue _esc_proj=$(basename "$_esc_file" .jsonl) _esc_proj="${_esc_proj#escalations-}" flog "${_esc_proj}: ${_esc_count} escalation(s) pending (gardener will process)" done +# Pick up escalation resolutions handled by gardener +_gesc_log="${FACTORY_ROOT}/supervisor/gardener-esc-resolved.log" +if [ -f "$_gesc_log" ]; then + while IFS=' ' read -r _gn _gp; do + [ -n "${_gn:-}" ] && fixed "${_gp:-unknown}: gardener created ${_gn} sub-issue(s) from escalations" + done < "$_gesc_log" + rm -f "$_gesc_log" +fi + # ############################################################################# # LAYER 2: PER-PROJECT CHECKS # (iterated over projects/*.toml, config-driven)