fix: address review findings from issue #67 escalation refactor
- supervisor: skip *.done.jsonl in escalation glob (bug: wildcard matched harb.done.jsonl producing spurious 'pending' log noise every cycle)
- supervisor: use wc -l instead of grep -c . for line counting (style nit)
- supervisor: consume gardener-esc-resolved.log via fixed() so escalation resolutions appear in end-of-cycle supervisor reporting
- dev-poll: update all 'escalated to supervisor' log/matrix strings to 'escalated to gardener' (lines 263, 268, 344, 420)
- gardener: track _esc_total_created across all escalation entries and write count to supervisor/gardener-esc-resolved.log after processing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
150ede5605
commit
df2522a7cb
3 changed files with 25 additions and 5 deletions
|
|
@@ -260,12 +260,12 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
|||
FIX_ATTEMPTS=$(ci_fix_count "$HAS_PR")
|
||||
if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$HAS_PR"; then
|
||||
# Already escalated — skip silently, let pipeline continue to backlog
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping"
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to gardener, skipping"
|
||||
# Only write escalation + alert once (first time hitting 3)
|
||||
if [ "$FIX_ATTEMPTS" -eq 3 ]; then
|
||||
echo "{\"issue\":${ISSUE_NUM},\"pr\":${HAS_PR},\"project\":\"${PROJECT_NAME}\",\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
|
||||
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
|
||||
matrix_send "dev" "🚨 PR #${HAS_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated to supervisor" 2>/dev/null || true
|
||||
matrix_send "dev" "🚨 PR #${HAS_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated to gardener" 2>/dev/null || true
|
||||
ci_fix_increment "$HAS_PR" # bump to 4 so we don't re-alert
|
||||
fi
|
||||
# Fall through to backlog scan instead of exit
|
||||
|
|
@@ -341,7 +341,7 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do
|
|||
FIX_ATTEMPTS=$(ci_fix_count "$PR_NUM")
|
||||
if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$STUCK_ISSUE" "$PR_NUM"; then
|
||||
# Already escalated — skip to let pipeline continue
|
||||
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping"
|
||||
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to gardener, skipping"
|
||||
if [ "$FIX_ATTEMPTS" -eq 3 ]; then
|
||||
echo "{\"issue\":${STUCK_ISSUE},\"pr\":${PR_NUM},\"project\":\"${PROJECT_NAME}\",\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
|
||||
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
|
||||
|
|
@@ -417,7 +417,7 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
|
|||
elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then
|
||||
FIX_ATTEMPTS=$(ci_fix_count "$EXISTING_PR")
|
||||
if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$EXISTING_PR"; then
|
||||
log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — escalated to supervisor, skipping (not blocking pipeline)"
|
||||
log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — escalated to gardener, skipping (not blocking pipeline)"
|
||||
if [ "$FIX_ATTEMPTS" -eq 3 ]; then
|
||||
echo "{\"issue\":${ISSUE_NUM},\"pr\":${EXISTING_PR},\"project\":\"${PROJECT_NAME}\",\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
|
||||
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
|
||||
|
|
|
|||
|
|
@@ -318,6 +318,7 @@ if [ -s "$ESCALATION_FILE" ]; then
|
|||
|
||||
ESCALATION_COUNT=$(wc -l < "$ESCALATION_SNAP")
|
||||
log "Processing ${ESCALATION_COUNT} escalation(s) for ${PROJECT_NAME}"
|
||||
_esc_total_created=0
|
||||
|
||||
while IFS= read -r esc_entry; do
|
||||
[ -z "$esc_entry" ] && continue
|
||||
|
|
@@ -402,6 +403,7 @@ Fix all ShellCheck errors${sc_codes:+ (${sc_codes})} in \`${sc_file}\` so PR #${
|
|||
if [ -n "$new_issue" ]; then
|
||||
log "Created sub-issue #${new_issue}: ShellCheck in ${sc_file} (from #${ESC_ISSUE})"
|
||||
ESC_SUB_ISSUES_CREATED=$((ESC_SUB_ISSUES_CREATED + 1))
|
||||
_esc_total_created=$((_esc_total_created + 1))
|
||||
matrix_send "gardener" "📋 Created sub-issue #${new_issue}: ShellCheck in ${sc_file} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
|
||||
fi
|
||||
done <<< "$sc_files"
|
||||
|
|
@@ -447,6 +449,7 @@ ${ESC_GENERIC_FAIL}
|
|||
if [ -n "$new_issue" ]; then
|
||||
log "Created sub-issue #${new_issue}: CI failures for PR #${ESC_PR} (from #${ESC_ISSUE})"
|
||||
ESC_SUB_ISSUES_CREATED=$((ESC_SUB_ISSUES_CREATED + 1))
|
||||
_esc_total_created=$((_esc_total_created + 1))
|
||||
matrix_send "gardener" "📋 Created sub-issue #${new_issue}: CI failures for PR #${ESC_PR} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
|
|
@@ -479,6 +482,7 @@ Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the
|
|||
|
||||
if [ -n "$new_issue" ]; then
|
||||
log "Created fallback sub-issue #${new_issue} for escalated #${ESC_ISSUE}"
|
||||
_esc_total_created=$((_esc_total_created + 1))
|
||||
matrix_send "gardener" "📋 Created sub-issue #${new_issue}: investigate CI for PR #${ESC_PR} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
|
|
@@ -489,6 +493,12 @@ Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the
|
|||
|
||||
rm -f "$ESCALATION_SNAP"
|
||||
log "Escalations processed — moved to $(basename "$ESCALATION_DONE")"
|
||||
|
||||
# Report resolution count to supervisor for its fixed() summary
|
||||
if [ "${_esc_total_created:-0}" -gt 0 ]; then
|
||||
printf '%d %s\n' "$_esc_total_created" "$PROJECT_NAME" \
|
||||
>> "${FACTORY_ROOT}/supervisor/gardener-esc-resolved.log"
|
||||
fi
|
||||
fi
|
||||
|
||||
log "--- Gardener poll done ---"
|
||||
|
|
|
|||
|
|
@@ -219,13 +219,23 @@ done
|
|||
# Report pending escalations (processing has moved to gardener-poll.sh per-project)
|
||||
for _esc_file in "${FACTORY_ROOT}/supervisor/escalations-"*.jsonl; do
|
||||
[ -f "$_esc_file" ] || continue
|
||||
_esc_count=$(grep -c . "$_esc_file" 2>/dev/null || true)
|
||||
[[ "$_esc_file" == *.done.jsonl ]] && continue
|
||||
_esc_count=$(wc -l < "$_esc_file" 2>/dev/null || true)
|
||||
[ "${_esc_count:-0}" -gt 0 ] || continue
|
||||
_esc_proj=$(basename "$_esc_file" .jsonl)
|
||||
_esc_proj="${_esc_proj#escalations-}"
|
||||
flog "${_esc_proj}: ${_esc_count} escalation(s) pending (gardener will process)"
|
||||
done
|
||||
|
||||
# Pick up escalation resolutions handled by gardener
|
||||
_gesc_log="${FACTORY_ROOT}/supervisor/gardener-esc-resolved.log"
|
||||
if [ -f "$_gesc_log" ]; then
|
||||
while IFS=' ' read -r _gn _gp; do
|
||||
[ -n "${_gn:-}" ] && fixed "${_gp:-unknown}: gardener created ${_gn} sub-issue(s) from escalations"
|
||||
done < "$_gesc_log"
|
||||
rm -f "$_gesc_log"
|
||||
fi
|
||||
|
||||
# #############################################################################
|
||||
# LAYER 2: PER-PROJECT CHECKS
|
||||
# (iterated over projects/*.toml, config-driven)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue