fix: feat: supervisor breaks down escalated CI failures into sub-issues (#52)
- supervisor-poll.sh: replace P3 escalation log with actionable sub-issue creation. For each entry in escalations.jsonl: fetch CI logs via woodpecker-cli, create one sub-issue per file for ShellCheck failures, one combined issue for other CI failures, or a fallback investigation issue if logs are unavailable. Move processed entries to escalations.done.jsonl and clear escalations.jsonl.
- dev-poll.sh: add is_escalated() helper that checks both escalations.jsonl and escalations.done.jsonl; use it (alongside ci_fix_count >= 3) in all three CI-fix spawn paths so escalated PRs are skipped even if the ci-fixes tracker is reset.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
531ae5cf71
commit
d9520f48a6
2 changed files with 189 additions and 11 deletions
|
|
@ -57,6 +57,30 @@ json.dump(d,open(f,'w'))
|
|||
" 2>/dev/null || true
|
||||
}
|
||||
|
||||
# Check whether an issue/PR pair has been escalated to the supervisor,
# either still pending (escalations.jsonl) or already processed
# (escalations.done.jsonl).
#
# Globals:   FACTORY_ROOT (read) - directory containing supervisor/
# Arguments: $1 - issue number
#            $2 - PR number
# Returns:   0 if some JSONL entry has matching "issue" and "pr" values,
#            1 otherwise (including missing files, non-numeric arguments,
#            or python3 being unavailable).
is_escalated() {
  local issue="$1" pr="$2"
  local esc_file="${FACTORY_ROOT}/supervisor/escalations.jsonl"
  local done_file="${FACTORY_ROOT}/supervisor/escalations.done.jsonl"

  # The numbers and paths are handed to Python as argv instead of being
  # interpolated into the program text, so an odd value (empty string,
  # quotes, "0 or True", ...) cannot alter the code that runs.
  # stderr is silenced on purpose: any failure simply means "not escalated".
  if python3 -c '
import json, sys

try:
    issue, pr = int(sys.argv[1]), int(sys.argv[2])
except ValueError:
    sys.exit(1)

for path in sys.argv[3:]:
    try:
        with open(path) as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except ValueError:
                    # One corrupt line must not hide valid entries after it.
                    continue
                if isinstance(d, dict) and d.get("issue") == issue and d.get("pr") == pr:
                    sys.exit(0)
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable file: move on to the next one.
        pass
sys.exit(1)
' "$issue" "$pr" "$esc_file" "$done_file" 2>/dev/null; then
    return 0
  fi
  return 1
}
|
||||
|
||||
REPO="${CODEBERG_REPO}"
|
||||
|
||||
API="${CODEBERG_API}"
|
||||
|
|
@ -232,9 +256,9 @@ if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
|||
|
||||
elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then
|
||||
FIX_ATTEMPTS=$(ci_fix_count "$HAS_PR")
|
||||
if [ "$FIX_ATTEMPTS" -ge 3 ]; then
|
||||
if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$HAS_PR"; then
|
||||
# Already escalated — skip silently, let pipeline continue to backlog
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — skipping"
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping"
|
||||
# Only write escalation + alert once (first time hitting 3)
|
||||
if [ "$FIX_ATTEMPTS" -eq 3 ]; then
|
||||
echo "{\"issue\":${ISSUE_NUM},\"pr\":${HAS_PR},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
|
||||
|
|
@ -313,9 +337,9 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do
|
|||
exit 0
|
||||
elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then
|
||||
FIX_ATTEMPTS=$(ci_fix_count "$PR_NUM")
|
||||
if [ "$FIX_ATTEMPTS" -ge 3 ]; then
|
||||
if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$STUCK_ISSUE" "$PR_NUM"; then
|
||||
# Already escalated — skip to let pipeline continue
|
||||
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — skipping"
|
||||
log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI exhausted (${FIX_ATTEMPTS} attempts) — escalated to supervisor, skipping"
|
||||
if [ "$FIX_ATTEMPTS" -eq 3 ]; then
|
||||
echo "{\"issue\":${STUCK_ISSUE},\"pr\":${PR_NUM},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
|
||||
>> "${FACTORY_ROOT}/supervisor/escalations.jsonl"
|
||||
|
|
@ -390,11 +414,14 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
|
|||
|
||||
elif ! ci_passed "$CI_STATE" && [ "$CI_STATE" != "" ] && [ "$CI_STATE" != "pending" ] && [ "$CI_STATE" != "unknown" ]; then
|
||||
FIX_ATTEMPTS=$(ci_fix_count "$EXISTING_PR")
|
||||
if [ "$FIX_ATTEMPTS" -ge 3 ]; then
|
||||
log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — exhausted ${FIX_ATTEMPTS} attempts, escalated (not blocking pipeline)"
|
||||
echo "{\"issue\":${ISSUE_NUM},\"pr\":${EXISTING_PR},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
|
||||
>> "${FACTORY_ROOT}/supervisor/escalations.jsonl"
|
||||
matrix_send "dev" "🚨 PR #${EXISTING_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated" 2>/dev/null || true
|
||||
if [ "$FIX_ATTEMPTS" -ge 3 ] || is_escalated "$ISSUE_NUM" "$EXISTING_PR"; then
|
||||
log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — escalated to supervisor, skipping (not blocking pipeline)"
|
||||
if [ "$FIX_ATTEMPTS" -eq 3 ]; then
|
||||
echo "{\"issue\":${ISSUE_NUM},\"pr\":${EXISTING_PR},\"reason\":\"ci_exhausted_poll\",\"attempts\":${FIX_ATTEMPTS},\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
|
||||
>> "${FACTORY_ROOT}/supervisor/escalations.jsonl"
|
||||
matrix_send "dev" "🚨 PR #${EXISTING_PR} (issue #${ISSUE_NUM}) CI failed after ${FIX_ATTEMPTS} attempts — escalated" 2>/dev/null || true
|
||||
ci_fix_increment "$EXISTING_PR" # bump to 4 to prevent re-alert
|
||||
fi
|
||||
# Don't add to WAITING_PRS — escalated PRs should not block new work
|
||||
continue
|
||||
fi
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue