Merge pull request 'fix: PHASE:crashed unhandled in _on_phase_change / dev-agent callback (#339)' (#443) from fix/issue-339 into main

This commit is contained in:
johba 2026-03-21 04:39:02 +01:00
commit f511a6c7a7
2 changed files with 34 additions and 9 deletions

View file

@@ -131,6 +131,16 @@ cleanup_labels() {
"${API}/issues/${ISSUE}/labels/${IN_PROGRESS_LABEL_ID}" >/dev/null 2>&1 || true
}
# restore_to_backlog — hand the current issue back to the backlog.
#
# Globals read:    API, ISSUE, BACKLOG_LABEL_ID, CODEBERG_TOKEN
# Globals written: CLAIMED (set to false)
# Returns:         0 as long as cleanup_labels does not abort the shell;
#                  a failed label POST is ignored.
restore_to_backlog() {
  # Run the shared label cleanup first (presumably drops the in-progress
  # label — confirm against cleanup_labels).
  cleanup_labels

  local labels_endpoint="${API}/issues/${ISSUE}/labels"
  local backlog_payload="{\"labels\":[${BACKLOG_LABEL_ID}]}"

  # Best-effort: attach the backlog label. Output is discarded and a failing
  # request must not abort the caller, hence the trailing `|| true`.
  curl -sf -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H "Content-Type: application/json" \
    "${labels_endpoint}" \
    -d "${backlog_payload}" >/dev/null 2>&1 || true

  # Mark the issue as released so cleanup() does not unclaim it again.
  CLAIMED=false
}
CLAIMED=false
cleanup() {
rm -f "$LOCKFILE" "$STATUSFILE"
@@ -750,13 +760,7 @@ case "${_MONITOR_LOOP_EXIT:-}" in
echo "{\"issue\":${ISSUE},\"pr\":${PR_NUMBER:-0},\"reason\":\"${_MONITOR_LOOP_EXIT:-idle_timeout}\",\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
# Restore labels: remove in-progress, add backlog
cleanup_labels
curl -sf -X POST \
-H "Authorization: token ${CODEBERG_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/issues/${ISSUE}/labels" \
-d "{\"labels\":[${BACKLOG_LABEL_ID}]}" >/dev/null 2>&1 || true
CLAIMED=false # Don't unclaim again in cleanup()
restore_to_backlog
if [ -n "${PR_NUMBER:-}" ]; then
log "keeping worktree (PR #${PR_NUMBER} still open)"
else
@@ -767,8 +771,10 @@ case "${_MONITOR_LOOP_EXIT:-}" in
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
;;
crash_recovery_failed)
cleanup_labels
crashed)
# Belt-and-suspenders: _on_phase_change(PHASE:crashed) handles primary
# cleanup (escalation, notification, labels, worktree, files).
restore_to_backlog
;;
done)
# Belt-and-suspenders: callback in phase-handler.sh handles primary cleanup,

View file

@@ -714,6 +714,25 @@ $(printf '%s' "$REFUSAL_JSON" | head -c 2000)
return 1
fi
# ── PHASE: crashed ──────────────────────────────────────────────────────────
# Session died unexpectedly (OOM kill, tmux crash, etc.). Escalate to
# supervisor and restore issue to backlog so it can be retried.
elif [ "$phase" = "PHASE:crashed" ]; then
log "session crashed for issue #${ISSUE}"
notify_ctx \
"session crashed unexpectedly — escalating" \
"session crashed unexpectedly — escalating${PR_NUMBER:+ | PR <a href='${CODEBERG_WEB}/pulls/${PR_NUMBER}'>#${PR_NUMBER}</a>}"
echo "{\"issue\":${ISSUE},\"pr\":${PR_NUMBER:-0},\"reason\":\"crashed\",\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
# Restore backlog label, clean up worktree + temp files
restore_to_backlog
[ -z "${PR_NUMBER:-}" ] && cleanup_worktree
[ -n "${PR_NUMBER:-}" ] && log "keeping worktree (PR #${PR_NUMBER} still open)"
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "$THREAD_FILE" "${SCRATCH_FILE:-}" \
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
else
log "WARNING: unknown phase value: ${phase}"
fi