fix: PHASE:crashed unhandled in _on_phase_change / dev-agent callback (#339)

Add explicit PHASE:crashed case to _on_phase_change in phase-handler.sh:
logs crash, notifies Matrix, escalates to supervisor, restores backlog
label, preserves worktree if PR exists, cleans up temp files.

Add crashed case to dev-agent.sh post-loop case statement for
belt-and-suspenders cleanup matching the callback behavior.

Replaces the dead crash_recovery_failed case that was never triggered.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-21 01:31:20 +00:00
parent e8dc145184
commit 7f9cefa847
2 changed files with 46 additions and 1 deletions

View file

@ -767,8 +767,24 @@ case "${_MONITOR_LOOP_EXIT:-}" in
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
;;
crash_recovery_failed)
crashed)
# Belt-and-suspenders: _on_phase_change(PHASE:crashed) handles primary
# cleanup, but ensure labels and files are cleaned up if callback was
# interrupted (e.g. set -e propagation).
cleanup_labels
curl -sf -X POST \
-H "Authorization: token ${CODEBERG_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/issues/${ISSUE}/labels" \
-d "{\"labels\":[${BACKLOG_LABEL_ID}]}" >/dev/null 2>&1 || true
CLAIMED=false
if [ -z "${PR_NUMBER:-}" ]; then
cleanup_worktree
fi
rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" \
"$IMPL_SUMMARY_FILE" "$THREAD_FILE" "$SCRATCH_FILE" \
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
;;
done)
# Belt-and-suspenders: callback in phase-handler.sh handles primary cleanup,

View file

@ -714,6 +714,35 @@ $(printf '%s' "$REFUSAL_JSON" | head -c 2000)
return 1
fi
# ── PHASE: crashed ──────────────────────────────────────────────────────────
# Session died unexpectedly (OOM kill, tmux crash, etc.). Escalate to
# supervisor and restore issue to backlog so it can be retried.
elif [ "$phase" = "PHASE:crashed" ]; then
log "session crashed for issue #${ISSUE}"
notify_ctx \
"session crashed unexpectedly — escalating" \
"session crashed unexpectedly — escalating${PR_NUMBER:+ | PR <a href='${CODEBERG_WEB}/pulls/${PR_NUMBER}'>#${PR_NUMBER}</a>}"
echo "{\"issue\":${ISSUE},\"pr\":${PR_NUMBER:-0},\"reason\":\"crashed\",\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
# Restore backlog label so issue can be retried
cleanup_labels
curl -sf -X POST \
-H "Authorization: token ${CODEBERG_TOKEN}" \
-H "Content-Type: application/json" \
"${API}/issues/${ISSUE}/labels" \
-d "{\"labels\":[${BACKLOG_LABEL_ID}]}" >/dev/null 2>&1 || true
CLAIMED=false # Don't unclaim again in cleanup()
if [ -n "${PR_NUMBER:-}" ]; then
log "keeping worktree (PR #${PR_NUMBER} still open)"
else
cleanup_worktree
fi
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "$THREAD_FILE" "${SCRATCH_FILE:-}" \
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
else
log "WARNING: unknown phase value: ${phase}"
fi