diff --git a/dev/dev-agent.sh b/dev/dev-agent.sh index 4beaf95..477de5f 100755 --- a/dev/dev-agent.sh +++ b/dev/dev-agent.sh @@ -131,6 +131,16 @@ cleanup_labels() { "${API}/issues/${ISSUE}/labels/${IN_PROGRESS_LABEL_ID}" >/dev/null 2>&1 || true } +restore_to_backlog() { + cleanup_labels + curl -sf -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/labels" \ + -d "{\"labels\":[${BACKLOG_LABEL_ID}]}" >/dev/null 2>&1 || true + CLAIMED=false # Don't unclaim again in cleanup() +} + CLAIMED=false cleanup() { rm -f "$LOCKFILE" "$STATUSFILE" @@ -750,13 +760,7 @@ case "${_MONITOR_LOOP_EXIT:-}" in echo "{\"issue\":${ISSUE},\"pr\":${PR_NUMBER:-0},\"reason\":\"${_MONITOR_LOOP_EXIT:-idle_timeout}\",\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ >> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl" # Restore labels: remove in-progress, add backlog - cleanup_labels - curl -sf -X POST \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}/labels" \ - -d "{\"labels\":[${BACKLOG_LABEL_ID}]}" >/dev/null 2>&1 || true - CLAIMED=false # Don't unclaim again in cleanup() + restore_to_backlog if [ -n "${PR_NUMBER:-}" ]; then log "keeping worktree (PR #${PR_NUMBER} still open)" else @@ -767,8 +771,10 @@ case "${_MONITOR_LOOP_EXIT:-}" in "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" [ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}" ;; - crash_recovery_failed) - cleanup_labels + crashed) + # Belt-and-suspenders: _on_phase_change(PHASE:crashed) handles primary + # cleanup (escalation, notification, labels, worktree, files). + restore_to_backlog ;; done) # Belt-and-suspenders: callback in phase-handler.sh handles primary cleanup, diff --git a/dev/phase-handler.sh b/dev/phase-handler.sh index 68d2ab6..e7e1568 100644 --- a/dev/phase-handler.sh +++ b/dev/phase-handler.sh @@ -714,6 +714,25 @@ $(printf '%s' "$REFUSAL_JSON" | head -c 2000) return 1 fi + # ── PHASE: crashed ────────────────────────────────────────────────────────── + # Session died unexpectedly (OOM kill, tmux crash, etc.). Escalate to + # supervisor and restore issue to backlog so it can be retried. + elif [ "$phase" = "PHASE:crashed" ]; then + log "session crashed for issue #${ISSUE}" + notify_ctx \ + "session crashed unexpectedly — escalating" \ + "session crashed unexpectedly — escalating${PR_NUMBER:+ | PR #${PR_NUMBER}}" + echo "{\"issue\":${ISSUE},\"pr\":${PR_NUMBER:-0},\"reason\":\"crashed\",\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \ + >> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl" + + # Restore backlog label, clean up worktree + temp files + restore_to_backlog + [ -z "${PR_NUMBER:-}" ] && cleanup_worktree + [ -n "${PR_NUMBER:-}" ] && log "keeping worktree (PR #${PR_NUMBER} still open)" + rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "$THREAD_FILE" "${SCRATCH_FILE:-}" \ + "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" + [ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}" + else log "WARNING: unknown phase value: ${phase}" fi