fix: Preserve worktrees on crash for debugging (#726)
On crash (PHASE:crashed or non-zero exit), preserve the worktree and log its location instead of destroying it unconditionally. Successful sessions still clean up normally. Supervisor runs housekeeping to remove stale crashed worktrees older than 24h. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b4c053b3ed
commit
f716a75351
4 changed files with 66 additions and 6 deletions
|
|
@ -174,6 +174,53 @@ formula_phase_callback() {
|
|||
esac
|
||||
}
|
||||
|
||||
# ── Stale crashed worktree cleanup ─────────────────────────────────────────
|
||||
|
||||
# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS]
|
||||
# Removes preserved crashed worktrees older than MAX_AGE_HOURS (default 24).
|
||||
# Scans /tmp for orphaned worktrees matching agent naming patterns.
|
||||
# Safe to call from any agent; intended for supervisor/gardener housekeeping.
|
||||
# Requires globals: PROJECT_REPO_ROOT.
|
||||
cleanup_stale_crashed_worktrees() {
|
||||
local max_age_hours="${1:-24}"
|
||||
local max_age_seconds=$((max_age_hours * 3600))
|
||||
local now
|
||||
now=$(date +%s)
|
||||
local cleaned=0
|
||||
|
||||
# Collect active tmux pane working directories for safety check
|
||||
local active_dirs=""
|
||||
active_dirs=$(tmux list-panes -a -F '#{pane_current_path}' 2>/dev/null || true)
|
||||
|
||||
local wt_dir
|
||||
for wt_dir in /tmp/*-worktree-* /tmp/action-*-[0-9]* /tmp/disinto-*; do
|
||||
[ -d "$wt_dir" ] || continue
|
||||
# Must be a git worktree (has .git file or directory)
|
||||
[ -f "$wt_dir/.git" ] || [ -d "$wt_dir/.git" ] || continue
|
||||
|
||||
# Check age (use directory mtime)
|
||||
local dir_mtime
|
||||
dir_mtime=$(stat -c %Y "$wt_dir" 2>/dev/null || echo "$now")
|
||||
local age=$((now - dir_mtime))
|
||||
[ "$age" -lt "$max_age_seconds" ] && continue
|
||||
|
||||
# Skip if an active tmux pane is using this worktree
|
||||
if [ -n "$active_dirs" ] && echo "$active_dirs" | grep -qF "$wt_dir"; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# Remove the worktree
|
||||
git -C "${PROJECT_REPO_ROOT}" worktree remove "$wt_dir" --force 2>/dev/null || rm -rf "$wt_dir"
|
||||
log "cleaned stale crashed worktree: ${wt_dir} (age: $((age / 3600))h)"
|
||||
cleaned=$((cleaned + 1))
|
||||
done
|
||||
|
||||
# Prune any dangling worktree references
|
||||
git -C "${PROJECT_REPO_ROOT}" worktree prune 2>/dev/null || true
|
||||
|
||||
[ "$cleaned" -gt 0 ] && log "cleaned ${cleaned} stale crashed worktree(s)"
|
||||
}
|
||||
|
||||
# ── Scratch file helpers (compaction survival) ────────────────────────────
|
||||
|
||||
# build_scratch_instruction SCRATCH_FILE
|
||||
|
|
@ -306,8 +353,12 @@ run_formula_and_monitor() {
|
|||
|
||||
matrix_send "$agent_name" "${agent_name^} session finished (${FINAL_PHASE:-no phase})" 2>/dev/null || true
|
||||
|
||||
# Clean up per-agent worktree — "the runtime creates and destroys"
|
||||
remove_formula_worktree
|
||||
# Preserve worktree on crash for debugging; clean up on success
|
||||
if [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ]; then
|
||||
log "PRESERVED crashed worktree for debugging: ${_FORMULA_SESSION_WORKDIR:-}"
|
||||
else
|
||||
remove_formula_worktree
|
||||
fi
|
||||
|
||||
log "--- ${agent_name^} run done ---"
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue