From b60811f0a1a049259f32687e0aafe1bd9a82ce71 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 25 Mar 2026 08:34:22 +0000 Subject: [PATCH] fix: supervisor should clean up stale PHASE:escalate files for closed issues (#664) Add auto-cleanup to supervisor/preflight.sh: PHASE:escalate files whose parent issue/PR is confirmed closed (via Forge API) are deleted after a 24h grace period. Cleanup results appear in the preflight output for journal logging by the supervisor formula. Co-Authored-By: Claude Opus 4.6 (1M context) --- formulas/run-supervisor.toml | 13 ++++++++-- supervisor/preflight.sh | 46 ++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/formulas/run-supervisor.toml b/formulas/run-supervisor.toml index 0a685d0..0ef5707 100644 --- a/formulas/run-supervisor.toml +++ b/formulas/run-supervisor.toml @@ -28,9 +28,12 @@ The pre-flight metrics have already been collected by supervisor/preflight.sh and injected into your prompt above. Review them now. 1. Read the injected metrics data carefully (System Resources, Docker, - Active Sessions, Phase Files, Lock Files, Agent Logs, CI Pipelines, - Open PRs, Issue Status, Stale Worktrees, Pending Escalations, + Active Sessions, Phase Files, Stale Phase Cleanup, Lock Files, Agent Logs, + CI Pipelines, Open PRs, Issue Status, Stale Worktrees, Pending Escalations, Escalation Replies). + Note: preflight.sh auto-removes PHASE:escalate files for closed issues + (24h grace period). Check the "Stale Phase Cleanup" section for any + files cleaned or in grace period this run. 2. If there are escalation replies from Matrix (human messages), note them — you will act on them in the decide-actions step. @@ -68,6 +71,8 @@ Categorize every finding from the metrics into priority levels. 
- Pipeline stalled: backlog issues exist but no agent ran for > 20min - Dev-agent blocked: last N polls all report "no ready issues" - Dev/action sessions in PHASE:escalate for > 24h (escalation timeout) + (Note: PHASE:escalate files for closed issues are auto-cleaned by preflight; + this check covers escalations where the issue is still open) ### P3 — Factory degraded - PRs stale: CI finished >20min ago AND no git push to the PR branch since CI completed @@ -119,6 +124,10 @@ For each finding from the health assessment, decide and execute an action. cd "$PROJECT_REPO_ROOT" git checkout "$PRIMARY_BRANCH" 2>/dev/null +**P4 Stale PHASE:escalate files (closed issues):** + Already handled by preflight.sh auto-cleanup. Check "Stale Phase Cleanup" + in the metrics for results. Log any cleanups in the journal. + **P4 Stale worktrees:** git -C "$PROJECT_REPO_ROOT" worktree remove --force /tmp/stale-worktree 2>/dev/null git -C "$PROJECT_REPO_ROOT" worktree prune 2>/dev/null diff --git a/supervisor/preflight.sh b/supervisor/preflight.sh index 2d8e55c..50067d6 100755 --- a/supervisor/preflight.sh +++ b/supervisor/preflight.sh @@ -68,6 +68,52 @@ done [ "$_found_phase" = false ] && echo " None" echo "" +# ── Stale Phase Cleanup ───────────────────────────────────────────────── +# Auto-remove PHASE:escalate files whose parent issue/PR is confirmed closed. +# Grace period: 24h after issue closure to avoid race conditions. 

#######################################
# Remove PHASE:escalate phase files whose Forge issue/PR has been closed for
# longer than the grace period, and report every file that was cleaned, is
# still inside the grace period, or could not be evaluated/removed.
#
# A file is only deleted when ALL of the following are confirmed:
#   - its first line starts with "PHASE:escalate"
#   - its name ends in -session-${PROJECT_NAME}-<number>.phase
#   - forge_api returns an issue whose .state is "closed"
#   - .closed_at parses as a date and lies more than the grace period back
# Any lookup or parse failure leaves the file in place (best-effort cleanup).
#
# Globals:   PROJECT_NAME (read) — matched literally inside the file name.
#            forge_api    (called) — defined elsewhere in the project.
# Arguments: $1 - directory to scan (default: /tmp)
#            $2 - grace period in seconds after closure (default: 86400)
# Outputs:   one status line per reported file on stdout, or " None" when
#            nothing was reported.
# Returns:   0
#######################################
_stale_phase_cleanup() {
  local phase_dir="${1:-/tmp}"
  local grace_secs="${2:-86400}"
  local reported=false
  local pf name base phase_line issue_num issue_json state closed_at
  local closed_epoch now elapsed remaining_h

  now=$(date +%s)

  for pf in "$phase_dir"/*-session-*.phase; do
    # An unmatched glob stays literal; -f filters that (and non-files) out.
    [ -f "$pf" ] || continue
    name=${pf##*/}

    # Only target PHASE:escalate files.
    phase_line=$(head -n 1 -- "$pf" 2>/dev/null) || phase_line=""
    case "$phase_line" in
      PHASE:escalate*) ;;
      *) continue ;;
    esac

    # Extract issue number: *-session-{PROJECT_NAME}-{number}.phase
    # PROJECT_NAME is quoted so regex metacharacters in it (e.g. ".") are
    # matched literally and cannot select another project's files.
    base=${name%.phase}
    if [[ "$base" =~ -session-"${PROJECT_NAME}"-([0-9]+)$ ]]; then
      issue_num="${BASH_REMATCH[1]}"
    else
      continue
    fi

    # Query Forge for issue/PR state; any failure means "not confirmed".
    issue_json=$(forge_api GET "/issues/${issue_num}" 2>/dev/null) || continue
    [ -n "$issue_json" ] || continue

    # A jq failure (e.g. non-JSON error body) is treated as "unknown" rather
    # than being allowed to propagate as a failed assignment.
    state=$(printf '%s' "$issue_json" | jq -r '.state // empty' 2>/dev/null) \
      || state=""
    [ "$state" = "closed" ] || continue

    closed_at=$(printf '%s' "$issue_json" \
      | jq -r '.closed_at // empty' 2>/dev/null) || closed_at=""
    [ -n "$closed_at" ] || continue

    # Enforce the grace period after closure. An unparseable timestamp must
    # NOT fall back to epoch 0 — that would look like "closed decades ago"
    # and delete the file immediately.
    if ! closed_epoch=$(date -d "$closed_at" +%s 2>/dev/null); then
      echo " Skipped: ${name} — issue #${issue_num} closed_at '${closed_at}' could not be parsed"
      reported=true
      continue
    fi

    elapsed=$(( now - closed_epoch ))
    if [ "$elapsed" -gt "$grace_secs" ]; then
      if rm -f -- "$pf"; then
        echo " Cleaned: ${name} — issue #${issue_num} closed at ${closed_at}"
      else
        echo " Failed: ${name} — could not remove (issue #${issue_num} closed at ${closed_at})"
      fi
    else
      remaining_h=$(( (grace_secs - elapsed) / 3600 ))
      echo " Grace: ${name} — issue #${issue_num} closed, ${remaining_h}h remaining"
    fi
    reported=true
  done

  # "None" only when no status line at all was printed above.
  if [ "$reported" = false ]; then
    echo " None"
  fi
  return 0
}

echo "## Stale Phase Cleanup"
_stale_phase_cleanup
echo ""

# ── Lock Files ────────────────────────────────────────────────────────────

echo "## Lock Files"