Merge pull request 'fix: fix: supervisor should clean up stale PHASE:escalate files for closed issues (#664)' (#674) from fix/issue-664 into main

This commit is contained in:
johba 2026-03-25 10:09:21 +01:00
commit b2dd42df40
2 changed files with 55 additions and 2 deletions

View file

@ -28,9 +28,12 @@ The pre-flight metrics have already been collected by supervisor/preflight.sh
and injected into your prompt above. Review them now.
1. Read the injected metrics data carefully (System Resources, Docker,
Active Sessions, Phase Files, Lock Files, Agent Logs, CI Pipelines,
Open PRs, Issue Status, Stale Worktrees, Pending Escalations,
Active Sessions, Phase Files, Stale Phase Cleanup, Lock Files, Agent Logs,
CI Pipelines, Open PRs, Issue Status, Stale Worktrees, Pending Escalations,
Escalation Replies).
Note: preflight.sh auto-removes PHASE:escalate files for closed issues
(24h grace period). Check the "Stale Phase Cleanup" section for any
files cleaned or in grace period this run.
2. If there are escalation replies from Matrix (human messages), note them;
you will act on them in the decide-actions step.
@ -68,6 +71,8 @@ Categorize every finding from the metrics into priority levels.
- Pipeline stalled: backlog issues exist but no agent ran for > 20min
- Dev-agent blocked: last N polls all report "no ready issues"
- Dev/action sessions in PHASE:escalate for > 24h (escalation timeout)
(Note: PHASE:escalate files for closed issues are auto-cleaned by preflight;
this check covers escalations where the issue is still open)
### P3 — Factory degraded
- PRs stale: CI finished >20min ago AND no git push to the PR branch since CI completed
@ -119,6 +124,10 @@ For each finding from the health assessment, decide and execute an action.
cd "$PROJECT_REPO_ROOT"
git checkout "$PRIMARY_BRANCH" 2>/dev/null
**P4 Stale PHASE:escalate files (closed issues):**
Already handled by preflight.sh auto-cleanup. Check "Stale Phase Cleanup"
in the metrics for results. Log any cleanups in the journal.
**P4 Stale worktrees:**
git -C "$PROJECT_REPO_ROOT" worktree remove --force /tmp/stale-worktree 2>/dev/null
git -C "$PROJECT_REPO_ROOT" worktree prune 2>/dev/null

View file

@ -68,6 +68,50 @@ done
[ "$_found_phase" = false ] && echo " None"
echo ""
# ── Stale Phase Cleanup ─────────────────────────────────────────────────
# Auto-remove PHASE:escalate files whose parent issue/PR is confirmed closed.
# Grace period: 24h after issue closure to avoid race conditions.
#
# Reads:   /tmp/*-session-${PROJECT_NAME}-<number>.phase, PROJECT_NAME
# Calls:   forge_api (defined outside this section), jq, date
# Outputs: one "Cleaned:" / "Grace:" / "Skipped:" line per phase file whose
#          issue is closed, or " None" when no such file was found.
# Safety:  deletion is the only destructive step, so every lookup or parse
#          failure leaves the file in place instead of guessing.
echo "## Stale Phase Cleanup"
_stale_grace_secs=86400
_found_stale=false
for _pf in /tmp/*-session-*.phase; do
  # An unmatched glob stays literal; -f filters that (and non-regular files).
  [ -f "$_pf" ] || continue
  _phase_line=$(head -1 "$_pf" 2>/dev/null || echo "")
  # Only target PHASE:escalate files
  case "$_phase_line" in
    PHASE:escalate*) ;;
    *) continue ;;
  esac
  # Extract issue number: *-session-{PROJECT_NAME}-{number}.phase
  # PROJECT_NAME is quoted so it matches literally; unquoted, characters such
  # as '.' or '+' in the name would be interpreted as regex operators.
  _base=$(basename "$_pf" .phase)
  if [[ "$_base" =~ -session-"${PROJECT_NAME}"-([0-9]+)$ ]]; then
    _issue_num="${BASH_REMATCH[1]}"
  else
    continue
  fi
  # Query Forge for issue/PR state
  _issue_json=$(forge_api GET "/issues/${_issue_num}" 2>/dev/null || echo "")
  [ -n "$_issue_json" ] || continue
  # A jq failure (e.g. non-JSON error body) is treated as "state unknown".
  _state=$(printf '%s' "$_issue_json" | jq -r '.state // empty' 2>/dev/null) || _state=""
  [ "$_state" = "closed" ] || continue
  # From here on every path prints a line, so " None" is never emitted (or
  # omitted) inconsistently with the per-file output.
  _found_stale=true
  # Enforce 24h grace period after closure
  _closed_at=$(printf '%s' "$_issue_json" | jq -r '.closed_at // empty' 2>/dev/null) || _closed_at=""
  if [ -z "$_closed_at" ]; then
    echo " Skipped: $(basename "$_pf") — issue #${_issue_num} closed, closure time unknown"
    continue
  fi
  # Do NOT fall back to epoch 0 on a parse failure: that would make the
  # closure look decades old and delete the file with no grace period.
  _closed_epoch=$(date -d "$_closed_at" +%s 2>/dev/null) || _closed_epoch=""
  if ! [[ "$_closed_epoch" =~ ^[0-9]+$ ]]; then
    echo " Skipped: $(basename "$_pf") — issue #${_issue_num} closed, unparseable closed_at '${_closed_at}'"
    continue
  fi
  _now=$(date +%s)
  _elapsed=$(( _now - _closed_epoch ))
  if [ "$_elapsed" -gt "$_stale_grace_secs" ]; then
    # Report "Cleaned" only when the file was actually removed.
    if rm -f -- "$_pf"; then
      echo " Cleaned: $(basename "$_pf") — issue #${_issue_num} closed at ${_closed_at}"
    else
      echo " Skipped: $(basename "$_pf") — issue #${_issue_num} closed, removal failed"
    fi
  else
    # Round up so a file with minutes left is not reported as "0h remaining".
    _remaining_h=$(( (_stale_grace_secs - _elapsed + 3599) / 3600 ))
    echo " Grace: $(basename "$_pf") — issue #${_issue_num} closed, ${_remaining_h}h remaining"
  fi
done
[ "$_found_stale" = false ] && echo " None"
echo ""
# ── Lock Files ────────────────────────────────────────────────────────────
echo "## Lock Files"