fix: supervisor should clean up stale PHASE:escalate files for closed issues (#664)
Add auto-cleanup to supervisor/preflight.sh: PHASE:escalate files whose parent issue/PR is confirmed closed (via Forge API) are deleted after a 24h grace period. Cleanup results appear in the preflight output for journal logging by the supervisor formula. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
eb373bb961
commit
b60811f0a1
2 changed files with 57 additions and 2 deletions
|
|
@ -28,9 +28,12 @@ The pre-flight metrics have already been collected by supervisor/preflight.sh
|
||||||
and injected into your prompt above. Review them now.
|
and injected into your prompt above. Review them now.
|
||||||
|
|
||||||
1. Read the injected metrics data carefully (System Resources, Docker,
|
1. Read the injected metrics data carefully (System Resources, Docker,
|
||||||
Active Sessions, Phase Files, Lock Files, Agent Logs, CI Pipelines,
|
Active Sessions, Phase Files, Stale Phase Cleanup, Lock Files, Agent Logs,
|
||||||
Open PRs, Issue Status, Stale Worktrees, Pending Escalations,
|
CI Pipelines, Open PRs, Issue Status, Stale Worktrees, Pending Escalations,
|
||||||
Escalation Replies).
|
Escalation Replies).
|
||||||
|
Note: preflight.sh auto-removes PHASE:escalate files for closed issues
|
||||||
|
(24h grace period). Check the "Stale Phase Cleanup" section for any
|
||||||
|
files cleaned or in grace period this run.
|
||||||
|
|
||||||
2. If there are escalation replies from Matrix (human messages), note them —
|
2. If there are escalation replies from Matrix (human messages), note them —
|
||||||
you will act on them in the decide-actions step.
|
you will act on them in the decide-actions step.
|
||||||
|
|
@ -68,6 +71,8 @@ Categorize every finding from the metrics into priority levels.
|
||||||
- Pipeline stalled: backlog issues exist but no agent ran for > 20min
|
- Pipeline stalled: backlog issues exist but no agent ran for > 20min
|
||||||
- Dev-agent blocked: last N polls all report "no ready issues"
|
- Dev-agent blocked: last N polls all report "no ready issues"
|
||||||
- Dev/action sessions in PHASE:escalate for > 24h (escalation timeout)
|
- Dev/action sessions in PHASE:escalate for > 24h (escalation timeout)
|
||||||
|
(Note: PHASE:escalate files for closed issues are auto-cleaned by preflight;
|
||||||
|
this check covers escalations where the issue is still open)
|
||||||
|
|
||||||
### P3 — Factory degraded
|
### P3 — Factory degraded
|
||||||
- PRs stale: CI finished >20min ago AND no git push to the PR branch since CI completed
|
- PRs stale: CI finished >20min ago AND no git push to the PR branch since CI completed
|
||||||
|
|
@ -119,6 +124,10 @@ For each finding from the health assessment, decide and execute an action.
|
||||||
cd "$PROJECT_REPO_ROOT"
|
cd "$PROJECT_REPO_ROOT"
|
||||||
git checkout "$PRIMARY_BRANCH" 2>/dev/null
|
git checkout "$PRIMARY_BRANCH" 2>/dev/null
|
||||||
|
|
||||||
|
**P4 Stale PHASE:escalate files (closed issues):**
|
||||||
|
Already handled by preflight.sh auto-cleanup. Check "Stale Phase Cleanup"
|
||||||
|
in the metrics for results. Log any cleanups in the journal.
|
||||||
|
|
||||||
**P4 Stale worktrees:**
|
**P4 Stale worktrees:**
|
||||||
git -C "$PROJECT_REPO_ROOT" worktree remove --force /tmp/stale-worktree 2>/dev/null
|
git -C "$PROJECT_REPO_ROOT" worktree remove --force /tmp/stale-worktree 2>/dev/null
|
||||||
git -C "$PROJECT_REPO_ROOT" worktree prune 2>/dev/null
|
git -C "$PROJECT_REPO_ROOT" worktree prune 2>/dev/null
|
||||||
|
|
|
||||||
|
|
@ -68,6 +68,52 @@ done
|
||||||
[ "$_found_phase" = false ] && echo " None"
|
[ "$_found_phase" = false ] && echo " None"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
|
# ── Stale Phase Cleanup ─────────────────────────────────────────────────
# Auto-remove PHASE:escalate files whose parent issue/PR is confirmed closed.
# Grace period: 24h after issue closure to avoid race conditions.
#
# Safety rule: a phase file is deleted only when every check positively
# succeeds (API reply received and parsed, state is "closed", closed_at
# parsed to a real epoch). Any failure along the way skips the file and
# leaves it on disk for the next run.

# Print the Unix epoch (seconds) for timestamp $1 on stdout.
# Returns non-zero and prints nothing when $1 is empty, cannot be parsed by
# `date -d`, or does not resolve to a positive epoch. Callers must treat a
# failure as "unknown closure time", never as "closed long ago".
_stale_phase_closed_epoch() {
  local _ts="$1" _epoch
  [ -n "$_ts" ] || return 1
  _epoch=$(date -d "$_ts" +%s 2>/dev/null) || return 1
  case "$_epoch" in
    '' | *[!0-9]*) return 1 ;; # empty, or negative (leading '-') / non-numeric
  esac
  [ "$_epoch" -gt 0 ] || return 1
  printf '%s\n' "$_epoch"
}

echo "## Stale Phase Cleanup"
_cleaned_any=false
_grace_any=false
# Loop-invariant: one timestamp for the whole pass.
_now=$(date +%s)
for _pf in /tmp/*-session-*.phase; do
  # An unmatched glob stays literal; -f filters that out.
  [ -f "$_pf" ] || continue
  _phase_line=$(head -n 1 "$_pf" 2>/dev/null || echo "")
  # Only target PHASE:escalate files
  case "$_phase_line" in
    PHASE:escalate*) ;;
    *) continue ;;
  esac
  # Extract issue number: *-session-{PROJECT_NAME}-{number}.phase
  # PROJECT_NAME is quoted so it is matched literally, not as a regex.
  _base=$(basename "$_pf" .phase)
  if [[ "$_base" =~ -session-"${PROJECT_NAME}"-([0-9]+)$ ]]; then
    _issue_num="${BASH_REMATCH[1]}"
  else
    continue
  fi
  # Query Forge for issue/PR state.
  # NOTE(review): forge_api is defined outside this section; this code
  # assumes it prints the issue JSON on stdout — confirm against its definition.
  _issue_json=$(forge_api GET "/issues/${_issue_num}" 2>/dev/null || echo "")
  [ -n "$_issue_json" ] || continue
  # A reply that jq cannot parse skips the file instead of failing the script.
  _state=$(printf '%s' "$_issue_json" | jq -r '.state // empty' 2>/dev/null) || continue
  [ "$_state" = "closed" ] || continue
  # Enforce 24h grace period after closure
  _closed_at=$(printf '%s' "$_issue_json" | jq -r '.closed_at // empty' 2>/dev/null) || continue
  [ -n "$_closed_at" ] || continue
  # Unparseable closed_at => closure time unknown => keep the file.
  _closed_epoch=$(_stale_phase_closed_epoch "$_closed_at") || continue
  _elapsed=$((_now - _closed_epoch))
  if [ "$_elapsed" -gt 86400 ]; then
    rm -f "$_pf"
    echo "  Cleaned: $(basename "$_pf") — issue #${_issue_num} closed at ${_closed_at}"
    _cleaned_any=true
  else
    _remaining_h=$(((86400 - _elapsed) / 3600))
    echo "  Grace: $(basename "$_pf") — issue #${_issue_num} closed, ${_remaining_h}h remaining"
    _grace_any=true
  fi
done
# "None" only when the section would otherwise be empty.
if [ "$_cleaned_any" = false ] && [ "$_grace_any" = false ]; then
  echo "  None"
fi
echo ""
|
||||||
|
|
||||||
# ── Lock Files ────────────────────────────────────────────────────────────
|
# ── Lock Files ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
echo "## Lock Files"
|
echo "## Lock Files"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue