fix: refactor: replace escalation JSONL with blocked label + diagnostic comment (#352)

Replace the unreliable escalation JSONL system (supervisor/escalations-*.jsonl
consumed by gardener) with direct blocked label + diagnostic comment on the
original issue.

When a dev-agent or action-agent session fails (PHASE:failed, idle timeout,
crash, CI exhausted):
- Capture last 50 lines from tmux pane via tmux capture-pane
- Post a structured diagnostic comment on the issue (exit reason, timestamp,
  PR number, tmux output)
- Label the issue "blocked" (instead of restoring "backlog")
- Remove in-progress label

Removed:
- Escalation JSONL write paths in dev-agent.sh, phase-handler.sh, dev-poll.sh,
  action-agent.sh
- is_escalated() helper in dev-poll.sh
- Escalation triage (P2f section) in supervisor-poll.sh
- Escalation processing + recipe engine in gardener-poll.sh
- ci-escalation-recipes step from run-gardener.toml formula
- escalations*.jsonl from .gitignore

Added:
- post_blocked_diagnostic() shared helper in phase-handler.sh
- ensure_blocked_label_id() helper (creates label via API if not exists)
- is_blocked() helper in dev-poll.sh (replaces is_escalated)
- Blocked issues listing in supervisor/preflight.sh

Kept:
- Matrix notifications on failure (unchanged)
- CI fix counter logic (still tracks attempts)
- needs_human injection in supervisor/gardener (not escalation-related)
- Gardener grooming (gardener-agent.sh still invoked)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 04:18:43 +00:00 · 2026-03-21 04:18:43 +00:00 · 61c44d31b1
commit 61c44d31b1
parent 0109f0b0c3
10 changed files with 181 additions and 990 deletions
--- a/formulas/run-gardener.toml
+++ b/formulas/run-gardener.toml
@@ -1,17 +1,16 @@
 # formulas/run-gardener.toml — Gardener housekeeping formula
 #
 # Defines the gardener's complete run: grooming (Claude session via
-# gardener-agent.sh) + CI escalation recipes (bash, gardener-poll.sh)
-# + AGENTS.md maintenance + final commit-and-pr.
+# gardener-agent.sh) + blocked-review + AGENTS.md maintenance + final
+# commit-and-pr.
 #
 # No memory, no journal. The gardener does mechanical housekeeping
 # based on current state — it doesn't need to remember past runs.
 #
-# Steps: preflight → grooming → blocked-review → ci-escalation-recipes
-#        → agents-update → commit-and-pr
+# Steps: preflight → grooming → blocked-review → agents-update → commit-and-pr

 name        = "run-gardener"
-description = "Mechanical housekeeping: grooming, blocked review, CI escalation recipes, docs update"
+description = "Mechanical housekeeping: grooming, blocked review, docs update"
 version     = 1

 [context]
@@ -184,40 +183,7 @@ CRITICAL: If this step fails, log the failure and move on.
 needs = ["grooming"]

 # ─────────────────────────────────────────────────────────────────────
-# Step 4: ci-escalation-recipes — recipe-driven CI failure handling
-# ─────────────────────────────────────────────────────────────────────
-
-[[steps]]
-id    = "ci-escalation-recipes"
-title = "CI escalation recipes (bash — gardener-poll.sh)"
-executor = "bash"
-script   = "gardener/gardener-poll.sh --recipes-only"
-description = """
-NOT a Claude step — executed by gardener-poll.sh before/after the Claude session.
-Documented here so the formula covers the full gardener run.
-
-gardener-poll.sh processes CI escalation entries from
-supervisor/escalations-{project}.jsonl. Each entry is a dev-agent session
-that exhausted its CI fix attempts and was escalated to the gardener.
-
-The recipe engine (match_recipe function in gardener-poll.sh) matches each
-escalation against gardener/recipes/*.toml by priority order, then executes
-the matched recipe's playbook actions via bash functions.
-
-Recipes (see gardener/recipes/*.toml for definitions):
- chicken-egg-ci (priority 10): non-blocking bypass + per-file fix issues
- cascade-rebase (priority 20): rebase via Gitea API, re-approve, retry merge
- flaky-test (priority 30): retrigger CI or quarantine
- shellcheck-violations (priority 40): per-file ShellCheck fix issues
- Generic fallback: one combined CI failure issue
-
-Special cases:
- idle_timeout / idle_prompt: investigation issues (no recipe matching)
-"""
-needs = ["grooming"]
-
-# ─────────────────────────────────────────────────────────────────────
-# Step 5: agents-update — AGENTS.md watermark staleness check
+# Step 4: agents-update — AGENTS.md watermark staleness check
 # ─────────────────────────────────────────────────────────────────────

 [[steps]]
@@ -254,7 +220,7 @@ This keeps documentation fresh — runs 2x/day so drift stays small.
 CRITICAL: If this step fails for any reason, log the failure and move on.
 Do NOT let an AGENTS.md failure prevent the commit-and-pr step.
 """
-needs = ["ci-escalation-recipes"]
+needs = ["blocked-review"]

 # ─────────────────────────────────────────────────────────────────────
-# Step 6: commit-and-pr — single commit with all file changes
+# Step 5: commit-and-pr — single commit with all file changes