From 6f75ab0a0415abd87c2279ea6708f23f1196ea86 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Fri, 20 Mar 2026 09:28:24 +0000
Subject: [PATCH 1/2] fix: feat: planner triages prediction/unreviewed issues
 alongside gap analysis (#142)

Expand the triage-predictions step in run-planner.toml with four explicit
triage actions (PROMOTE_ACTION, PROMOTE_BACKLOG, WATCH, DISMISS), each
with API execution details and mandatory reasoning comments. Promoted
predictions now close the original with "Actioned as #NNN" and compete
with vision gaps for the per-cycle 5-issue limit in strategic-planning.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 AGENTS.md                 | 11 ++++--
 formulas/run-planner.toml | 80 ++++++++++++++++++++++++++++++---------
 2 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 56443c0..1254a3b 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -153,10 +153,13 @@ Phase 0 (preflight): pull latest code, load persistent memory from
 `planner/MEMORY.md`. Phase 1: update the AGENTS.md documentation tree to
 reflect recent code changes (fast-track PR). Phase 1.5: triage
 `prediction/unreviewed` issues filed by the [Predictor](#predictor-planner) —
-accept as action/backlog issues or dismiss as noise. Phase 2: strategic planning
+for each prediction: promote to action, promote to backlog, watch (relabel to
+`prediction/backlog`), or dismiss with reasoning. Promoted predictions compete
+with vision gaps for the per-cycle issue limit. Phase 2: strategic planning
 via resource+leverage gap analysis — reasons about VISION.md, RESOURCES.md,
-formula catalog, and project state to create up to 5 backlog issues prioritized
-by leverage. Phase 3: persist learnings to `planner/MEMORY.md`.
+formula catalog, and project state to create up to 5 total issues (including
+promotions) prioritized by leverage. Phase 3: persist learnings to
+`planner/MEMORY.md`.
 
 **Trigger**: `planner-poll.sh` runs weekly via cron. It files an `action`
 issue referencing `formulas/run-planner.toml`; the [action-agent](#action-action)
@@ -165,7 +168,7 @@ picks it up and executes the planning steps in an interactive Claude tmux sessio
 **Key files**:
 - `planner/planner-poll.sh` — Cron wrapper: memory guard, dedup check, files action issue
 - `formulas/run-planner.toml` — Execution spec: five steps (preflight, agents-update,
-  prediction-triage, strategic-planning, memory-update) with `needs` dependencies.
+  triage-predictions, strategic-planning, memory-update) with `needs` dependencies.
   Steps 2 and 3 are independent; step 4 depends on both. Claude executes all steps
   in a single interactive session with tool access
 - `planner/MEMORY.md` — Persistent memory across runs (gitignored, local only)
diff --git a/formulas/run-planner.toml b/formulas/run-planner.toml
index df29237..8add108 100644
--- a/formulas/run-planner.toml
+++ b/formulas/run-planner.toml
@@ -89,10 +89,12 @@ Do NOT let an AGENTS.md failure prevent prediction triage or strategic planning.
 needs = ["preflight"]
 
 [[steps]]
-id    = "prediction-triage"
+id    = "triage-predictions"
 title = "Triage prediction/unreviewed issues"
 description = """
 Triage prediction issues filed by the predictor (goblin).
+Evidence from the preflight step informs whether each prediction is valid
+(e.g. "red-team stale since March 12" is confirmed by evidence/ timestamps).
 
 1. Fetch unreviewed predictions:
      curl -sf -H "Authorization: token $CODEBERG_TOKEN" \
@@ -107,21 +109,61 @@ Triage prediction issues filed by the predictor (goblin).
      curl -sf -H "Authorization: token $CODEBERG_TOKEN" \
        "$CODEBERG_API/issues?state=open&type=issues&limit=50"
 
-4. For each prediction, read the title and body. Decide:
-   - ACCEPT_ACTION: maps to an available formula -> create an action issue
-     with YAML front matter referencing the formula name and vars
-   - ACCEPT_BACKLOG: warrants dev work -> create a backlog issue
-   - DISMISS: noise, already covered by an open issue, or not actionable ->
-     post an explanation comment, then close the prediction issue
+4. For each prediction, read the title and body. Choose one action:
 
-5. For each accepted prediction:
-   - Create the new issue with the 'backlog' label (or 'action' label for
-     formula-matching actions)
-   - Remove 'prediction/unreviewed' label from the original prediction
-   - Add 'prediction/backlog' label to the original prediction
-   - Note what you accepted — you will need it for strategic-planning
+   - PROMOTE_ACTION: maps to an available formula → create an action issue
+     with YAML front matter referencing the formula name and vars.
+     Close the prediction with comment "Actioned as #NNN".
 
-6. Validation: if you reference a formula, verify it exists on disk.
+   - PROMOTE_BACKLOG: warrants dev work → create a backlog issue.
+     Close the prediction with comment "Actioned as #NNN".
+
+   - WATCH: not urgent but worth tracking → post a comment explaining
+     why it is not urgent, then relabel from prediction/unreviewed to
+     prediction/backlog. Do NOT close.
+
+   - DISMISS: noise, already covered by an open issue, or not actionable →
+     post a comment with explicit reasoning, then close the prediction.
+
+   Every decision MUST include reasoning in a comment on the prediction issue.
+
+5. Executing triage decisions via API:
+
+   For PROMOTE_ACTION / PROMOTE_BACKLOG:
+   a. Create the new issue with the 'action' or 'backlog' label:
+        curl -sf -X POST -H "Authorization: token $CODEBERG_TOKEN" \
+          -H "Content-Type: application/json" "$CODEBERG_API/issues" \
+          -d '{"title":"...","body":"...","labels":[<label_id>]}'
+   b. Comment on the prediction with "Actioned as #NNN":
+        curl -sf -X POST -H "Authorization: token $CODEBERG_TOKEN" \
+          -H "Content-Type: application/json" \
+          "$CODEBERG_API/issues/<pred_num>/comments" \
+          -d '{"body":"Actioned as #NNN — <reasoning>"}'
+   c. Close the prediction:
+        curl -sf -X PATCH -H "Authorization: token $CODEBERG_TOKEN" \
+          -H "Content-Type: application/json" \
+          "$CODEBERG_API/issues/<pred_num>" \
+          -d '{"state":"closed"}'
+
+   For WATCH:
+   a. Comment explaining why the prediction is not urgent
+   b. Replace prediction/unreviewed label with prediction/backlog:
+        curl -sf -X DELETE -H "Authorization: token $CODEBERG_TOKEN" \
+          "$CODEBERG_API/issues/<pred_num>/labels/<unreviewed_label_id>"
+        curl -sf -X POST -H "Authorization: token $CODEBERG_TOKEN" \
+          -H "Content-Type: application/json" \
+          "$CODEBERG_API/issues/<pred_num>/labels" \
+          -d '{"labels":[<backlog_label_id>]}'
+
+   For DISMISS:
+   a. Comment with explicit reasoning
+   b. Close the prediction issue
+
+6. Track promoted predictions — they compete with vision gaps in the
+   strategic-planning step for the per-cycle 5-issue limit.
+   Record each promotion (issue number, title, type) for hand-off.
+
+7. Validation: if you reference a formula, verify it exists on disk.
    Fall back to a freeform backlog issue for unknown formulas.
 
 Be decisive — the predictor intentionally over-signals; your job is to filter.
@@ -145,7 +187,8 @@ Read these inputs:
   - Open issues (fetched via API) — what's already planned
   - $FACTORY_ROOT/metrics/supervisor-metrics.jsonl — operational trends (may not exist)
   - Planner memory (loaded in preflight)
-  - Accepted predictions from the triage step
+  - Promoted predictions from triage-predictions (these count toward the
+    per-cycle issue limit — they compete with vision gaps for priority)
 
 Reason through these five questions:
 
@@ -169,7 +212,8 @@ Reason through these five questions:
    Things that depend on blocked resources or aren't high-leverage
    right now. Do NOT create issues for these.
 
-Then create up to 5 issues, prioritized by leverage:
+Then create up to 5 issues total (including promotions from triage-predictions),
+prioritized by leverage:
 
 For formula-matching gaps, include YAML front matter in the body:
   ---
@@ -190,7 +234,7 @@ Create each issue via the API with the 'backlog' label:
     -d '{"title":"...","body":"...","labels":[<backlog_label_id>]}'
 
 Rules:
-- Max 5 new issues — highest leverage first
+- Max 5 new issues total (promoted predictions + vision gaps) — highest leverage first
 - Do NOT create issues that overlap with ANY existing open issue
 - Do NOT create issues for items you identified as "deferred"
 - Each body: what's missing, why it matters, rough approach
@@ -201,7 +245,7 @@ Rules:
 
 If there are no gaps, note that the backlog is aligned with the vision.
 """
-needs = ["agents-update", "prediction-triage"]
+needs = ["agents-update", "triage-predictions"]
 
 [[steps]]
 id    = "memory-update"

From dd61f6438e0f4afe87f59f42c4be37bb57022d61 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Fri, 20 Mar 2026 09:36:37 +0000
Subject: [PATCH 2/2] =?UTF-8?q?fix:=20address=20review=20=E2=80=94=20disam?=
 =?UTF-8?q?biguate=20label=20placeholders,=20relabel=20before=20close?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename <backlog_label_id> in WATCH path to <prediction_backlog_label_id>
  to avoid collision with the plain backlog label in strategic-planning
- Add prediction/actioned relabeling before close for PROMOTE and DISMISS
  paths so closed predictions are distinguishable from unprocessed ones
- Make step 4 comment format consistent with step 5: "Actioned as #NNN —
  <reasoning>" everywhere
- Add step 3b for explicit label ID resolution with create-if-missing for
  the new prediction/actioned label
- Document prediction/* and action labels in AGENTS.md label table

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 AGENTS.md                 |  4 ++++
 formulas/run-planner.toml | 40 ++++++++++++++++++++++++++++++++-------
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 1254a3b..4f90665 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -311,6 +311,10 @@ Issues flow through these states:
 | `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) |
 | `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) |
 | `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans |
+| `prediction/unreviewed` | Unprocessed prediction filed by predictor. | prediction-agent.sh |
+| `prediction/backlog` | Prediction triaged as WATCH — not urgent, tracked. | Planner (triage-predictions step) |
+| `prediction/actioned` | Prediction promoted or dismissed by planner. | Planner (triage-predictions step) |
+| `action` | Operational task for the action-agent to execute via formula. | Planner, humans |
 
 ### Dependency conventions
 
diff --git a/formulas/run-planner.toml b/formulas/run-planner.toml
index 8add108..137029e 100644
--- a/formulas/run-planner.toml
+++ b/formulas/run-planner.toml
@@ -109,21 +109,33 @@ Evidence from the preflight step informs whether each prediction is valid
      curl -sf -H "Authorization: token $CODEBERG_TOKEN" \
        "$CODEBERG_API/issues?state=open&type=issues&limit=50"
 
+3b. Resolve label IDs needed for triage (fetch via $CODEBERG_API/labels):
+    - <unreviewed_label_id>        → prediction/unreviewed
+    - <prediction_backlog_label_id> → prediction/backlog
+    - <actioned_label_id>          → prediction/actioned (create if missing,
+      color #c2e0c6, description "Prediction triaged by planner")
+    - <backlog_label_id>           → backlog
+    - <action_label_id>            → action
+    These are DISTINCT labels — do not reuse IDs across them.
+
 4. For each prediction, read the title and body. Choose one action:
 
    - PROMOTE_ACTION: maps to an available formula → create an action issue
      with YAML front matter referencing the formula name and vars.
-     Close the prediction with comment "Actioned as #NNN".
+     Relabel prediction/unreviewed → prediction/actioned, then close
+     with comment "Actioned as #NNN — <reasoning>".
 
    - PROMOTE_BACKLOG: warrants dev work → create a backlog issue.
-     Close the prediction with comment "Actioned as #NNN".
+     Relabel prediction/unreviewed → prediction/actioned, then close
+     with comment "Actioned as #NNN — <reasoning>".
 
    - WATCH: not urgent but worth tracking → post a comment explaining
      why it is not urgent, then relabel from prediction/unreviewed to
      prediction/backlog. Do NOT close.
 
    - DISMISS: noise, already covered by an open issue, or not actionable →
-     post a comment with explicit reasoning, then close the prediction.
+     post a comment with explicit reasoning, relabel prediction/unreviewed →
+     prediction/actioned, then close the prediction.
 
    Every decision MUST include reasoning in a comment on the prediction issue.
 
@@ -134,12 +146,19 @@ Evidence from the preflight step informs whether each prediction is valid
         curl -sf -X POST -H "Authorization: token $CODEBERG_TOKEN" \
           -H "Content-Type: application/json" "$CODEBERG_API/issues" \
           -d '{"title":"...","body":"...","labels":[<label_id>]}'
-   b. Comment on the prediction with "Actioned as #NNN":
+   b. Comment on the prediction with "Actioned as #NNN — <reasoning>":
         curl -sf -X POST -H "Authorization: token $CODEBERG_TOKEN" \
           -H "Content-Type: application/json" \
           "$CODEBERG_API/issues/<pred_num>/comments" \
           -d '{"body":"Actioned as #NNN — <reasoning>"}'
-   c. Close the prediction:
+   c. Relabel: remove prediction/unreviewed, add prediction/actioned:
+        curl -sf -X DELETE -H "Authorization: token $CODEBERG_TOKEN" \
+          "$CODEBERG_API/issues/<pred_num>/labels/<unreviewed_label_id>"
+        curl -sf -X POST -H "Authorization: token $CODEBERG_TOKEN" \
+          -H "Content-Type: application/json" \
+          "$CODEBERG_API/issues/<pred_num>/labels" \
+          -d '{"labels":[<actioned_label_id>]}'
+   d. Close the prediction:
         curl -sf -X PATCH -H "Authorization: token $CODEBERG_TOKEN" \
           -H "Content-Type: application/json" \
           "$CODEBERG_API/issues/<pred_num>" \
@@ -153,11 +172,18 @@ Evidence from the preflight step informs whether each prediction is valid
         curl -sf -X POST -H "Authorization: token $CODEBERG_TOKEN" \
           -H "Content-Type: application/json" \
           "$CODEBERG_API/issues/<pred_num>/labels" \
-          -d '{"labels":[<backlog_label_id>]}'
+          -d '{"labels":[<prediction_backlog_label_id>]}'
 
    For DISMISS:
    a. Comment with explicit reasoning
-   b. Close the prediction issue
+   b. Relabel: remove prediction/unreviewed, add prediction/actioned:
+        curl -sf -X DELETE -H "Authorization: token $CODEBERG_TOKEN" \
+          "$CODEBERG_API/issues/<pred_num>/labels/<unreviewed_label_id>"
+        curl -sf -X POST -H "Authorization: token $CODEBERG_TOKEN" \
+          -H "Content-Type: application/json" \
+          "$CODEBERG_API/issues/<pred_num>/labels" \
+          -d '{"labels":[<actioned_label_id>]}'
+   c. Close the prediction issue
 
 6. Track promoted predictions — they compete with vision gaps in the
    strategic-planning step for the per-cycle 5-issue limit.