Merge pull request 'fix: feat: triage formula template with generic investigation steps and best practices (#342)' (#347) from fix/issue-342 into main
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
This commit is contained in:
commit
113bc422cb
1 changed files with 244 additions and 27 deletions
|
|
@ -1,7 +1,9 @@
|
|||
# formulas/triage.toml — Triage-agent formula
|
||||
# formulas/triage.toml — Triage-agent formula (generic template)
|
||||
#
|
||||
# Declares the triage-agent's runtime parameters.
|
||||
# The dispatcher reads this to configure the sidecar container.
|
||||
# This is the base template for triage investigations.
|
||||
# Project-specific formulas (e.g. formulas/triage-harb.toml) extend this by
|
||||
# overriding the fields in the [project] section and providing stack-specific
|
||||
# step descriptions.
|
||||
#
|
||||
# Triggered by: bug-report + in-triage label combination.
|
||||
# Set by the reproduce-agent when:
|
||||
|
|
@ -9,38 +11,253 @@
|
|||
# - Quick log analysis did not reveal an obvious root cause
|
||||
# - Reproduce-agent documented all steps taken and logs examined
|
||||
#
|
||||
# What it does:
|
||||
# 1. Reads reproduce-agent findings from issue comments (do not repeat work)
|
||||
# 2. Deep-traces the data flow from symptom to source:
|
||||
# UI component → composable → API/GraphQL → indexer → chain
|
||||
# - Compare what the code expects vs what APIs actually return
|
||||
# - Create a throwaway branch, add debug instrumentation (console.log, verbose logging)
|
||||
# - Restart services, re-run reproduction, observe new output
|
||||
# - Delete throwaway branch when done
|
||||
# 3. Decomposes all root causes (may be 1 or multiple compounding):
|
||||
# - For each root cause, create a separate backlog issue with:
|
||||
# * Which cause it is (1 of N)
|
||||
# * Specific code path and fix suggestion
|
||||
# * Depends-on: #X if causes are layered
|
||||
# 4. Updates original issue:
|
||||
# - Posts summary: "Found N root causes, tracked as #X, #Y, #Z"
|
||||
# - Replaces in-triage with in-progress
|
||||
# Steps:
|
||||
# 1. read-findings — parse issue comments for prior reproduce-agent evidence
|
||||
# 2. trace-data-flow — follow symptom through UI → API → backend → data store
|
||||
# 3. instrumentation — throwaway branch, add logging, restart, observe
|
||||
# 4. decompose — file backlog issues for each root cause
|
||||
# 5. link-back — update original issue, swap in-triage → in-progress
|
||||
# 6. cleanup — delete throwaway debug branch
|
||||
#
|
||||
# Best practices:
|
||||
# - Start from reproduce-agent findings; do not repeat their work
|
||||
# - Budget: 70% tracing data flow, 30% instrumented re-runs
|
||||
# - Multiple causes: check if layered (Depends-on) or independent (Related)
|
||||
# - Always delete the throwaway debug branch before finishing
|
||||
# - If inconclusive after full turn budget: leave in-triage, post what was
|
||||
# tried, do NOT relabel — supervisor handles stale triage sessions
|
||||
#
|
||||
# Project-specific formulas extend this template by defining:
|
||||
# - stack_script: how to start/stop the project stack
|
||||
# - [project].data_flow: layer names (e.g. "chain → indexer → GraphQL → UI")
|
||||
# - [project].api_endpoints: which APIs/services to inspect
|
||||
# - [project].stack_lock: stack lock configuration
|
||||
# - Per-step description overrides with project-specific commands
|
||||
#
|
||||
# No hard timeout — runs until Claude hits its turn limit.
|
||||
# Stack lock held for full run (triage is rare; blocking CI is acceptable).
|
||||
#
|
||||
# stack_script: path (relative to PROJECT_REPO_ROOT) of the script used to
|
||||
# restart/rebuild the project stack. Leave empty ("") to connect to an
|
||||
# existing staging environment instead.
|
||||
#
|
||||
# tools: MCP servers to pass to claude via --mcp-server flags.
|
||||
|
||||
name = "triage"
|
||||
description = "Deep root cause analysis: trace data flow, add debug instrumentation, decompose causes into backlog issues."
|
||||
version = 1
|
||||
version = 2
|
||||
|
||||
# Set stack_script to the path (relative to PROJECT_REPO_ROOT) of the restart script for local stacks.
|
||||
# Leave empty ("") to target an existing staging environment.
|
||||
# Leave empty ("") to connect to an existing staging environment.
|
||||
stack_script = ""
|
||||
|
||||
tools = ["playwright"]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Project-specific extension fields.
|
||||
# Override these in formulas/triage-<project>.toml.
|
||||
# ---------------------------------------------------------------------------
|
||||
[project]
# Layer names used when tracing data flow. This is the generic default;
# a project formula replaces it, e.g. "chain → indexer → GraphQL → UI".
data_flow = "UI → API → backend → data store"

# APIs/services to inspect, written as one comma-separated string, e.g.
# "GraphQL /graphql, REST /api/v1, RPC ws://localhost:8545".
# Empty in the generic template.
api_endpoints = ""

# Stack lock configuration. Empty means default behavior; a project formula
# may set e.g. "full" to hold a full stack lock during triage.
stack_lock = ""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Steps
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# First step: gather what the reproduce-agent already recorded on the issue
# so later steps do not repeat that work.
#
# The curl line continuations are written as "\\" on purpose. Inside a
# multi-line basic string a single trailing "\" is TOML's line-ending
# backslash: the parser would silently join the lines and drop the shell
# continuation from the rendered description.
[[steps]]
id = "read-findings"
title = "Read reproduce-agent findings"
description = """
Before doing anything else, parse all prior evidence from the issue comments.

1. Fetch the issue body and all comments:
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \\
"${FORGE_API}/issues/${ISSUE_NUMBER}" | jq -r '.body'
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \\
"${FORGE_API}/issues/${ISSUE_NUMBER}/comments" | jq -r '.[].body'

2. Identify the reproduce-agent comment (look for sections like
"Reproduction steps", "Logs examined", "What was tried").

3. Extract and note:
- The exact symptom (error message, unexpected value, visual regression)
- Steps that reliably trigger the bug
- Log lines or API responses already captured
- Any hypotheses the reproduce-agent already ruled out

Do NOT repeat work the reproduce-agent already did. Your job starts where
theirs ended. If no reproduce-agent comment is found, note it and proceed
with fresh investigation using the issue body only.
"""
|
||||
|
||||
# Second step: walk the symptom back through the stack layer by layer.
#
# The grep example uses -E with a plain "|" alternation. The earlier form
# "TODO\|FIXME\|HACK" put "\|" inside a TOML basic string, which is a reserved
# escape sequence and makes a compliant parser reject the file.
[[steps]]
id = "trace-data-flow"
title = "Trace data flow from symptom to source"
description = """
Systematically follow the symptom backwards through each layer of the stack.
Spend ~70% of your total turn budget here before moving to instrumentation.

Generic layer traversal (adapt to the project's actual stack):
UI → API → backend → data store

For each layer boundary:
1. What does the upstream layer send?
2. What does the downstream layer expect?
3. Is there a mismatch? If yes — is this the root cause or a symptom?

Tracing checklist:
a. Start at the layer closest to the visible symptom.
b. Read the relevant source files — do not guess data shapes.
c. Cross-reference API contracts: compare what the code sends vs what it
should send according to schemas, type definitions, or documentation.
d. Check recent git history on suspicious files:
git log --oneline -20 -- <file>
e. Search for related issues or TODOs in the code:
grep -rE "TODO|FIXME|HACK" -- <relevant directory>

Capture for each layer:
- The data shape flowing in and out (field names, types, nullability)
- Whether the layer's behavior matches its documented contract
- Any discrepancy found

If a clear root cause becomes obvious during tracing, note it and continue
checking whether additional causes exist downstream.
"""
needs = ["read-findings"]
|
||||
|
||||
# Third step: confirm or refute the tracing hypothesis with temporary logging
# on a local-only debug branch. Depends on trace-data-flow.
[[steps]]
id = "instrumentation"
needs = ["trace-data-flow"]
title = "Add debug instrumentation on a throwaway branch"
description = """
Use ~30% of your total turn budget here. Only instrument after tracing has
identified the most likely failure points — do not instrument blindly.

1. Create a throwaway debug branch (NEVER commit this to main):
cd "$PROJECT_REPO_ROOT"
git checkout -b debug/triage-${ISSUE_NUMBER}

2. Add targeted logging at the layer boundaries identified during tracing:
- Console.log / structured log statements around the suspicious code path
- Log the actual values flowing through: inputs, outputs, intermediate state
- Add verbose mode flags if the stack supports them
- Keep instrumentation minimal — only what confirms or refutes the hypothesis

3. Restart the stack using the configured script (if set):
${stack_script:-"# No stack_script configured — restart manually or connect to staging"}

4. Re-run the reproduction steps from the reproduce-agent findings.

5. Observe and capture new output:
- Paste relevant log lines into your working notes
- Note whether the observed values match or contradict the hypothesis

6. If the first instrumentation pass is inconclusive, iterate:
- Narrow the scope to the next most suspicious boundary
- Re-instrument, restart, re-run
- Maximum 2-3 instrumentation rounds before declaring inconclusive

Do NOT push the debug branch. It will be deleted in the cleanup step.
"""
|
||||
|
||||
# Fourth step: file one backlog issue per confirmed root cause.
#
# The JSON payload is single-quoted for the shell, so variables inside it are
# NOT expanded. The issue number is therefore spliced in by closing the single
# quotes around "${ISSUE_NUMBER}"; otherwise the created issue would contain
# the literal text "#${ISSUE_NUMBER}".
#
# "\\" and "\\n" are TOML escapes for a literal backslash, so the rendered
# text carries shell line continuations and JSON "\n" newlines.
#
# NOTE(review): Gitea/Forgejo-style create-issue endpoints document "labels"
# as numeric label IDs rather than names — confirm that ${FORGE_API} accepts
# the name "backlog" here.
[[steps]]
id = "decompose"
title = "Decompose root causes into backlog issues"
description = """
After tracing and instrumentation, articulate each distinct root cause.

For each root cause found:

1. Determine the relationship to other causes:
- Layered (one causes another) → use Depends-on in the issue body
- Independent (separate code paths fail independently) → use Related

2. Create a backlog issue for each root cause:
curl -sf -X POST "${FORGE_API}/issues" \\
-H "Authorization: token ${FORGE_TOKEN}" \\
-H "Content-Type: application/json" \\
-d '{
"title": "fix: <specific description of root cause N>",
"body": "## Root cause\\n<exact code path, file:line>\\n\\n## Fix suggestion\\n<recommended approach>\\n\\n## Context\\nDecomposed from #'"${ISSUE_NUMBER}"' (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>",
"labels": ["backlog"]
}'

3. Note the newly created issue numbers.

If only one root cause is found, still create a single backlog issue with
the specific code location and fix suggestion.

If the investigation is inconclusive (no clear root cause found), skip this
step and proceed directly to link-back with the inconclusive outcome.
"""
needs = ["instrumentation"]
|
||||
|
||||
# Fifth step: report the outcome on the original issue. Labels are swapped
# only on a conclusive result; an inconclusive run leaves in-triage in place.
[[steps]]
id = "link-back"
needs = ["decompose"]
title = "Update original issue and relabel"
description = """
Post a summary comment on the original issue and update its labels.

### If root causes were found (conclusive):

Post a comment:
"## Triage findings

Found N root cause(s):
- #X — <one-line description> (cause 1 of N)
- #Y — <one-line description> (cause 2 of N, depends on #X)

Data flow traced: <layer where the bug originates>
Instrumentation: <key log output that confirmed the cause>

Next step: backlog issues above will be implemented in dependency order."

Then swap labels:
- Remove: in-triage
- Add: in-progress

### If investigation was inconclusive (turn budget exhausted):

Post a comment:
"## Triage — inconclusive

Traced: <layers checked>
Tried: <instrumentation attempts and what they showed>
Hypothesis: <best guess at cause, if any>

No definitive root cause identified. Leaving in-triage for supervisor
to handle as a stale triage session."

Do NOT relabel. Leave in-triage. The supervisor monitors stale triage
sessions and will escalate or reassign.
"""
|
||||
|
||||
# Final step: return the repository to the primary branch with no debug
# leftovers.
#
# "git checkout -f" is used because the instrumentation step edits files
# without necessarily committing them; a plain checkout would carry those
# edits onto the primary branch. The branch deletion tolerates a missing
# branch so the step also works when instrumentation was never reached.
[[steps]]
id = "cleanup"
title = "Delete throwaway debug branch"
description = """
Always delete the debug branch, even if the investigation was inconclusive.

1. Switch back to the main branch, discarding uncommitted debug instrumentation:
cd "$PROJECT_REPO_ROOT"
git checkout -f "$PRIMARY_BRANCH"

2. Delete the local debug branch (ignore the error if it was never created):
git branch -D debug/triage-${ISSUE_NUMBER} 2>/dev/null || true

3. Confirm no remote was pushed (if accidentally pushed, delete it too):
git push origin --delete debug/triage-${ISSUE_NUMBER} 2>/dev/null || true

4. Verify the worktree is clean:
git status
git worktree list
If git status still lists untracked debug files you created, remove them.

A clean repo is a prerequisite for the next dev-agent run. Never leave
debug branches behind — they accumulate and pollute the branch list.
"""
needs = ["link-back"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue