fix: feat: triage agent — deep root cause analysis for reproduced bugs (#258)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b0e789470e
commit
5189b70dd3
2 changed files with 181 additions and 0 deletions
|
|
@ -578,6 +578,131 @@ dispatch_reproduce() {
|
|||
log "Reproduce container launched (pid ${bg_pid}) for issue #${issue_number}"
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Triage dispatch — launch sidecar for bug-report + in-triage issues
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Print the path of the pid-file that marks a triage run as in-flight.
# Arguments: $1 - issue number
# Outputs:   /tmp/triage-inflight-<issue>.pid on stdout
_triage_lockfile() {
  local issue_id="$1"
  printf '%s\n' "/tmp/triage-inflight-${issue_id}.pid"
}
|
||||
|
||||
# Check whether a triage run is already in-flight for a given issue.
# Arguments: $1 - issue number
# Returns:   0 if the issue's pid-file exists and names a live process
#            that this user may signal; 1 otherwise.
is_triage_running() {
  local issue="$1"
  local pidfile
  pidfile=$(_triage_lockfile "$issue")
  [ -f "$pidfile" ] || return 1

  local pid
  # Best-effort read: an unreadable pid-file counts as "not running".
  pid=$(cat "$pidfile" 2>/dev/null) || pid=""

  # Accept only a positive decimal PID. `kill -0 0` and `kill -0 -1`
  # address process groups and always succeed, so a corrupted or planted
  # pid-file would otherwise block triage for this issue indefinitely.
  case "$pid" in
    '' | *[!0-9]*) return 1 ;;
  esac
  [ "$pid" -gt 0 ] 2>/dev/null || return 1

  kill -0 "$pid" 2>/dev/null
}
|
||||
|
||||
# Fetch open issues labelled both bug-report and in-triage.
# Globals:   FORGE_TOKEN, FORGE_URL, FORGE_REPO (read)
# Outputs:   newline-separated list of issue numbers on stdout (may be empty)
# Returns:   0 always — missing configuration, a failed API call, or an
#            unexpected response are all treated as "no candidates".
fetch_triage_candidates() {
  # Require FORGE_TOKEN, FORGE_URL, FORGE_REPO
  [ -n "${FORGE_TOKEN:-}" ] || return 0
  [ -n "${FORGE_URL:-}" ] || return 0
  [ -n "${FORGE_REPO:-}" ] || return 0

  local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"

  local issues_json
  issues_json=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${api}/issues?type=issues&state=open&labels=bug-report&limit=20" 2>/dev/null) || return 0

  # The query above only filters on bug-report; keep the issues that carry
  # BOTH bug-report AND in-triage. The JSON is streamed to python on stdin
  # so no temp file is needed (nothing to leak if we are interrupted).
  local filter_py='
import json
import sys

try:
    data = json.load(sys.stdin)
except ValueError:
    data = None

if not isinstance(data, list):
    sys.stderr.write("fetch_triage_candidates: unexpected API response, ignoring\n")
    sys.exit(0)

for issue in data:
    if not isinstance(issue, dict):
        continue
    labels = {l.get("name") for l in (issue.get("labels") or []) if isinstance(l, dict)}
    if "bug-report" in labels and "in-triage" in labels and "number" in issue:
        print(issue["number"])
'
  printf '%s' "$issues_json" | python3 -c "$filter_py" || return 0
}
|
||||
|
||||
# Launch a triage container for one candidate issue.
# Uses the same disinto-reproduce:latest image as the reproduce-agent,
# selecting the triage formula via DISINTO_FORMULA env var.
# Stack lock is held for the full run (no timeout).
# NOTE(review): no lock is acquired in this function itself — presumably the
# container entrypoint takes it; confirm.
#
# Globals:   FACTORY_ROOT, FORGE_URL, FORGE_TOKEN, FORGE_REPO (read)
#            PRIMARY_BRANCH, ANTHROPIC_API_KEY, HOME (read, optional)
# Arguments: $1 - issue number
# Returns:   0 when the issue is skipped (triage already running, or no
#            project TOML found); otherwise the status of the final log call.
#            The container runs in the background and is not waited for.
dispatch_triage() {
  local issue_number="$1"

  if is_triage_running "$issue_number"; then
    log "Triage already running for issue #${issue_number}, skipping"
    return 0
  fi

  # Find first project TOML available (same convention as dev-poll)
  local project_toml=""
  local toml # keep the loop variable out of the caller's scope
  for toml in "${FACTORY_ROOT}"/projects/*.toml; do
    # An unmatched glob stays literal and fails the -f test.
    if [ -f "$toml" ]; then
      project_toml="$toml"
      break
    fi
  done

  if [ -z "$project_toml" ]; then
    log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping triage for #${issue_number}"
    return 0
  fi

  log "Dispatching triage-agent for issue #${issue_number} (project: ${project_toml})"

  # Build docker run command using array (safe from injection).
  # Secrets are passed by NAME only (-e FORGE_TOKEN): docker copies the value
  # from its own environment, so the token never appears on the command line
  # where other local users could read it via ps.
  local -a cmd=(docker run --rm
    --name "disinto-triage-${issue_number}"
    --network host
    --security-opt apparmor=unconfined
    -v /var/run/docker.sock:/var/run/docker.sock
    -v agent-data:/home/agent/data
    -v project-repos:/home/agent/repos
    -e "FORGE_URL=${FORGE_URL}"
    -e FORGE_TOKEN
    -e "FORGE_REPO=${FORGE_REPO}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
    -e DISINTO_CONTAINER=1
    -e DISINTO_FORMULA=triage
  )

  # Pass through ANTHROPIC_API_KEY if set (by name, see above)
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e ANTHROPIC_API_KEY)
  fi

  # Mount ~/.claude and ~/.ssh from the runtime user's home if available
  local runtime_home="${HOME:-/home/debian}"
  if [ -d "${runtime_home}/.claude" ]; then
    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
  fi
  if [ -f "${runtime_home}/.claude.json" ]; then
    cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
  fi
  if [ -d "${runtime_home}/.ssh" ]; then
    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
  fi
  # Mount claude CLI binary if present on host
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi

  # Mount the project TOML into the container at a stable path
  local container_toml="/home/agent/project.toml"
  cmd+=(-v "${project_toml}:${container_toml}:ro")

  cmd+=(disinto-reproduce:latest "$container_toml" "$issue_number")

  # Launch in background; write pid-file so we don't double-launch.
  # The prefix assignments export the secrets to the docker client only,
  # which is where the bare `-e NAME` flags above pick them up.
  FORGE_TOKEN="${FORGE_TOKEN}" ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}" \
    "${cmd[@]}" &
  local bg_pid=$!
  echo "$bg_pid" > "$(_triage_lockfile "$issue_number")"
  log "Triage container launched (pid ${bg_pid}) for issue #${issue_number}"
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Main dispatcher loop
|
||||
# -----------------------------------------------------------------------------
|
||||
|
|
@ -638,6 +763,16 @@ main() {
|
|||
done <<< "$candidate_issues"
|
||||
fi
|
||||
|
||||
# Triage dispatch: check for bug-report + in-triage issues needing deep analysis
|
||||
local triage_issues
|
||||
triage_issues=$(fetch_triage_candidates) || true
|
||||
if [ -n "$triage_issues" ]; then
|
||||
while IFS= read -r issue_num; do
|
||||
[ -n "$issue_num" ] || continue
|
||||
dispatch_triage "$issue_num" || true
|
||||
done <<< "$triage_issues"
|
||||
fi
|
||||
|
||||
# Wait before next poll
|
||||
sleep 60
|
||||
done
|
||||
|
|
|
|||
46
formulas/triage.toml
Normal file
46
formulas/triage.toml
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
# formulas/triage.toml — Triage-agent formula
|
||||
#
|
||||
# Declares the triage-agent's runtime parameters.
|
||||
# The dispatcher reads this to configure the sidecar container.
|
||||
#
|
||||
# Triggered by: bug-report + in-triage label combination.
|
||||
# Set by the reproduce-agent when:
|
||||
# - Bug was confirmed (reproduced)
|
||||
# - Quick log analysis did not reveal an obvious root cause
|
||||
# - Reproduce-agent documented all steps taken and logs examined
|
||||
#
|
||||
# What it does:
|
||||
# 1. Reads reproduce-agent findings from issue comments (do not repeat work)
|
||||
# 2. Deep-traces the data flow from symptom to source:
|
||||
# UI component → composable → API/GraphQL → indexer → chain
|
||||
# - Compare what the code expects vs what APIs actually return
|
||||
# - Create a throwaway branch, add debug instrumentation (console.log, verbose logging)
|
||||
# - Restart services, re-run reproduction, observe new output
|
||||
# - Delete throwaway branch when done
|
||||
# 3. Decomposes all root causes (may be 1 or multiple compounding):
|
||||
# - For each root cause, create a separate backlog issue with:
|
||||
# * Which cause it is (1 of N)
|
||||
# * Specific code path and fix suggestion
|
||||
# * Depends-on: #X if causes are layered
|
||||
# 4. Updates original issue:
|
||||
# - Posts summary: "Found N root causes, tracked as #X, #Y, #Z"
|
||||
# - Replaces in-triage with in-progress
|
||||
#
|
||||
# No hard timeout — runs until Claude hits its turn limit.
|
||||
# Stack lock held for full run (triage is rare; blocking CI is acceptable).
|
||||
#
|
||||
# stack_script: path (relative to PROJECT_REPO_ROOT) of the script used to
|
||||
# restart/rebuild the project stack. Leave empty ("") to connect to an
|
||||
# existing staging environment instead.
|
||||
#
|
||||
# tools: MCP servers to pass to claude via --mcp-server flags.
|
||||
|
||||
name = "triage"
|
||||
description = "Deep root cause analysis: trace data flow, add debug instrumentation, decompose causes into backlog issues."
|
||||
version = 1
|
||||
|
||||
# Set stack_script to the path (relative to PROJECT_REPO_ROOT) of the restart/rebuild script for local stacks.
|
||||
# Leave empty ("") to target an existing staging environment.
|
||||
stack_script = ""
|
||||
|
||||
tools = ["playwright"]
|
||||
Loading…
Add table
Add a link
Reference in a new issue