diff --git a/docker/edge/dispatcher.sh b/docker/edge/dispatcher.sh
index 4b79bbf..884063d 100755
--- a/docker/edge/dispatcher.sh
+++ b/docker/edge/dispatcher.sh
@@ -578,6 +578,142 @@ dispatch_reproduce() {
   log "Reproduce container launched (pid ${bg_pid}) for issue #${issue_number}"
 }
 
+# -----------------------------------------------------------------------------
+# Triage dispatch — launch sidecar for bug-report + in-triage issues
+# -----------------------------------------------------------------------------
+
+# Print the path of the pid-file that marks an in-flight triage run.
+# Arguments: $1 - issue number
+_triage_lockfile() {
+  local issue="$1"
+  echo "/tmp/triage-inflight-${issue}.pid"
+}
+
+# Check if a triage run is already in-flight for a given issue.
+# Arguments: $1 - issue number
+# Returns:   0 if the pid-file exists and the recorded pid is alive,
+#            non-zero otherwise.
+is_triage_running() {
+  local issue="$1"
+  local pidfile pid
+  pidfile=$(_triage_lockfile "$issue")
+  [ -f "$pidfile" ] || return 1
+  pid=$(cat "$pidfile" 2>/dev/null || echo "")
+  # kill -0 delivers no signal; it only tests whether the pid exists.
+  [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null
+}
+
+# Fetch open issues labelled both bug-report and in-triage.
+# Globals:  FORGE_TOKEN, FORGE_URL, FORGE_REPO (read)
+# Outputs:  newline-separated list of issue numbers on stdout.
+# Returns:  0 (printing nothing) when config is missing or the API call fails.
+fetch_triage_candidates() {
+  # Require FORGE_TOKEN, FORGE_URL, FORGE_REPO
+  [ -n "${FORGE_TOKEN:-}" ] || return 0
+  [ -n "${FORGE_URL:-}" ] || return 0
+  [ -n "${FORGE_REPO:-}" ] || return 0
+
+  local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
+
+  local issues_json
+  issues_json=$(curl -sf \
+    -H "Authorization: token ${FORGE_TOKEN}" \
+    "${api}/issues?type=issues&state=open&labels=bug-report&limit=20" 2>/dev/null) || return 0
+
+  # Filter to issues that carry BOTH bug-report AND in-triage labels.
+  # The JSON goes through a temp file because the heredoc occupies stdin.
+  local tmpjson
+  tmpjson=$(mktemp) || return 0
+  printf '%s\n' "$issues_json" > "$tmpjson"
+  python3 - "$tmpjson" <<'PYEOF'
+import sys, json
+data = json.load(open(sys.argv[1]))
+for issue in data:
+    labels = {l["name"] for l in (issue.get("labels") or [])}
+    if "bug-report" in labels and "in-triage" in labels:
+        print(issue["number"])
+PYEOF
+  rm -f "$tmpjson"
+}
+
+# Launch one triage container per candidate issue.
+# Uses the same disinto-reproduce:latest image as the reproduce-agent,
+# selecting the triage formula via DISINTO_FORMULA env var.
+# Stack lock is held for the full run (no timeout).
+# Arguments: $1 - issue number
+dispatch_triage() {
+  local issue_number="$1"
+
+  if is_triage_running "$issue_number"; then
+    log "Triage already running for issue #${issue_number}, skipping"
+    return 0
+  fi
+
+  # Find first project TOML available (same convention as dev-poll)
+  local toml project_toml=""
+  for toml in "${FACTORY_ROOT}"/projects/*.toml; do
+    [ -f "$toml" ] && { project_toml="$toml"; break; }
+  done
+
+  if [ -z "$project_toml" ]; then
+    log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping triage for #${issue_number}"
+    return 0
+  fi
+
+  log "Dispatching triage-agent for issue #${issue_number} (project: ${project_toml})"
+
+  # NOTE(review): secrets are passed as "-e NAME=value" argv entries, which are
+  # visible in the process list; "-e NAME" would avoid that if they are exported.
+  # Build docker run command using array (safe from injection)
+  local -a cmd=(docker run --rm
+    --name "disinto-triage-${issue_number}"
+    --network host
+    --security-opt apparmor=unconfined
+    -v /var/run/docker.sock:/var/run/docker.sock
+    -v agent-data:/home/agent/data
+    -v project-repos:/home/agent/repos
+    -e "FORGE_URL=${FORGE_URL}"
+    -e "FORGE_TOKEN=${FORGE_TOKEN}"
+    -e "FORGE_REPO=${FORGE_REPO}"
+    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
+    -e DISINTO_CONTAINER=1
+    -e DISINTO_FORMULA=triage
+  )
+
+  # Pass through ANTHROPIC_API_KEY if set
+  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
+    cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
+  fi
+
+  # Mount ~/.claude and ~/.ssh from the runtime user's home if available
+  local runtime_home="${HOME:-/home/debian}"
+  if [ -d "${runtime_home}/.claude" ]; then
+    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
+  fi
+  if [ -f "${runtime_home}/.claude.json" ]; then
+    cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
+  fi
+  if [ -d "${runtime_home}/.ssh" ]; then
+    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
+  fi
+  # Mount claude CLI binary if present on host
+  if [ -f /usr/local/bin/claude ]; then
+    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
+  fi
+
+  # Mount the project TOML into the container at a stable path
+  local container_toml="/home/agent/project.toml"
+  cmd+=(-v "${project_toml}:${container_toml}:ro")
+
+  cmd+=(disinto-reproduce:latest "$container_toml" "$issue_number")
+
+  # Launch in background; write pid-file so we don't double-launch
+  "${cmd[@]}" &
+  local bg_pid=$!
+  echo "$bg_pid" > "$(_triage_lockfile "$issue_number")"
+  log "Triage container launched (pid ${bg_pid}) for issue #${issue_number}"
+}
+
 # -----------------------------------------------------------------------------
 # Main dispatcher loop
 # -----------------------------------------------------------------------------
@@ -638,6 +774,16 @@ main() {
       done <<< "$candidate_issues"
     fi
 
+    # Triage dispatch: check for bug-report + in-triage issues needing deep analysis
+    local triage_issues
+    triage_issues=$(fetch_triage_candidates) || true
+    if [ -n "$triage_issues" ]; then
+      while IFS= read -r issue_num; do
+        [ -n "$issue_num" ] || continue
+        dispatch_triage "$issue_num" || true
+      done <<< "$triage_issues"
+    fi
+
     # Wait before next poll
     sleep 60
   done
diff --git a/formulas/triage.toml b/formulas/triage.toml
new file mode 100644
index 0000000..bee1887
--- /dev/null
+++ b/formulas/triage.toml
@@ -0,0 +1,46 @@
+# formulas/triage.toml — Triage-agent formula
+#
+# Declares the triage-agent's runtime parameters.
+# The dispatcher reads this to configure the sidecar container.
+#
+# Triggered by: bug-report + in-triage label combination.
+# Set by the reproduce-agent when:
+#   - Bug was confirmed (reproduced)
+#   - Quick log analysis did not reveal an obvious root cause
+#   - Reproduce-agent documented all steps taken and logs examined
+#
+# What it does:
+#   1. Reads reproduce-agent findings from issue comments (do not repeat work)
+#   2. Deep-traces the data flow from symptom to source:
+#      UI component → composable → API/GraphQL → indexer → chain
+#      - Compare what the code expects vs what APIs actually return
+#      - Create a throwaway branch, add debug instrumentation (console.log, verbose logging)
+#      - Restart services, re-run reproduction, observe new output
+#      - Delete throwaway branch when done
+#   3. Decomposes all root causes (may be 1 or multiple compounding):
+#      - For each root cause, create a separate backlog issue with:
+#        * Which cause it is (1 of N)
+#        * Specific code path and fix suggestion
+#        * Depends-on: #X if causes are layered
+#   4. Updates original issue:
+#      - Posts summary: "Found N root causes, tracked as #X, #Y, #Z"
+#      - Replaces in-triage with in-progress
+#
+# No hard timeout — runs until Claude hits its turn limit.
+# Stack lock held for full run (triage is rare; blocking CI is acceptable).
+#
+# stack_script: path (relative to PROJECT_REPO_ROOT) of the script used to
+#   restart/rebuild the project stack. Leave empty ("") to connect to an
+#   existing staging environment instead.
+#
+# tools: MCP servers to pass to claude via --mcp-server flags.
+
+name = "triage"
+description = "Deep root cause analysis: trace data flow, add debug instrumentation, decompose causes into backlog issues."
+version = 1
+
+# Set stack_script to the restart command for local stacks.
+# Leave empty ("") to target an existing staging environment.
+stack_script = ""
+
+tools = ["playwright"]