2026-04-06 07:45:14 +00:00
#!/usr/bin/env bash
# entrypoint-reproduce.sh — Reproduce-agent sidecar entrypoint
#
# Acquires the stack lock, boots the project stack (if formula declares
# stack_script), then drives Claude + Playwright MCP to follow the bug
# report's repro steps. Labels the issue based on outcome and posts
# findings + screenshots.
#
# Usage (launched by dispatcher.sh):
# entrypoint-reproduce.sh <project_toml> <issue_number>
#
# Environment (injected by dispatcher via docker run -e):
# FORGE_URL, FORGE_TOKEN, FORGE_REPO, PRIMARY_BRANCH, DISINTO_CONTAINER=1
#
# Volumes expected:
# /home/agent/data — agent-data volume (stack-lock files go here)
# /home/agent/repos — project-repos volume
# /home/agent/.claude — host ~/.claude (OAuth credentials)
# /home/agent/.ssh — host ~/.ssh (read-only)
# /usr/local/bin/claude — host claude CLI binary (read-only)
# /var/run/docker.sock — host docker socket
set -euo pipefail
DISINTO_DIR = " ${ DISINTO_DIR :- /home/agent/disinto } "
2026-04-07 18:27:34 +00:00
# Select formula based on DISINTO_FORMULA env var (set by dispatcher)
case " ${ DISINTO_FORMULA :- reproduce } " in
triage)
ACTIVE_FORMULA = " ${ DISINTO_DIR } /formulas/triage.toml "
; ;
2026-04-08 07:14:57 +00:00
verify)
ACTIVE_FORMULA = " ${ DISINTO_DIR } /formulas/reproduce.toml "
; ;
2026-04-07 18:27:34 +00:00
*)
ACTIVE_FORMULA = " ${ DISINTO_DIR } /formulas/reproduce.toml "
; ;
esac
2026-04-06 07:45:14 +00:00
REPRODUCE_TIMEOUT = " ${ REPRODUCE_TIMEOUT_MINUTES :- 15 } "
LOGFILE = "/home/agent/data/logs/reproduce.log"
SCREENSHOT_DIR = "/home/agent/data/screenshots"
2026-04-07 18:27:34 +00:00
# ---------------------------------------------------------------------------
# Determine agent type early for log prefix
# ---------------------------------------------------------------------------
if [ " ${ DISINTO_FORMULA :- reproduce } " = "triage" ] ; then
AGENT_TYPE = "triage"
2026-04-08 07:14:57 +00:00
elif [ " ${ DISINTO_FORMULA :- reproduce } " = "verify" ] ; then
AGENT_TYPE = "verify"
2026-04-07 18:27:34 +00:00
else
AGENT_TYPE = "reproduce"
fi
2026-04-06 07:45:14 +00:00
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
log( ) {
2026-04-07 18:27:34 +00:00
printf '[%s] %s: %s\n' " $( date -u '+%Y-%m-%d %H:%M:%S UTC' ) " " $AGENT_TYPE " " $* " | tee -a " $LOGFILE "
2026-04-06 07:45:14 +00:00
}
# ---------------------------------------------------------------------------
# Argument validation
# ---------------------------------------------------------------------------
PROJECT_TOML = " ${ 1 :- } "
ISSUE_NUMBER = " ${ 2 :- } "
if [ -z " $PROJECT_TOML " ] || [ -z " $ISSUE_NUMBER " ] ; then
log "FATAL: usage: entrypoint-reproduce.sh <project_toml> <issue_number>"
exit 1
fi
if [ ! -f " $PROJECT_TOML " ] ; then
log " FATAL: project TOML not found: ${ PROJECT_TOML } "
exit 1
fi
# ---------------------------------------------------------------------------
# Bootstrap: directories, env
# ---------------------------------------------------------------------------
mkdir -p /home/agent/data/logs /home/agent/data/locks " $SCREENSHOT_DIR "
export DISINTO_CONTAINER = 1
export HOME = " ${ HOME :- /home/agent } "
export USER = " ${ USER :- agent } "
FORGE_API = " ${ FORGE_URL } /api/v1/repos/ ${ FORGE_REPO } "
# Load project name from TOML
PROJECT_NAME = $( python3 -c "
import sys, tomllib
with open( sys.argv[ 1] , 'rb' ) as f:
print( tomllib.load( f) [ 'name' ] )
" " $PROJECT_TOML " 2>/dev/null) || {
log " FATAL: could not read project name from ${ PROJECT_TOML } "
exit 1
}
export PROJECT_NAME
PROJECT_REPO_ROOT = " /home/agent/repos/ ${ PROJECT_NAME } "
2026-04-07 18:27:34 +00:00
if [ " $AGENT_TYPE " = "triage" ] ; then
log " Starting triage-agent for issue # ${ ISSUE_NUMBER } (project: ${ PROJECT_NAME } ) "
else
log " Starting reproduce-agent for issue # ${ ISSUE_NUMBER } (project: ${ PROJECT_NAME } ) "
fi
2026-04-06 07:45:14 +00:00
# ---------------------------------------------------------------------------
# Verify claude CLI is available (mounted from host)
# ---------------------------------------------------------------------------
if ! command -v claude & >/dev/null; then
log "FATAL: claude CLI not found. Mount the host binary at /usr/local/bin/claude"
exit 1
fi
# ---------------------------------------------------------------------------
# Source stack-lock library
# ---------------------------------------------------------------------------
# shellcheck source=/home/agent/disinto/lib/stack-lock.sh
source " ${ DISINTO_DIR } /lib/stack-lock.sh "
LOCK_HOLDER = " reproduce-agent- ${ ISSUE_NUMBER } "
# ---------------------------------------------------------------------------
# Read formula config
# ---------------------------------------------------------------------------
FORMULA_STACK_SCRIPT = ""
FORMULA_TIMEOUT_MINUTES = " ${ REPRODUCE_TIMEOUT } "
2026-04-07 18:27:34 +00:00
if [ -f " $ACTIVE_FORMULA " ] ; then
2026-04-06 07:45:14 +00:00
FORMULA_STACK_SCRIPT = $( python3 -c "
import sys, tomllib
with open( sys.argv[ 1] , 'rb' ) as f:
d = tomllib.load( f)
print( d.get( 'stack_script' , '' ) )
2026-04-07 18:27:34 +00:00
" " $ACTIVE_FORMULA " 2>/dev/null || echo " " )
2026-04-06 07:45:14 +00:00
_tm = $( python3 -c "
import sys, tomllib
with open( sys.argv[ 1] , 'rb' ) as f:
d = tomllib.load( f)
print( d.get( 'timeout_minutes' , '${REPRODUCE_TIMEOUT}' ) )
2026-04-07 18:27:34 +00:00
" " $ACTIVE_FORMULA " 2>/dev/null || echo " ${ REPRODUCE_TIMEOUT } " )
2026-04-06 07:45:14 +00:00
FORMULA_TIMEOUT_MINUTES = " $_tm "
fi
log " Formula stack_script: ' ${ FORMULA_STACK_SCRIPT } ' "
log " Formula timeout: ${ FORMULA_TIMEOUT_MINUTES } m "
# ---------------------------------------------------------------------------
# Fetch issue details for repro steps
# ---------------------------------------------------------------------------
log " Fetching issue # ${ ISSUE_NUMBER } from ${ FORGE_API } ... "
ISSUE_JSON = $( curl -sf \
-H " Authorization: token ${ FORGE_TOKEN } " \
" ${ FORGE_API } /issues/ ${ ISSUE_NUMBER } " 2>/dev/null) || {
log " ERROR: failed to fetch issue # ${ ISSUE_NUMBER } "
exit 1
}
ISSUE_TITLE = $( echo " $ISSUE_JSON " | jq -r '.title // "unknown"' )
ISSUE_BODY = $( echo " $ISSUE_JSON " | jq -r '.body // ""' )
log " Issue: ${ ISSUE_TITLE } "
# ---------------------------------------------------------------------------
# Acquire stack lock
# ---------------------------------------------------------------------------
log " Acquiring stack lock for project ${ PROJECT_NAME } ... "
stack_lock_acquire " $LOCK_HOLDER " " $PROJECT_NAME " 900
log "Stack lock acquired."
# ---------------------------------------------------------------------------
# Start heartbeat in background (every 2 minutes)
# ---------------------------------------------------------------------------
heartbeat_loop( ) {
while true; do
sleep 120
stack_lock_heartbeat " $LOCK_HOLDER " " $PROJECT_NAME " 2>/dev/null || true
done
}
heartbeat_loop &
HEARTBEAT_PID = $!
2026-04-07 08:41:11 +00:00
# ---------------------------------------------------------------------------
# Debug branch cleanup trap (for triage-agent throwaway branches)
# ---------------------------------------------------------------------------
DEBUG_BRANCH = " triage-debug- ${ ISSUE_NUMBER } "
# Combined EXIT trap: heartbeat kill + stack lock release + debug branch cleanup
trap ' kill " $HEARTBEAT_PID " 2>/dev/null || true
stack_lock_release " $PROJECT_NAME " " $LOCK_HOLDER " || true
git -C " $PROJECT_REPO_ROOT " checkout " $PRIMARY_BRANCH " 2>/dev/null || true
git -C " $PROJECT_REPO_ROOT " branch -D " $DEBUG_BRANCH " 2>/dev/null || true
log "Cleanup completed (trap)" ' EXIT
2026-04-06 07:45:14 +00:00
# ---------------------------------------------------------------------------
# Boot the project stack if formula declares stack_script
# ---------------------------------------------------------------------------
if [ -n " $FORMULA_STACK_SCRIPT " ] && [ -d " $PROJECT_REPO_ROOT " ] ; then
log " Running stack_script: ${ FORMULA_STACK_SCRIPT } "
# Run in project repo root; script path is relative to project repo.
# Read stack_script into array to allow arguments (e.g. "scripts/dev.sh restart --full").
read -ra _stack_cmd <<< " $FORMULA_STACK_SCRIPT "
( cd " $PROJECT_REPO_ROOT " && bash " ${ _stack_cmd [@] } " ) || {
log "WARNING: stack_script exited non-zero — continuing anyway"
}
# Give the stack a moment to stabilise
sleep 5
elif [ -n " $FORMULA_STACK_SCRIPT " ] ; then
log " WARNING: PROJECT_REPO_ROOT not found at ${ PROJECT_REPO_ROOT } — skipping stack_script "
fi
# ---------------------------------------------------------------------------
2026-04-07 18:27:34 +00:00
# Build Claude prompt based on agent type
2026-04-06 07:45:14 +00:00
# ---------------------------------------------------------------------------
TIMESTAMP = $( date -u '+%Y%m%d-%H%M%S' )
SCREENSHOT_PREFIX = " ${ SCREENSHOT_DIR } /issue- ${ ISSUE_NUMBER } - ${ TIMESTAMP } "
2026-04-07 18:27:34 +00:00
if [ " $AGENT_TYPE " = "triage" ] ; then
# Triage-agent prompt: deep root cause analysis after reproduce-agent findings
CLAUDE_PROMPT = $( cat <<PROMP T
You are the triage-agent. Your task is to perform deep root cause analysis on issue #${ISSUE_NUMBER} after the reproduce-agent has confirmed the bug.
## Issue title
${ ISSUE_TITLE }
## Issue body
${ ISSUE_BODY }
## Your task — 6-step triage workflow
You have a defined 6-step workflow to follow. Budget your turns: ~70% on tracing, ~30% on instrumentation.
### Step 1: Read reproduce-agent findings
Before doing anything else , parse all prior evidence from the issue comments.
1. Fetch the issue body and all comments:
curl -sf -H "Authorization: token \${FORGE_TOKEN}" \
"\${FORGE_API}/issues/\${ISSUE_NUMBER}" | jq -r '.body'
curl -sf -H "Authorization: token \${FORGE_TOKEN}" \
"\${FORGE_API}/issues/\${ISSUE_NUMBER}/comments" | jq -r '.[].body'
2. Identify the reproduce-agent comment ( look for sections like
"Reproduction steps" , "Logs examined" , "What was tried" ) .
3. Extract and note:
- The exact symptom ( error message, unexpected value, visual regression)
- Steps that reliably trigger the bug
- Log lines or API responses already captured
- Any hypotheses the reproduce-agent already ruled out
Do NOT repeat work the reproduce-agent already did. Your job starts where
theirs ended. If no reproduce-agent comment is found, note it and proceed
with fresh investigation using the issue body only.
### Step 2: Trace data flow from symptom to source
Systematically follow the symptom backwards through each layer of the stack.
Generic layer traversal: UI → API → backend → data store
For each layer boundary:
1. What does the upstream layer send?
2. What does the downstream layer expect?
3. Is there a mismatch? If yes — is this the root cause or a symptom?
Tracing checklist:
a. Start at the layer closest to the visible symptom.
b. Read the relevant source files — do not guess data shapes.
c. Cross-reference API contracts: compare what the code sends vs what it
should send according to schemas, type definitions, or documentation.
d. Check recent git history on suspicious files:
git log --oneline -20 -- <file>
e. Search for related issues or TODOs in the code:
grep -r "TODO\|FIXME\|HACK" -- <relevant directory>
Capture for each layer:
- The data shape flowing in and out ( field names, types, nullability)
- Whether the layer' s behavior matches its documented contract
- Any discrepancy found
If a clear root cause becomes obvious during tracing, note it and continue
checking whether additional causes exist downstream.
### Step 3: Add debug instrumentation on a throwaway branch
Use ~30% of your total turn budget here. Only instrument after tracing has
identified the most likely failure points — do not instrument blindly.
1. Create a throwaway debug branch ( NEVER commit this to main) :
cd "\$PROJECT_REPO_ROOT"
git checkout -b debug/triage-\$ { ISSUE_NUMBER}
2. Add targeted logging at the layer boundaries identified during tracing:
- Console.log / structured log statements around the suspicious code path
- Log the actual values flowing through: inputs, outputs, intermediate state
- Add verbose mode flags if the stack supports them
- Keep instrumentation minimal — only what confirms or refutes the hypothesis
3. Restart the stack using the configured script ( if set ) :
\$ { stack_script:-"# No stack_script configured — restart manually or connect to staging" }
4. Re-run the reproduction steps from the reproduce-agent findings.
5. Observe and capture new output:
- Paste relevant log lines into your working notes
- Note whether the observed values match or contradict the hypothesis
6. If the first instrumentation pass is inconclusive, iterate:
- Narrow the scope to the next most suspicious boundary
- Re-instrument, restart, re-run
- Maximum 2-3 instrumentation rounds before declaring inconclusive
Do NOT push the debug branch. It will be deleted in the cleanup step.
### Step 4: Decompose root causes into backlog issues
After tracing and instrumentation, articulate each distinct root cause.
For each root cause found:
1. Determine the relationship to other causes:
- Layered ( one causes another) → use Depends-on in the issue body
- Independent ( separate code paths fail independently) → use Related
2. Create a backlog issue for each root cause:
curl -sf -X POST "\${FORGE_API}/issues" \\
-H "Authorization: token \${FORGE_TOKEN}" \\
-H "Content-Type: application/json" \\
-d ' {
"title" : "fix: <specific description of root cause N>" ,
"body" : "## Root cause\\n<exact code path, file:line>\\n\\n## Fix suggestion\\n<recommended approach>\\n\\n## Context\\nDecomposed from #\${ISSUE_NUMBER} (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>" ,
2026-04-07 21:45:07 +00:00
"labels" : [ { "name" : "backlog" } ]
2026-04-07 18:27:34 +00:00
} '
3. Note the newly created issue numbers.
If only one root cause is found, still create a single backlog issue with
the specific code location and fix suggestion.
If the investigation is inconclusive ( no clear root cause found) , skip this
step and proceed directly to link-back with the inconclusive outcome.
### Step 5: Update original issue and relabel
Post a summary comment on the original issue and update its labels.
#### If root causes were found (conclusive):
Post a comment:
" ## Triage findings
Found N root cause( s) :
- #X — <one-line description> (cause 1 of N)
- #Y — <one-line description> (cause 2 of N, depends on #X)
Data flow traced: <layer where the bug originates>
Instrumentation: <key log output that confirmed the cause>
Next step: backlog issues above will be implemented in dependency order."
Then swap labels:
- Remove: in-triage
- Add: in-progress
#### If investigation was inconclusive (turn budget exhausted):
Post a comment:
" ## Triage — inconclusive
Traced: <layers checked>
Tried: <instrumentation attempts and what they showed>
Hypothesis: <best guess at cause, if any>
No definitive root cause identified. Leaving in-triage for supervisor
to handle as a stale triage session."
Do NOT relabel. Leave in-triage. The supervisor monitors stale triage
sessions and will escalate or reassign.
### Step 6: Delete throwaway debug branch
Always delete the debug branch, even if the investigation was inconclusive.
1. Switch back to the main branch:
cd "\$PROJECT_REPO_ROOT"
git checkout "\$PRIMARY_BRANCH"
2. Delete the local debug branch:
git branch -D debug/triage-\$ { ISSUE_NUMBER}
3. Confirm no remote was pushed ( if accidentally pushed, delete it too) :
git push origin --delete debug/triage-\$ { ISSUE_NUMBER} 2>/dev/null || true
4. Verify the worktree is clean:
git status
git worktree list
A clean repo is a prerequisite for the next dev-agent run. Never leave
debug branches behind — they accumulate and pollute the branch list.
## Notes
- The application is accessible at localhost ( network_mode: host)
- Budget: 70% tracing data flow, 30% instrumented re-runs
- Timeout: \$ { FORMULA_TIMEOUT_MINUTES} minutes total ( or until turn limit)
- Stack lock is held for the full run
- If stack_script is empty, connect to existing staging environment
Begin now.
PROMPT
)
else
# Reproduce-agent prompt: reproduce the bug and report findings
CLAUDE_PROMPT = $( cat <<PROMP T
2026-04-06 07:45:14 +00:00
You are the reproduce-agent. Your task is to reproduce the bug described in issue #${ISSUE_NUMBER} and report your findings.
## Issue title
${ ISSUE_TITLE }
## Issue body
${ ISSUE_BODY }
2026-04-06 20:54:15 +00:00
## Your task — PRIMARY GOAL FIRST
2026-04-06 07:45:14 +00:00
2026-04-06 20:54:15 +00:00
This agent has ONE primary job and ONE secondary, minor job. Follow this ORDER:
2026-04-06 07:45:14 +00:00
2026-04-06 20:54:15 +00:00
### PRIMARY: Can the bug be reproduced? (60% of your turns)
2026-04-06 07:45:14 +00:00
2026-04-06 20:54:15 +00:00
This is the EXIT GATE. Answer YES or NO before doing anything else .
2026-04-06 07:45:14 +00:00
2026-04-06 20:54:15 +00:00
1. Read the issue, understand the claimed behavior
2. Navigate the app via Playwright, follow the reported steps
3. Observe: does the symptom match the report?
4. Take screenshots as evidence ( save to: ${ SCREENSHOT_PREFIX } -step-N.png)
5. Conclude: **reproduced** or **cannot reproduce**
2026-04-06 07:45:14 +00:00
2026-04-06 20:54:15 +00:00
If **cannot reproduce** → Write OUTCOME = cannot-reproduce, write findings, DONE. EXIT.
If **inconclusive** ( timeout, env issues, app not reachable) → Write OUTCOME = needs-triage with reason, write findings, DONE. EXIT.
If **reproduced** → Continue to secondary check.
### SECONDARY (minor): Is the cause obvious? (40% of your turns, only if reproduced)
Only after reproduction is confirmed. Quick check only — do not go deep.
1. Check container logs: docker compose -f ${ PROJECT_REPO_ROOT } /docker-compose.yml logs --tail= 200
Look for : stack traces, error messages, wrong addresses, missing config, parse errors
2. Check browser console output captured during reproduction
3. If the cause JUMPS OUT ( clear error, obvious misconfiguration) → note it
If **obvious cause** → Write OUTCOME = reproduced and ROOT_CAUSE = <one-line summary>
If **not obvious** → Write OUTCOME = reproduced ( no ROOT_CAUSE line)
## Output files
1. **Findings report** — Write to: /tmp/reproduce-findings-${ ISSUE_NUMBER } .md
2026-04-06 07:45:14 +00:00
Include:
- Steps you followed
- What you observed ( screenshots referenced by path)
- Log excerpts ( truncated to relevant lines)
2026-04-06 20:54:15 +00:00
- OUTCOME line: OUTCOME = reproduced OR OUTCOME = cannot-reproduce OR OUTCOME = needs-triage
- ROOT_CAUSE line ( ONLY if cause is obvious) : ROOT_CAUSE = <one-line summary>
2026-04-06 07:45:14 +00:00
2026-04-06 20:54:15 +00:00
2. **Outcome file** — Write to: /tmp/reproduce-outcome-${ ISSUE_NUMBER } .txt
Write ONLY the outcome word: reproduced OR cannot-reproduce OR needs-triage
2026-04-06 07:45:14 +00:00
## Notes
- The application is accessible at localhost ( network_mode: host)
- Take screenshots liberally — they are evidence
- If the app is not running or not reachable, write outcome: cannot-reproduce with reason "stack not reachable"
- Timeout: ${ FORMULA_TIMEOUT_MINUTES } minutes total
2026-04-06 20:54:15 +00:00
- EXIT gates are enforced — do not continue to secondary check if primary result is NO or inconclusive
2026-04-06 07:45:14 +00:00
Begin now.
PROMPT
2026-04-07 18:27:34 +00:00
)
fi
2026-04-06 07:45:14 +00:00
# ---------------------------------------------------------------------------
# Run Claude with Playwright MCP
# ---------------------------------------------------------------------------
2026-04-07 18:27:34 +00:00
if [ " $AGENT_TYPE " = "triage" ] ; then
log " Starting triage-agent session (timeout: ${ FORMULA_TIMEOUT_MINUTES } m)... "
else
log " Starting Claude reproduction session (timeout: ${ FORMULA_TIMEOUT_MINUTES } m)... "
fi
2026-04-06 07:45:14 +00:00
CLAUDE_EXIT = 0
timeout " $(( FORMULA_TIMEOUT_MINUTES * 60 )) " \
claude -p " $CLAUDE_PROMPT " \
--mcp-server playwright \
--output-format text \
--max-turns 40 \
> " /tmp/reproduce-claude-output- ${ ISSUE_NUMBER } .txt " 2>& 1 || CLAUDE_EXIT = $?
if [ $CLAUDE_EXIT -eq 124 ] ; then
log " WARNING: Claude session timed out after ${ FORMULA_TIMEOUT_MINUTES } m "
fi
2026-04-07 21:45:07 +00:00
# ---------------------------------------------------------------------------
# Triage post-processing: enforce backlog label on created issues
# ---------------------------------------------------------------------------
# The triage agent may create sub-issues for root causes. Ensure they have
# the backlog label so dev-agent picks them up. Parse Claude output for
# newly created issue numbers and add the backlog label.
if [ " $AGENT_TYPE " = "triage" ] ; then
log "Triage post-processing: checking for created issues to label..."
# Extract issue numbers from Claude output that were created during triage.
# Match unambiguous creation patterns: "Created issue #123", "Created #123",
# or "harb#123". Do NOT match bare #123 which would capture references in
# the triage summary (e.g., "Decomposed from #5", "cause 1 of 2", etc.).
CREATED_ISSUES = $( grep -oE '(Created|created) issue #[0-9]+|(Created|created) #[0-9]+|harb#[0-9]+' \
" /tmp/reproduce-claude-output- ${ ISSUE_NUMBER } .txt " 2>/dev/null | \
grep -oE '[0-9]+' | sort -u | head -10)
if [ -n " $CREATED_ISSUES " ] ; then
# Get backlog label ID
BACKLOG_ID = $( _label_id "backlog" "#fef2c0" )
if [ -z " $BACKLOG_ID " ] ; then
log "WARNING: could not get backlog label ID — skipping label enforcement"
else
for issue_num in $CREATED_ISSUES ; do
_add_label " $issue_num " " $BACKLOG_ID "
log " Added backlog label to created issue # ${ issue_num } "
done
fi
fi
fi
2026-04-06 07:45:14 +00:00
# ---------------------------------------------------------------------------
# Read outcome
# ---------------------------------------------------------------------------
OUTCOME = "needs-triage"
2026-04-07 22:03:25 +00:00
OUTCOME_FILE = ""
OUTCOME_FOUND = false
# Check reproduce-agent outcome file first
2026-04-06 07:45:14 +00:00
if [ -f " /tmp/reproduce-outcome- ${ ISSUE_NUMBER } .txt " ] ; then
2026-04-07 22:03:25 +00:00
OUTCOME_FILE = " /tmp/reproduce-outcome- ${ ISSUE_NUMBER } .txt "
OUTCOME_FOUND = true
fi
# For triage agent, also check triage-specific outcome file
if [ " $AGENT_TYPE " = "triage" ] && [ -f " /tmp/triage-outcome- ${ ISSUE_NUMBER } .txt " ] ; then
OUTCOME_FILE = " /tmp/triage-outcome- ${ ISSUE_NUMBER } .txt "
OUTCOME_FOUND = true
fi
if [ " $OUTCOME_FOUND " = true ] ; then
_raw = $( tr -d '[:space:]' < " $OUTCOME_FILE " | tr '[:upper:]' '[:lower:]' )
2026-04-06 07:45:14 +00:00
case " $_raw " in
reproduced| cannot-reproduce| needs-triage)
OUTCOME = " $_raw "
; ;
*)
log " WARNING: unexpected outcome ' ${ _raw } ' — defaulting to needs-triage "
; ;
esac
else
2026-04-07 22:03:25 +00:00
# For triage agent, detect success by checking Claude output for:
# 1. Triage findings comment indicating root causes were found
# 2. Sub-issues created during triage
if [ " $AGENT_TYPE " = "triage" ] ; then
CLAUDE_OUTPUT = " /tmp/reproduce-claude-output- ${ ISSUE_NUMBER } .txt "
# Check for triage findings comment with root causes found
if grep -q "## Triage findings" " $CLAUDE_OUTPUT " 2>/dev/null && \
grep -q "Found [0-9]* root cause(s)" " $CLAUDE_OUTPUT " 2>/dev/null; then
log "Triage success detected: findings comment with root causes found"
OUTCOME = "reproduced"
OUTCOME_FOUND = true
# Check for created sub-issues during triage
elif grep -qE "(Created|created) issue #[0-9]+|(Created|created) #[0-9]+|harb#[0-9]+" " $CLAUDE_OUTPUT " 2>/dev/null; then
log "Triage success detected: sub-issues created"
OUTCOME = "reproduced"
OUTCOME_FOUND = true
else
log "WARNING: outcome file not found and no triage success indicators — defaulting to needs-triage"
fi
else
log "WARNING: outcome file not found — defaulting to needs-triage"
fi
2026-04-06 07:45:14 +00:00
fi
log " Outcome: ${ OUTCOME } "
2026-04-08 07:14:57 +00:00
# ---------------------------------------------------------------------------
# Verification mode: check if this is a parent issue whose sub-issues are all closed
# If so, re-run reproduction to verify the bug is fixed
# ---------------------------------------------------------------------------
if [ " $AGENT_TYPE " = "verify" ] ; then
log " Verification mode: checking for sub-issues of parent issue # ${ ISSUE_NUMBER } "
# Check if this issue is a parent with sub-issues
if _is_parent_issue " $ISSUE_NUMBER " ; then
log " Found ${# _PARENT_SUB_ISSUES [@] } sub-issue(s) for parent # ${ ISSUE_NUMBER } "
# Check if all sub-issues are closed
if _are_all_sub_issues_closed; then
log "All sub-issues are closed — triggering verification reproduction"
# Re-run the reproduction to check if bug is fixed
log "Running verification reproduction..."
# Build Claude prompt for verification mode
SUB_ISSUE_LIST = $( _get_sub_issue_list)
CLAUDE_PROMPT = $( cat <<PROMP T
You are the reproduce-agent running in **verification mode**. Your task is to re-run the reproduction steps from the original bug report to verify that the bug has been fixed after all sub-issues were resolved.
## Issue title
${ ISSUE_TITLE }
## Issue body
${ ISSUE_BODY }
## Context
This issue was decomposed into the following sub-issues that have all been resolved:
${ SUB_ISSUE_LIST }
All sub-issues have been closed, indicating that fixes have been merged. Your task is to verify that the original bug is now fixed.
## Your task
1. Follow the reproduction steps from the original issue body
2. Execute each step carefully and observe the current behavior
3. Take screenshots as evidence ( save to: ${ SCREENSHOT_PREFIX } -step-N.png)
4. Determine if the bug is **fixed** ( no longer reproduces) or **still present**
## Output files
1. **Findings report** — Write to: /tmp/reproduce-findings-${ ISSUE_NUMBER } .md
Include:
- Steps you followed
- What you observed ( screenshots referenced by path)
- OUTCOME line: OUTCOME = verified-fixed OR OUTCOME = still-reproduces
2. **Outcome file** — Write to: /tmp/reproduce-outcome-${ ISSUE_NUMBER } .txt
Write ONLY the outcome word: verified-fixed OR still-reproduces
## Notes
- The application is accessible at localhost ( network_mode: host)
- Take screenshots liberally — they are evidence
- If the app is not running or not reachable, write outcome: still-reproduces with reason "stack not reachable"
- Timeout: ${ FORMULA_TIMEOUT_MINUTES } minutes total
Begin now.
PROMPT
)
# Run Claude for verification
log " Starting Claude verification session (timeout: ${ FORMULA_TIMEOUT_MINUTES } m)... "
CLAUDE_EXIT = 0
timeout " $(( FORMULA_TIMEOUT_MINUTES * 60 )) " \
claude -p " $CLAUDE_PROMPT " \
--mcp-server playwright \
--output-format text \
--max-turns 40 \
> " /tmp/reproduce-claude-output- ${ ISSUE_NUMBER } .txt " 2>& 1 || CLAUDE_EXIT = $?
if [ $CLAUDE_EXIT -eq 124 ] ; then
log " WARNING: Claude verification session timed out after ${ FORMULA_TIMEOUT_MINUTES } m "
fi
# Read verification outcome
VERIFY_OUTCOME = "still-reproduces"
if [ -f " /tmp/reproduce-outcome- ${ ISSUE_NUMBER } .txt " ] ; then
_raw = $( tr -d '[:space:]' < " /tmp/reproduce-outcome- ${ ISSUE_NUMBER } .txt " | tr '[:upper:]' '[:lower:]' )
case " $_raw " in
verified-fixed| still-reproduces)
VERIFY_OUTCOME = " $_raw "
; ;
*)
log " WARNING: unexpected verification outcome ' ${ _raw } ' — defaulting to still-reproduces "
; ;
esac
fi
log " Verification outcome: ${ VERIFY_OUTCOME } "
# Read findings
VERIFY_FINDINGS = ""
if [ -f " /tmp/reproduce-findings- ${ ISSUE_NUMBER } .md " ] ; then
VERIFY_FINDINGS = $( cat " /tmp/reproduce-findings- ${ ISSUE_NUMBER } .md " )
else
VERIFY_FINDINGS = " Verification-agent completed but did not write a findings report. Claude output:\n\`\`\`\n $( tail -100 " /tmp/reproduce-claude-output- ${ ISSUE_NUMBER } .txt " 2>/dev/null || echo '(no output)' ) \n\`\`\` "
fi
# Collect screenshot paths
VERIFY_SCREENSHOT_LIST = ""
if find " $( dirname " ${ SCREENSHOT_PREFIX } " ) " -name " $( basename " ${ SCREENSHOT_PREFIX } " ) -*.png " -maxdepth 1 2>/dev/null | grep -q .; then
VERIFY_SCREENSHOT_LIST = "\n\n**Screenshots taken:**\n"
for f in " ${ SCREENSHOT_PREFIX } " -*.png; do
VERIFY_SCREENSHOT_LIST = " ${ VERIFY_SCREENSHOT_LIST } - \` $( basename " $f " ) \`\n "
done
fi
# Process verification result
if [ " $VERIFY_OUTCOME " = "verified-fixed" ] ; then
# Bug is fixed — comment, remove in-progress, close the issue
AGENT_NAME = "Verification-agent"
COMMENT_HEADER = " ## ${ AGENT_NAME } : **Verified fixed** :white_check_mark: :tada: "
COMMENT_BODY = " ${ COMMENT_HEADER }
The bug described in this issue has been **verified as fixed** after all sub-issues were resolved.
**Resolved sub-issues:**
${ SUB_ISSUE_LIST }
**Verification result:** The reproduction steps no longer trigger the bug.
${ VERIFY_FINDINGS } ${ VERIFY_SCREENSHOT_LIST }
---
*${ AGENT_NAME } verification run at $( date -u '+%Y-%m-%d %H:%M:%S UTC' ) — project: ${ PROJECT_NAME } *
Closing this issue as the bug has been verified fixed."
_post_comment " $ISSUE_NUMBER " " $COMMENT_BODY "
log " Posted verification comment to issue # ${ ISSUE_NUMBER } "
# Remove in-progress label
IN_PROGRESS_ID = $( _label_id "in-progress" "#1d76db" )
_remove_label " $ISSUE_NUMBER " " $IN_PROGRESS_ID "
# Close the issue
curl -sf -X PATCH \
-H " Authorization: token ${ FORGE_TOKEN } " \
-H "Content-Type: application/json" \
" ${ FORGE_API } /issues/ ${ ISSUE_NUMBER } " \
-d '{"state":"closed"}' >/dev/null 2>& 1 || true
log " Closed issue # ${ ISSUE_NUMBER } — bug verified fixed "
else
# Bug still reproduces — comment, keep in-progress, trigger new triage
AGENT_NAME = "Verification-agent"
COMMENT_HEADER = " ## ${ AGENT_NAME } : **Still reproduces after fixes** :x: "
COMMENT_BODY = " ${ COMMENT_HEADER }
The bug described in this issue **still reproduces** after all sub-issues were resolved.
**Resolved sub-issues:**
${ SUB_ISSUE_LIST }
**Verification result:** The reproduction steps still trigger the bug. Additional investigation needed.
${ VERIFY_FINDINGS } ${ VERIFY_SCREENSHOT_LIST }
---
*${ AGENT_NAME } verification run at $( date -u '+%Y-%m-%d %H:%M:%S UTC' ) — project: ${ PROJECT_NAME } *
This issue will be re-entered into triage for further investigation."
_post_comment " $ISSUE_NUMBER " " $COMMENT_BODY "
log " Posted verification comment to issue # ${ ISSUE_NUMBER } "
# Re-trigger triage by adding in-triage label
IN_TRIAGE_ID = $( _label_id "in-triage" "#d93f0b" )
_add_label " $ISSUE_NUMBER " " $IN_TRIAGE_ID "
log " Re-triggered triage for issue # ${ ISSUE_NUMBER } — bug still reproduces "
fi
# Exit after verification
log " Verification complete. Outcome: ${ VERIFY_OUTCOME } "
exit 0
else
log "Not all sub-issues are closed yet — skipping verification"
fi
else
log " Issue # ${ ISSUE_NUMBER } is not a parent with tracked sub-issues — running standard reproduction "
fi
fi
2026-04-06 07:45:14 +00:00
# ---------------------------------------------------------------------------
# Read findings
# ---------------------------------------------------------------------------
FINDINGS = ""
if [ -f " /tmp/reproduce-findings- ${ ISSUE_NUMBER } .md " ] ; then
FINDINGS = $( cat " /tmp/reproduce-findings- ${ ISSUE_NUMBER } .md " )
else
2026-04-07 18:27:34 +00:00
if [ " $AGENT_TYPE " = "triage" ] ; then
FINDINGS = " Triage-agent completed but did not write a findings report. Claude output:\n\`\`\`\n $( tail -100 " /tmp/reproduce-claude-output- ${ ISSUE_NUMBER } .txt " 2>/dev/null || echo '(no output)' ) \n\`\`\` "
else
FINDINGS = " Reproduce-agent completed but did not write a findings report. Claude output:\n\`\`\`\n $( tail -100 " /tmp/reproduce-claude-output- ${ ISSUE_NUMBER } .txt " 2>/dev/null || echo '(no output)' ) \n\`\`\` "
fi
2026-04-06 07:45:14 +00:00
fi
# ---------------------------------------------------------------------------
# Collect screenshot paths for comment
# ---------------------------------------------------------------------------
SCREENSHOT_LIST = ""
if find " $( dirname " ${ SCREENSHOT_PREFIX } " ) " -name " $( basename " ${ SCREENSHOT_PREFIX } " ) -*.png " -maxdepth 1 2>/dev/null | grep -q .; then
SCREENSHOT_LIST = "\n\n**Screenshots taken:**\n"
for f in " ${ SCREENSHOT_PREFIX } " -*.png; do
SCREENSHOT_LIST = " ${ SCREENSHOT_LIST } - \` $( basename " $f " ) \`\n "
done
fi
2026-04-08 07:14:57 +00:00
# ---------------------------------------------------------------------------
# Verification helpers — bug-report lifecycle
# ---------------------------------------------------------------------------
# Check if an issue is a parent with sub-issues (identified by sub-issues
# whose body contains "Decomposed from #N" where N is the parent's number).
# Returns: 0 if parent with sub-issues found, 1 otherwise
# Sets: _PARENT_SUB_ISSUES (space-separated list of sub-issue numbers)
_is_parent_issue( ) {
local parent_num = " $1 "
_PARENT_SUB_ISSUES = ""
# Fetch all issues (open and closed) to find sub-issues
local all_issues_json
all_issues_json = $( curl -sf \
-H " Authorization: token ${ FORGE_TOKEN } " \
" ${ FORGE_API } /issues?type=issues&state=all&limit=50 " 2>/dev/null) || return 1
# Find issues whose body contains "Decomposed from #<parent_num>"
local sub_issues
sub_issues = $( python3 -c '
import sys, json
parent_num = sys.argv[ 1]
data = json.load( open( "/dev/stdin" ) )
sub_issues = [ ]
for issue in data:
body = issue.get( "body" ) or ""
if f"Decomposed from #{parent_num}" in body:
sub_issues.append( str( issue[ "number" ] ) )
print( " " .join( sub_issues) )
' " $parent_num " < <( echo " $all_issues_json " ) ) || return 1
if [ -n " $sub_issues " ] ; then
_PARENT_SUB_ISSUES = " $sub_issues "
return 0
fi
return 1
}
# Check if all sub-issues of a parent are closed.
# Requires: _PARENT_SUB_ISSUES to be set
# Returns: 0 if all closed, 1 if any still open
_are_all_sub_issues_closed( ) {
if [ -z " ${ _PARENT_SUB_ISSUES :- } " ] ; then
return 1
fi
for sub_num in $_PARENT_SUB_ISSUES ; do
local sub_state
sub_state = $( curl -sf \
-H " Authorization: token ${ FORGE_TOKEN } " \
" ${ FORGE_API } /issues/ ${ sub_num } " 2>/dev/null | jq -r '.state // "unknown"' ) || {
log " WARNING: could not fetch state of sub-issue # ${ sub_num } "
return 1
}
if [ " $sub_state " != "closed" ] ; then
log " Sub-issue # ${ sub_num } is not closed (state: ${ sub_state } ) "
return 1
fi
done
return 0
}
# Get sub-issue details for comment
# Returns: formatted list of sub-issues
_get_sub_issue_list( ) {
local result = ""
for sub_num in $_PARENT_SUB_ISSUES ; do
local sub_title
sub_title = $( curl -sf \
-H " Authorization: token ${ FORGE_TOKEN } " \
" ${ FORGE_API } /issues/ ${ sub_num } " 2>/dev/null | jq -r '.title // "unknown"' ) || {
sub_title = "unknown"
}
result = " ${ result } - # ${ sub_num } ${ sub_title } " $'\n'
done
printf '%s' " $result "
}
2026-04-06 07:45:14 +00:00
# ---------------------------------------------------------------------------
# Label helpers
# ---------------------------------------------------------------------------
_label_id( ) {
local name = " $1 " color = " $2 "
local id
id = $( curl -sf \
-H " Authorization: token ${ FORGE_TOKEN } " \
" ${ FORGE_API } /labels " 2>/dev/null \
| jq -r --arg n " $name " '.[] | select(.name == $n) | .id' 2>/dev/null || echo "" )
if [ -z " $id " ] ; then
id = $( curl -sf -X POST \
-H " Authorization: token ${ FORGE_TOKEN } " \
-H "Content-Type: application/json" \
" ${ FORGE_API } /labels " \
-d " {\"name\":\" ${ name } \",\"color\":\" ${ color } \"} " 2>/dev/null \
| jq -r '.id // empty' 2>/dev/null || echo "" )
fi
echo " $id "
}
_add_label( ) {
local issue = " $1 " label_id = " $2 "
[ -z " $label_id " ] && return 0
curl -sf -X POST \
-H " Authorization: token ${ FORGE_TOKEN } " \
-H "Content-Type: application/json" \
" ${ FORGE_API } /issues/ ${ issue } /labels " \
-d " {\"labels\":[ ${ label_id } ]} " >/dev/null 2>& 1 || true
}
_remove_label( ) {
local issue = " $1 " label_id = " $2 "
[ -z " $label_id " ] && return 0
curl -sf -X DELETE \
-H " Authorization: token ${ FORGE_TOKEN } " \
" ${ FORGE_API } /issues/ ${ issue } /labels/ ${ label_id } " >/dev/null 2>& 1 || true
}
_post_comment( ) {
local issue = " $1 " body = " $2 "
curl -sf -X POST \
-H " Authorization: token ${ FORGE_TOKEN } " \
-H "Content-Type: application/json" \
" ${ FORGE_API } /issues/ ${ issue } /comments " \
-d " $( jq -nc --arg b " $body " '{body:$b}' ) " >/dev/null 2>& 1 || true
}
# ---------------------------------------------------------------------------
# Apply labels and post findings
# ---------------------------------------------------------------------------
2026-04-06 20:54:15 +00:00
# Exit gate logic:
# 1. Can I reproduce it? → NO → rejected/blocked → EXIT
# → YES → continue
# 2. Is the cause obvious? → YES → backlog issue for dev → EXIT
# → NO → in-triage → EXIT
#
# Label combinations (on the ORIGINAL issue):
# - Reproduced + obvious cause: reproduced (custom status) → backlog issue created
# - Reproduced + cause unclear: in-triage → Triage-agent
# - Cannot reproduce: rejected → Human review
# - Inconclusive (timeout/error): blocked → Gardener/human
#
# The newly created fix issue (when cause is obvious) gets backlog label
# so dev-poll will pick it up for implementation.
2026-04-06 07:45:14 +00:00
# Remove bug-report label (we are resolving it)
BUG_REPORT_ID = $( _label_id "bug-report" "#e4e669" )
_remove_label " $ISSUE_NUMBER " " $BUG_REPORT_ID "
2026-04-07 18:27:34 +00:00
# Determine agent name for comments (based on AGENT_TYPE set at script start)
if [ " $AGENT_TYPE " = "triage" ] ; then
AGENT_NAME = "Triage-agent"
else
AGENT_NAME = "Reproduce-agent"
fi
2026-04-06 20:54:15 +00:00
# Determine outcome and apply appropriate labels
LABEL_NAME = ""
LABEL_COLOR = ""
COMMENT_HEADER = ""
CREATE_BACKLOG_ISSUE = false
2026-04-06 07:45:14 +00:00
case " $OUTCOME " in
reproduced)
2026-04-06 20:54:15 +00:00
# Check if root cause is obvious (ROOT_CAUSE is set and non-trivial)
2026-04-06 07:45:14 +00:00
ROOT_CAUSE = $( grep -m1 "^ROOT_CAUSE=" " /tmp/reproduce-findings- ${ ISSUE_NUMBER } .md " 2>/dev/null \
2026-04-06 20:54:15 +00:00
| sed 's/^ROOT_CAUSE=//' || echo "" )
if [ -n " $ROOT_CAUSE " ] && [ " $ROOT_CAUSE " != " See findings on issue # ${ ISSUE_NUMBER } " ] ; then
# Obvious cause → add reproduced status label, create backlog issue for dev-agent
LABEL_NAME = "reproduced"
LABEL_COLOR = "#0075ca"
2026-04-07 18:27:34 +00:00
COMMENT_HEADER = " ## ${ AGENT_NAME } : **Reproduced with obvious cause** :white_check_mark: :zap: "
2026-04-06 20:54:15 +00:00
CREATE_BACKLOG_ISSUE = true
else
# Cause unclear → in-triage → Triage-agent
LABEL_NAME = "in-triage"
LABEL_COLOR = "#d93f0b"
2026-04-07 18:27:34 +00:00
COMMENT_HEADER = " ## ${ AGENT_NAME } : **Reproduced, cause unclear** :white_check_mark: :mag: "
2026-04-06 20:54:15 +00:00
fi
2026-04-06 07:45:14 +00:00
; ;
cannot-reproduce)
2026-04-06 20:54:15 +00:00
# Cannot reproduce → rejected → Human review
LABEL_NAME = "rejected"
2026-04-06 07:45:14 +00:00
LABEL_COLOR = "#e4e669"
2026-04-07 18:27:34 +00:00
COMMENT_HEADER = " ## ${ AGENT_NAME } : **Cannot reproduce** :x: "
2026-04-06 07:45:14 +00:00
; ;
needs-triage)
2026-04-06 20:54:15 +00:00
# Inconclusive (timeout, env issues) → blocked → Gardener/human
LABEL_NAME = "blocked"
LABEL_COLOR = "#e11d48"
2026-04-07 18:27:34 +00:00
COMMENT_HEADER = " ## ${ AGENT_NAME } : **Inconclusive, blocked** :construction: "
2026-04-06 07:45:14 +00:00
; ;
esac
2026-04-06 20:54:15 +00:00
# Apply the outcome label
2026-04-06 07:45:14 +00:00
OUTCOME_LABEL_ID = $( _label_id " $LABEL_NAME " " $LABEL_COLOR " )
_add_label " $ISSUE_NUMBER " " $OUTCOME_LABEL_ID "
log " Applied label ' ${ LABEL_NAME } ' to issue # ${ ISSUE_NUMBER } "
2026-04-06 20:54:15 +00:00
# If obvious cause, create backlog issue for dev-agent
if [ " $CREATE_BACKLOG_ISSUE " = true ] ; then
BACKLOG_BODY = " ## Summary
Bug reproduced from issue #${ISSUE_NUMBER}: ${ISSUE_TITLE}
Root cause ( quick log analysis) : ${ ROOT_CAUSE }
## Dependencies
- #${ISSUE_NUMBER}
## Affected files
- ( see findings on issue #${ISSUE_NUMBER})
## Acceptance criteria
- [ ] Root cause confirmed and fixed
- [ ] Issue #${ISSUE_NUMBER} no longer reproducible"
log "Creating backlog issue for reproduced bug with obvious cause..."
curl -sf -X POST \
-H " Authorization: token ${ FORGE_TOKEN } " \
-H "Content-Type: application/json" \
" ${ FORGE_API } /issues " \
-d " $( jq -nc \
--arg t " fix: $( echo " $ISSUE_TITLE " | sed 's/^bug:/fix:/' | sed 's/^feat:/fix:/' ) " \
--arg b " $BACKLOG_BODY " \
'{title:$t, body:$b, labels:[{"name":"backlog"}]}' 2>/dev/null) " >/dev/null 2>&1 || \
log "WARNING: failed to create backlog issue"
fi
2026-04-06 07:45:14 +00:00
COMMENT_BODY = " ${ COMMENT_HEADER }
${ FINDINGS } ${ SCREENSHOT_LIST }
---
2026-04-07 18:27:34 +00:00
*${ AGENT_NAME } run at $( date -u '+%Y-%m-%d %H:%M:%S UTC' ) — project: ${ PROJECT_NAME } *"
2026-04-06 07:45:14 +00:00
_post_comment " $ISSUE_NUMBER " " $COMMENT_BODY "
log " Posted findings to issue # ${ ISSUE_NUMBER } "
2026-04-07 18:27:34 +00:00
log " ${ AGENT_NAME } done. Outcome: ${ OUTCOME } "