From 630344900d68e1971c4e4505137a3ef756df9276 Mon Sep 17 00:00:00 2001 From: Agent Date: Tue, 7 Apr 2026 18:27:34 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20fix:=20entrypoint-reproduce.sh=20ignores?= =?UTF-8?q?=20DISINTO=5FFORMULA=20env=20var=20=E2=80=94=20always=20runs=20?= =?UTF-8?q?reproduce=20formula=20(#356)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker/reproduce/entrypoint-reproduce.sh | 263 +++++++++++++++++++++-- 1 file changed, 246 insertions(+), 17 deletions(-) diff --git a/docker/reproduce/entrypoint-reproduce.sh b/docker/reproduce/entrypoint-reproduce.sh index 2cbb3f9..c36192a 100644 --- a/docker/reproduce/entrypoint-reproduce.sh +++ b/docker/reproduce/entrypoint-reproduce.sh @@ -23,16 +23,35 @@ set -euo pipefail DISINTO_DIR="${DISINTO_DIR:-/home/agent/disinto}" -REPRODUCE_FORMULA="${DISINTO_DIR}/formulas/reproduce.toml" + +# Select formula based on DISINTO_FORMULA env var (set by dispatcher) +case "${DISINTO_FORMULA:-reproduce}" in + triage) + ACTIVE_FORMULA="${DISINTO_DIR}/formulas/triage.toml" + ;; + *) + ACTIVE_FORMULA="${DISINTO_DIR}/formulas/reproduce.toml" + ;; +esac + REPRODUCE_TIMEOUT="${REPRODUCE_TIMEOUT_MINUTES:-15}" LOGFILE="/home/agent/data/logs/reproduce.log" SCREENSHOT_DIR="/home/agent/data/screenshots" +# --------------------------------------------------------------------------- +# Determine agent type early for log prefix +# --------------------------------------------------------------------------- +if [ "${DISINTO_FORMULA:-reproduce}" = "triage" ]; then + AGENT_TYPE="triage" +else + AGENT_TYPE="reproduce" +fi + # --------------------------------------------------------------------------- # Logging # --------------------------------------------------------------------------- log() { - printf '[%s] reproduce: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" | tee -a "$LOGFILE" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$AGENT_TYPE" "$*" | tee -a "$LOGFILE" } # --------------------------------------------------------------------------- @@ -75,7 +94,11 @@ export PROJECT_NAME PROJECT_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}" -log "Starting reproduce-agent for issue #${ISSUE_NUMBER} (project: ${PROJECT_NAME})" +if [ "$AGENT_TYPE" = "triage" ]; then + log "Starting triage-agent for issue #${ISSUE_NUMBER} (project: ${PROJECT_NAME})" +else + log "Starting reproduce-agent for issue #${ISSUE_NUMBER} (project: ${PROJECT_NAME})" +fi # --------------------------------------------------------------------------- # Verify claude CLI is available (mounted from host) @@ -99,20 +122,20 @@ LOCK_HOLDER="reproduce-agent-${ISSUE_NUMBER}" FORMULA_STACK_SCRIPT="" FORMULA_TIMEOUT_MINUTES="${REPRODUCE_TIMEOUT}" -if [ -f "$REPRODUCE_FORMULA" ]; then +if [ -f "$ACTIVE_FORMULA" ]; then FORMULA_STACK_SCRIPT=$(python3 -c " import sys, tomllib with open(sys.argv[1], 'rb') as f: d = tomllib.load(f) print(d.get('stack_script', '')) -" "$REPRODUCE_FORMULA" 2>/dev/null || echo "") +" "$ACTIVE_FORMULA" 2>/dev/null || echo "") _tm=$(python3 -c " import sys, tomllib with open(sys.argv[1], 'rb') as f: d = tomllib.load(f) print(d.get('timeout_minutes', '${REPRODUCE_TIMEOUT}')) -" "$REPRODUCE_FORMULA" 2>/dev/null || echo "${REPRODUCE_TIMEOUT}") +" "$ACTIVE_FORMULA" 2>/dev/null || echo "${REPRODUCE_TIMEOUT}") FORMULA_TIMEOUT_MINUTES="$_tm" fi @@ -184,12 +207,202 @@ elif [ -n "$FORMULA_STACK_SCRIPT" ]; then fi # --------------------------------------------------------------------------- -# Build Claude prompt for reproduction +# Build Claude prompt based on agent type # --------------------------------------------------------------------------- TIMESTAMP=$(date -u '+%Y%m%d-%H%M%S') SCREENSHOT_PREFIX="${SCREENSHOT_DIR}/issue-${ISSUE_NUMBER}-${TIMESTAMP}" -CLAUDE_PROMPT=$(cat < + e. Search for related issues or TODOs in the code: + grep -r "TODO\|FIXME\|HACK" -- + +Capture for each layer: + - The data shape flowing in and out (field names, types, nullability) + - Whether the layer's behavior matches its documented contract + - Any discrepancy found + +If a clear root cause becomes obvious during tracing, note it and continue +checking whether additional causes exist downstream. + +### Step 3: Add debug instrumentation on a throwaway branch +Use ~30% of your total turn budget here. Only instrument after tracing has +identified the most likely failure points — do not instrument blindly. + +1. Create a throwaway debug branch (NEVER commit this to main): + cd "\$PROJECT_REPO_ROOT" + git checkout -b debug/triage-\${ISSUE_NUMBER} + +2. Add targeted logging at the layer boundaries identified during tracing: + - Console.log / structured log statements around the suspicious code path + - Log the actual values flowing through: inputs, outputs, intermediate state + - Add verbose mode flags if the stack supports them + - Keep instrumentation minimal — only what confirms or refutes the hypothesis + +3. Restart the stack using the configured script (if set): + \${stack_script:-"# No stack_script configured — restart manually or connect to staging"} + +4. Re-run the reproduction steps from the reproduce-agent findings. + +5. Observe and capture new output: + - Paste relevant log lines into your working notes + - Note whether the observed values match or contradict the hypothesis + +6. If the first instrumentation pass is inconclusive, iterate: + - Narrow the scope to the next most suspicious boundary + - Re-instrument, restart, re-run + - Maximum 2-3 instrumentation rounds before declaring inconclusive + +Do NOT push the debug branch. It will be deleted in the cleanup step. + +### Step 4: Decompose root causes into backlog issues +After tracing and instrumentation, articulate each distinct root cause. + +For each root cause found: + +1. Determine the relationship to other causes: + - Layered (one causes another) → use Depends-on in the issue body + - Independent (separate code paths fail independently) → use Related + +2. Create a backlog issue for each root cause: + curl -sf -X POST "\${FORGE_API}/issues" \\ + -H "Authorization: token \${FORGE_TOKEN}" \\ + -H "Content-Type: application/json" \\ + -d '{ + "title": "fix: ", + "body": "## Root cause\\n\\n\\n## Fix suggestion\\n\\n\\n## Context\\nDecomposed from #\${ISSUE_NUMBER} (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>", + "labels": ["backlog"] + }' + +3. Note the newly created issue numbers. + +If only one root cause is found, still create a single backlog issue with +the specific code location and fix suggestion. + +If the investigation is inconclusive (no clear root cause found), skip this +step and proceed directly to link-back with the inconclusive outcome. + +### Step 5: Update original issue and relabel +Post a summary comment on the original issue and update its labels. + +#### If root causes were found (conclusive): + +Post a comment: + "## Triage findings + + Found N root cause(s): + - #X — (cause 1 of N) + - #Y — (cause 2 of N, depends on #X) + + Data flow traced: + Instrumentation: + + Next step: backlog issues above will be implemented in dependency order." + +Then swap labels: + - Remove: in-triage + - Add: in-progress + +#### If investigation was inconclusive (turn budget exhausted): + +Post a comment: + "## Triage — inconclusive + + Traced: + Tried: + Hypothesis: + + No definitive root cause identified. Leaving in-triage for supervisor + to handle as a stale triage session." + +Do NOT relabel. Leave in-triage. The supervisor monitors stale triage +sessions and will escalate or reassign. + +### Step 6: Delete throwaway debug branch +Always delete the debug branch, even if the investigation was inconclusive. + +1. Switch back to the main branch: + cd "\$PROJECT_REPO_ROOT" + git checkout "\$PRIMARY_BRANCH" + +2. Delete the local debug branch: + git branch -D debug/triage-\${ISSUE_NUMBER} + +3. Confirm no remote was pushed (if accidentally pushed, delete it too): + git push origin --delete debug/triage-\${ISSUE_NUMBER} 2>/dev/null || true + +4. Verify the worktree is clean: + git status + git worktree list + +A clean repo is a prerequisite for the next dev-agent run. Never leave +debug branches behind — they accumulate and pollute the branch list. + +## Notes +- The application is accessible at localhost (network_mode: host) +- Budget: 70% tracing data flow, 30% instrumented re-runs +- Timeout: \${FORMULA_TIMEOUT_MINUTES} minutes total (or until turn limit) +- Stack lock is held for the full run +- If stack_script is empty, connect to existing staging environment + +Begin now. +PROMPT + ) +else + # Reproduce-agent prompt: reproduce the bug and report findings + CLAUDE_PROMPT=$(cat </dev/null || echo '(no output)')\n\`\`\`" + if [ "$AGENT_TYPE" = "triage" ]; then + FINDINGS="Triage-agent completed but did not write a findings report. Claude output:\n\`\`\`\n$(tail -100 "/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>/dev/null || echo '(no output)')\n\`\`\`" + else + FINDINGS="Reproduce-agent completed but did not write a findings report. Claude output:\n\`\`\`\n$(tail -100 "/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>/dev/null || echo '(no output)')\n\`\`\`" + fi fi # --------------------------------------------------------------------------- @@ -381,6 +603,13 @@ _post_comment() { BUG_REPORT_ID=$(_label_id "bug-report" "#e4e669") _remove_label "$ISSUE_NUMBER" "$BUG_REPORT_ID" +# Determine agent name for comments (based on AGENT_TYPE set at script start) +if [ "$AGENT_TYPE" = "triage" ]; then + AGENT_NAME="Triage-agent" +else + AGENT_NAME="Reproduce-agent" +fi + # Determine outcome and apply appropriate labels LABEL_NAME="" LABEL_COLOR="" @@ -396,13 +625,13 @@ case "$OUTCOME" in # Obvious cause → add reproduced status label, create backlog issue for dev-agent LABEL_NAME="reproduced" LABEL_COLOR="#0075ca" - COMMENT_HEADER="## Reproduce-agent: **Reproduced with obvious cause** :white_check_mark: :zap:" + COMMENT_HEADER="## ${AGENT_NAME}: **Reproduced with obvious cause** :white_check_mark: :zap:" CREATE_BACKLOG_ISSUE=true else # Cause unclear → in-triage → Triage-agent LABEL_NAME="in-triage" LABEL_COLOR="#d93f0b" - COMMENT_HEADER="## Reproduce-agent: **Reproduced, cause unclear** :white_check_mark: :mag:" + COMMENT_HEADER="## ${AGENT_NAME}: **Reproduced, cause unclear** :white_check_mark: :mag:" fi ;; @@ -410,14 +639,14 @@ case "$OUTCOME" in # Cannot reproduce → rejected → Human review LABEL_NAME="rejected" LABEL_COLOR="#e4e669" - COMMENT_HEADER="## Reproduce-agent: **Cannot reproduce** :x:" + COMMENT_HEADER="## ${AGENT_NAME}: **Cannot reproduce** :x:" ;; needs-triage) # Inconclusive (timeout, env issues) → blocked → Gardener/human LABEL_NAME="blocked" LABEL_COLOR="#e11d48" - COMMENT_HEADER="## Reproduce-agent: **Inconclusive, blocked** :construction:" + COMMENT_HEADER="## ${AGENT_NAME}: **Inconclusive, blocked** :construction:" ;; esac @@ -460,9 +689,9 @@ COMMENT_BODY="${COMMENT_HEADER} ${FINDINGS}${SCREENSHOT_LIST} --- -*Reproduce-agent run at $(date -u '+%Y-%m-%d %H:%M:%S UTC') — project: ${PROJECT_NAME}*" +*${AGENT_NAME} run at $(date -u '+%Y-%m-%d %H:%M:%S UTC') — project: ${PROJECT_NAME}*" _post_comment "$ISSUE_NUMBER" "$COMMENT_BODY" log "Posted findings to issue #${ISSUE_NUMBER}" -log "Reproduce-agent done. Outcome: ${OUTCOME}" +log "${AGENT_NAME} done. Outcome: ${OUTCOME}"