fix: feat: extend edge container with Playwright and docker compose for bug reproduction (#256)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ca3459ec61
commit
300f335179
5 changed files with 588 additions and 0 deletions
|
|
@ -39,6 +39,23 @@ services:
|
||||||
depends_on:
|
depends_on:
|
||||||
- forgejo
|
- forgejo
|
||||||
|
|
||||||
|
reproduce:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: docker/reproduce/Dockerfile
|
||||||
|
image: disinto-reproduce:latest
|
||||||
|
network_mode: host
|
||||||
|
profiles: ["reproduce"]
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
- agent-data:/home/agent/data
|
||||||
|
- project-repos:/home/agent/repos
|
||||||
|
- ${HOME}/.claude:/home/agent/.claude
|
||||||
|
- /usr/local/bin/claude:/usr/local/bin/claude:ro
|
||||||
|
- ${HOME}/.ssh:/home/agent/.ssh:ro
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
|
||||||
forgejo:
|
forgejo:
|
||||||
image: codeberg.org/forgejo/forgejo:1
|
image: codeberg.org/forgejo/forgejo:1
|
||||||
container_name: disinto-forgejo
|
container_name: disinto-forgejo
|
||||||
|
|
|
||||||
|
|
@ -451,6 +451,129 @@ launch_runner() {
|
||||||
return $exit_code
|
return $exit_code
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Reproduce dispatch — launch sidecar for bug-report issues
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Print the path of the pid-file that marks a reproduce run as in-flight for
# one issue. The file lives in /tmp and is keyed by issue number so the
# dispatcher does not double-launch the same issue on a later cycle.
# Arguments: $1 - issue number
# Outputs:   pid-file path on stdout
_reproduce_lockfile() {
  local issue="$1"
  printf '%s\n' "/tmp/reproduce-inflight-${issue}.pid"
}
|
||||||
|
|
||||||
|
# Report whether a reproduce run for the given issue is still in flight.
# Reads the pid recorded in the issue's pid-file (see _reproduce_lockfile)
# and probes that process with `kill -0`.
# Arguments: $1 - issue number
# Returns:   0 if the recorded process is alive; non-zero if there is no
#            pid-file, the pid is not a positive integer, or the process is gone
is_reproduce_running() {
  local issue="$1"
  local pidfile pid
  pidfile=$(_reproduce_lockfile "$issue")
  [ -f "$pidfile" ] || return 1

  pid=$(cat "$pidfile" 2>/dev/null || echo "")

  # Accept only a positive decimal pid. `kill -0` interprets 0 and negative
  # values as process-group / broadcast targets, so a truncated or corrupted
  # pid-file would otherwise be reported as a live run.
  case "$pid" in
    '' | *[!0-9]*) return 1 ;;
  esac
  [ "$pid" -gt 0 ] 2>/dev/null || return 1

  kill -0 "$pid" 2>/dev/null
}
|
||||||
|
|
||||||
|
# Fetch open issues labelled bug-report that have no outcome label yet.
# An issue is skipped when it already carries one of: reproduced,
# cannot-reproduce, needs-triage.
# Globals:  FORGE_TOKEN, FORGE_URL, FORGE_REPO (read)
# Outputs:  newline-separated issue numbers on stdout
# Returns:  0 always; prints nothing when configuration is missing, the API
#           call fails, or the response cannot be parsed
fetch_reproduce_candidates() {
  # Require FORGE_TOKEN, FORGE_URL, FORGE_REPO
  [ -n "${FORGE_TOKEN:-}" ] || return 0
  [ -n "${FORGE_URL:-}" ] || return 0
  [ -n "${FORGE_REPO:-}" ] || return 0

  local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"

  local issues_json
  issues_json=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${api}/issues?type=issues&state=open&labels=bug-report&limit=20" 2>/dev/null) || return 0

  # The filter program is handed to python3 with -c so that stdin stays free
  # for the JSON payload; no temp file is needed (and none can be left behind).
  local py_filter='import sys, json
data = json.load(sys.stdin)
skip = {"reproduced", "cannot-reproduce", "needs-triage"}
for issue in data:
    labels = {l["name"] for l in (issue.get("labels") or [])}
    if labels & skip:
        continue
    print(issue["number"])'

  printf '%s' "$issues_json" | python3 -c "$py_filter" || return 0
}
|
||||||
|
|
||||||
|
# Launch one reproduce container for a candidate issue, in the background.
# The project TOML is the first regular file matching
# FACTORY_ROOT/projects/*.toml; it is mounted read-only into the container.
# Globals:   FACTORY_ROOT, FORGE_URL, FORGE_TOKEN, FORGE_REPO (read);
#            PRIMARY_BRANCH, ANTHROPIC_API_KEY, HOME (read, optional)
# Arguments: $1 - issue number
# Returns:   0 when the container was launched, or when the launch was
#            skipped (already running / no project TOML found)
dispatch_reproduce() {
  local issue_number="$1"

  if is_reproduce_running "$issue_number"; then
    log "Reproduce already running for issue #${issue_number}, skipping"
    return 0
  fi

  # Find first project TOML available (same convention as dev-poll).
  # `toml` is local so the loop cannot clobber a caller's variable.
  local project_toml="" toml
  for toml in "${FACTORY_ROOT}"/projects/*.toml; do
    if [ -f "$toml" ]; then
      project_toml="$toml"
      break
    fi
  done

  if [ -z "$project_toml" ]; then
    log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping reproduce for #${issue_number}"
    return 0
  fi

  log "Dispatching reproduce-agent for issue #${issue_number} (project: ${project_toml})"

  # Build docker run command using array (safe from injection).
  # Secrets are named with a bare `-e VAR` so docker copies the value from the
  # client's environment; this keeps them out of the process argument list.
  local -a cmd=(docker run --rm
    --name "disinto-reproduce-${issue_number}"
    --network host
    -v /var/run/docker.sock:/var/run/docker.sock
    -v agent-data:/home/agent/data
    -v project-repos:/home/agent/repos
    -e "FORGE_URL=${FORGE_URL}"
    -e FORGE_TOKEN
    -e "FORGE_REPO=${FORGE_REPO}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
    -e DISINTO_CONTAINER=1
  )

  # Pass through ANTHROPIC_API_KEY if set
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e ANTHROPIC_API_KEY)
  fi

  # Mount ~/.claude and ~/.ssh from the runtime user's home if available
  local runtime_home="${HOME:-/home/debian}"
  if [ -d "${runtime_home}/.claude" ]; then
    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
  fi
  if [ -d "${runtime_home}/.ssh" ]; then
    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
  fi

  # Mount claude CLI binary if present on host
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi

  # Mount the project TOML into the container at a stable path
  local container_toml="/home/agent/project.toml"
  cmd+=(-v "${project_toml}:${container_toml}:ro")

  cmd+=(disinto-reproduce:latest "$container_toml" "$issue_number")

  # Launch in background; write pid-file so we don't double-launch.
  # The prefix assignments export the secrets to this one command only.
  FORGE_TOKEN="${FORGE_TOKEN}" ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}" \
    "${cmd[@]}" &
  local bg_pid=$!
  printf '%s\n' "$bg_pid" > "$(_reproduce_lockfile "$issue_number")"
  log "Reproduce container launched (pid ${bg_pid}) for issue #${issue_number}"
}
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# Main dispatcher loop
|
# Main dispatcher loop
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
@ -501,6 +624,16 @@ main() {
|
||||||
launch_runner "$toml_file" || true
|
launch_runner "$toml_file" || true
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Reproduce dispatch: check for bug-report issues needing reproduction
|
||||||
|
local candidate_issues
|
||||||
|
candidate_issues=$(fetch_reproduce_candidates) || true
|
||||||
|
if [ -n "$candidate_issues" ]; then
|
||||||
|
while IFS= read -r issue_num; do
|
||||||
|
[ -n "$issue_num" ] || continue
|
||||||
|
dispatch_reproduce "$issue_num" || true
|
||||||
|
done <<< "$candidate_issues"
|
||||||
|
fi
|
||||||
|
|
||||||
# Wait before next poll
|
# Wait before next poll
|
||||||
sleep 60
|
sleep 60
|
||||||
done
|
done
|
||||||
|
|
|
||||||
11
docker/reproduce/Dockerfile
Normal file
11
docker/reproduce/Dockerfile
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
# Image for the reproduce-agent sidecar (entrypoint: /entrypoint-reproduce.sh).
FROM debian:bookworm-slim

# python3 and ca-certificates are listed explicitly: the entrypoint invokes
# python3 (tomllib) directly, and curl/git need a CA bundle for HTTPS. With
# --no-install-recommends they must not be left to transitive dependencies.
# NOTE(review): docker-compose-plugin is normally shipped from Docker's own apt
# repository — confirm it resolves from the repositories configured here.
RUN apt-get update && apt-get install -y --no-install-recommends \
    bash ca-certificates curl git jq python3 docker.io docker-compose-plugin \
    nodejs npm chromium \
    && npm install -g @anthropic-ai/mcp-playwright \
    && rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 -s /bin/bash agent

COPY docker/reproduce/entrypoint-reproduce.sh /entrypoint-reproduce.sh
RUN chmod +x /entrypoint-reproduce.sh

# NOTE(review): the entrypoint sources ${DISINTO_DIR}/lib/stack-lock.sh
# (default /home/agent/disinto); nothing in this file copies it into the
# image — confirm it is provided at runtime.
WORKDIR /home/agent
ENTRYPOINT ["/entrypoint-reproduce.sh"]
|
||||||
404
docker/reproduce/entrypoint-reproduce.sh
Normal file
404
docker/reproduce/entrypoint-reproduce.sh
Normal file
|
|
@ -0,0 +1,404 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# entrypoint-reproduce.sh — Reproduce-agent sidecar entrypoint
|
||||||
|
#
|
||||||
|
# Acquires the stack lock, boots the project stack (if formula declares
|
||||||
|
# stack_script), then drives Claude + Playwright MCP to follow the bug
|
||||||
|
# report's repro steps. Labels the issue based on outcome and posts
|
||||||
|
# findings + screenshots.
|
||||||
|
#
|
||||||
|
# Usage (launched by dispatcher.sh):
|
||||||
|
# entrypoint-reproduce.sh <project_toml> <issue_number>
|
||||||
|
#
|
||||||
|
# Environment (injected by dispatcher via docker run -e):
|
||||||
|
# FORGE_URL, FORGE_TOKEN, FORGE_REPO, PRIMARY_BRANCH, DISINTO_CONTAINER=1
|
||||||
|
#
|
||||||
|
# Volumes expected:
|
||||||
|
# /home/agent/data — agent-data volume (stack-lock files go here)
|
||||||
|
# /home/agent/repos — project-repos volume
|
||||||
|
# /home/agent/.claude — host ~/.claude (OAuth credentials)
|
||||||
|
# /home/agent/.ssh — host ~/.ssh (read-only)
|
||||||
|
# /usr/local/bin/claude — host claude CLI binary (read-only)
|
||||||
|
# /var/run/docker.sock — host docker socket
|
||||||
|
|
||||||
|
set -euo pipefail

# Paths and limits used by the rest of the script. DISINTO_DIR and
# REPRODUCE_TIMEOUT_MINUTES may be overridden from the environment.
DISINTO_DIR="${DISINTO_DIR:-/home/agent/disinto}"
REPRODUCE_FORMULA="${DISINTO_DIR}/formulas/reproduce.toml"
REPRODUCE_TIMEOUT="${REPRODUCE_TIMEOUT_MINUTES:-15}"
LOGFILE="/home/agent/data/logs/reproduce.log"
SCREENSHOT_DIR="/home/agent/data/screenshots"
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Logging
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Print a UTC-timestamped message to stdout and append the same line to
# LOGFILE. File logging is best-effort: the log directory is created on
# demand, and a failure to write the file never fails the caller (a failing
# `tee` would otherwise abort the script under `set -e` with pipefail).
# Globals:   LOGFILE (read)
# Arguments: message words, joined with spaces
log() {
  local line
  line=$(printf '[%s] reproduce: %s' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*")
  printf '%s\n' "$line"
  mkdir -p -- "$(dirname -- "$LOGFILE")" 2>/dev/null || true
  { printf '%s\n' "$line" >> "$LOGFILE"; } 2>/dev/null || true
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Bootstrap: directories
# ---------------------------------------------------------------------------
# Created before the first log() call, which appends to LOGFILE under
# /home/agent/data/logs.
mkdir -p /home/agent/data/logs /home/agent/data/locks "$SCREENSHOT_DIR"

# ---------------------------------------------------------------------------
# Argument validation
# ---------------------------------------------------------------------------
PROJECT_TOML="${1:-}"
ISSUE_NUMBER="${2:-}"

if [ -z "$PROJECT_TOML" ] || [ -z "$ISSUE_NUMBER" ]; then
  log "FATAL: usage: entrypoint-reproduce.sh <project_toml> <issue_number>"
  exit 1
fi

if [ ! -f "$PROJECT_TOML" ]; then
  log "FATAL: project TOML not found: ${PROJECT_TOML}"
  exit 1
fi

# ---------------------------------------------------------------------------
# Bootstrap: env
# ---------------------------------------------------------------------------
export DISINTO_CONTAINER=1
export HOME="${HOME:-/home/agent}"
export USER="${USER:-agent}"

# Fail with a readable message instead of a bare "unbound variable" abort.
for _required in FORGE_URL FORGE_TOKEN FORGE_REPO; do
  if [ -z "${!_required:-}" ]; then
    log "FATAL: ${_required} is not set"
    exit 1
  fi
done

FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"

# Load project name from TOML
PROJECT_NAME=$(python3 -c "
import sys, tomllib
with open(sys.argv[1], 'rb') as f:
    print(tomllib.load(f)['name'])
" "$PROJECT_TOML" 2>/dev/null) || {
  log "FATAL: could not read project name from ${PROJECT_TOML}"
  exit 1
}
export PROJECT_NAME

PROJECT_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}"

log "Starting reproduce-agent for issue #${ISSUE_NUMBER} (project: ${PROJECT_NAME})"

# ---------------------------------------------------------------------------
# Verify claude CLI is available (mounted from host)
# ---------------------------------------------------------------------------
if ! command -v claude &>/dev/null; then
  log "FATAL: claude CLI not found. Mount the host binary at /usr/local/bin/claude"
  exit 1
fi

# ---------------------------------------------------------------------------
# Source stack-lock library
# ---------------------------------------------------------------------------
# shellcheck source=/home/agent/disinto/lib/stack-lock.sh
source "${DISINTO_DIR}/lib/stack-lock.sh"

LOCK_HOLDER="reproduce-agent-${ISSUE_NUMBER}"

# ---------------------------------------------------------------------------
# Read formula config
# ---------------------------------------------------------------------------
FORMULA_STACK_SCRIPT=""
FORMULA_TIMEOUT_MINUTES="${REPRODUCE_TIMEOUT}"

if [ -f "$REPRODUCE_FORMULA" ]; then
  FORMULA_STACK_SCRIPT=$(python3 -c "
import sys, tomllib
with open(sys.argv[1], 'rb') as f:
    d = tomllib.load(f)
print(d.get('stack_script', ''))
" "$REPRODUCE_FORMULA" 2>/dev/null || echo "")

  # The default is passed as an argument rather than spliced into the Python
  # source, so its value can never be interpreted as code.
  _tm=$(python3 -c "
import sys, tomllib
with open(sys.argv[1], 'rb') as f:
    d = tomllib.load(f)
print(d.get('timeout_minutes', sys.argv[2]))
" "$REPRODUCE_FORMULA" "$REPRODUCE_TIMEOUT" 2>/dev/null || echo "${REPRODUCE_TIMEOUT}")
  FORMULA_TIMEOUT_MINUTES="$_tm"
fi

# The timeout is later used in shell arithmetic; accept plain digits only.
case "$FORMULA_TIMEOUT_MINUTES" in
  '' | *[!0-9]*)
    log "WARNING: invalid timeout '${FORMULA_TIMEOUT_MINUTES}' — falling back to 15"
    FORMULA_TIMEOUT_MINUTES=15
    ;;
esac

log "Formula stack_script: '${FORMULA_STACK_SCRIPT}'"
log "Formula timeout: ${FORMULA_TIMEOUT_MINUTES}m"

# ---------------------------------------------------------------------------
# Fetch issue details for repro steps
# ---------------------------------------------------------------------------
log "Fetching issue #${ISSUE_NUMBER} from ${FORGE_API}..."
ISSUE_JSON=$(curl -sf \
  -H "Authorization: token ${FORGE_TOKEN}" \
  "${FORGE_API}/issues/${ISSUE_NUMBER}" 2>/dev/null) || {
  log "ERROR: failed to fetch issue #${ISSUE_NUMBER}"
  exit 1
}

ISSUE_TITLE=$(printf '%s' "$ISSUE_JSON" | jq -r '.title // "unknown"')
ISSUE_BODY=$(printf '%s' "$ISSUE_JSON" | jq -r '.body // ""')

log "Issue: ${ISSUE_TITLE}"

# ---------------------------------------------------------------------------
# Acquire stack lock
# ---------------------------------------------------------------------------
log "Acquiring stack lock for project ${PROJECT_NAME}..."
stack_lock_acquire "$LOCK_HOLDER" "$PROJECT_NAME" 900
# Release the lock on every exit path from here on.
trap 'stack_lock_release "$PROJECT_NAME" "$LOCK_HOLDER"; log "Stack lock released (trap)"' EXIT
log "Stack lock acquired."
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Start heartbeat in background (every 2 minutes)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Refresh the stack lock forever; intended to be run as a background job and
# killed by the caller when the work is done.
# Globals:   LOCK_HOLDER, PROJECT_NAME (read)
# Arguments: $1 - seconds to sleep before each heartbeat (optional, default 120)
heartbeat_loop() {
  local interval="${1:-120}"
  while true; do
    sleep "$interval"
    # A failed heartbeat is ignored on purpose so the loop keeps trying.
    stack_lock_heartbeat "$LOCK_HOLDER" "$PROJECT_NAME" 2>/dev/null || true
  done
}
|
||||||
|
heartbeat_loop &
HEARTBEAT_PID=$!
# Replaces the earlier EXIT trap: stop the heartbeat job, then release the lock.
trap 'kill "$HEARTBEAT_PID" 2>/dev/null; stack_lock_release "$PROJECT_NAME" "$LOCK_HOLDER"; log "Stack lock released (trap)"' EXIT

# ---------------------------------------------------------------------------
# Boot the project stack if formula declares stack_script
# ---------------------------------------------------------------------------
if [ -n "$FORMULA_STACK_SCRIPT" ] && [ -d "$PROJECT_REPO_ROOT" ]; then
  log "Running stack_script: ${FORMULA_STACK_SCRIPT}"
  # Run in project repo root; script path is relative to project repo.
  # Read stack_script into array to allow arguments (e.g. "scripts/dev.sh restart --full").
  read -ra _stack_cmd <<< "$FORMULA_STACK_SCRIPT"
  (cd "$PROJECT_REPO_ROOT" && bash "${_stack_cmd[@]}") || {
    log "WARNING: stack_script exited non-zero — continuing anyway"
  }
  # Give the stack a moment to stabilise
  sleep 5
elif [ -n "$FORMULA_STACK_SCRIPT" ]; then
  log "WARNING: PROJECT_REPO_ROOT not found at ${PROJECT_REPO_ROOT} — skipping stack_script"
fi

# ---------------------------------------------------------------------------
# Build Claude prompt for reproduction
# ---------------------------------------------------------------------------
TIMESTAMP=$(date -u '+%Y%m%d-%H%M%S')
SCREENSHOT_PREFIX="${SCREENSHOT_DIR}/issue-${ISSUE_NUMBER}-${TIMESTAMP}"

CLAUDE_PROMPT=$(cat <<PROMPT
You are the reproduce-agent. Your task is to reproduce the bug described in issue #${ISSUE_NUMBER} and report your findings.

## Issue title
${ISSUE_TITLE}

## Issue body
${ISSUE_BODY}

## Your task

1. **Reproduce the bug** — Use Playwright to navigate the application and follow the reproduction steps from the issue. Take screenshots at each key step and save them to: ${SCREENSHOT_PREFIX}-step-N.png

2. **Determine outcome** — Did the bug reproduce?
- YES: Proceed to step 3
- NO: Write OUTCOME=cannot-reproduce and skip to step 5

3. **Check logs** — Run: docker compose -f ${PROJECT_REPO_ROOT}/docker-compose.yml logs --tail=200
Look for: stack traces, error messages, wrong addresses, missing config, HTTP error codes.

4. **Assess root cause** — Based on logs + browser observations:
- FOUND: Write OUTCOME=reproduced and ROOT_CAUSE=<one-line summary>
- INCONCLUSIVE: Write OUTCOME=needs-triage

5. **Write findings** — Write a markdown report to: /tmp/reproduce-findings-${ISSUE_NUMBER}.md
Include:
- Steps you followed
- What you observed (screenshots referenced by path)
- Log excerpts (truncated to relevant lines)
- OUTCOME line (one of: reproduced, cannot-reproduce, needs-triage)
- ROOT_CAUSE line (if outcome is reproduced)

6. **Write outcome file** — Write ONLY the outcome word to: /tmp/reproduce-outcome-${ISSUE_NUMBER}.txt
(one of: reproduced, cannot-reproduce, needs-triage)

## Notes
- The application is accessible at localhost (network_mode: host)
- Take screenshots liberally — they are evidence
- If the app is not running or not reachable, write outcome: cannot-reproduce with reason "stack not reachable"
- Timeout: ${FORMULA_TIMEOUT_MINUTES} minutes total

Begin now.
PROMPT
)

# ---------------------------------------------------------------------------
# Run Claude with Playwright MCP
# ---------------------------------------------------------------------------
log "Starting Claude reproduction session (timeout: ${FORMULA_TIMEOUT_MINUTES}m)..."

# `|| CLAUDE_EXIT=$?` captures the status without tripping `set -e`.
CLAUDE_EXIT=0
timeout "$(( FORMULA_TIMEOUT_MINUTES * 60 ))" \
  claude -p "$CLAUDE_PROMPT" \
    --mcp-server playwright \
    --output-format text \
    --max-turns 40 \
  > "/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>&1 || CLAUDE_EXIT=$?

if [ "$CLAUDE_EXIT" -eq 124 ]; then
  # 124 is the status `timeout` returns when the time limit was hit.
  log "WARNING: Claude session timed out after ${FORMULA_TIMEOUT_MINUTES}m"
elif [ "$CLAUDE_EXIT" -ne 0 ]; then
  log "WARNING: Claude session exited with status ${CLAUDE_EXIT}"
fi

# ---------------------------------------------------------------------------
# Read outcome
# ---------------------------------------------------------------------------
# Anything other than one of the three known words falls back to needs-triage.
OUTCOME="needs-triage"
if [ -f "/tmp/reproduce-outcome-${ISSUE_NUMBER}.txt" ]; then
  _raw=$(tr -d '[:space:]' < "/tmp/reproduce-outcome-${ISSUE_NUMBER}.txt" | tr '[:upper:]' '[:lower:]')
  case "$_raw" in
    reproduced|cannot-reproduce|needs-triage)
      OUTCOME="$_raw"
      ;;
    *)
      log "WARNING: unexpected outcome '${_raw}' — defaulting to needs-triage"
      ;;
  esac
else
  log "WARNING: outcome file not found — defaulting to needs-triage"
fi

log "Outcome: ${OUTCOME}"

# ---------------------------------------------------------------------------
# Read findings
# ---------------------------------------------------------------------------
FINDINGS=""
if [ -f "/tmp/reproduce-findings-${ISSUE_NUMBER}.md" ]; then
  FINDINGS=$(cat "/tmp/reproduce-findings-${ISSUE_NUMBER}.md")
else
  # printf expands \n in its format; inside a double-quoted string the
  # backslash-n would be posted to the issue verbatim.
  _claude_tail=$(tail -n 100 "/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>/dev/null || echo '(no output)')
  FINDINGS=$(printf 'Reproduce-agent completed but did not write a findings report. Claude output:\n```\n%s\n```' "$_claude_tail")
fi

# ---------------------------------------------------------------------------
# Collect screenshot paths for comment
# ---------------------------------------------------------------------------
# Stays empty when no screenshot matches; the heading is added with the first hit.
SCREENSHOT_LIST=""
for f in "${SCREENSHOT_PREFIX}"-*.png; do
  # An unmatched glob stays literal; skip it.
  [ -e "$f" ] || continue
  if [ -z "$SCREENSHOT_LIST" ]; then
    SCREENSHOT_LIST=$'\n\n**Screenshots taken:**\n'
  fi
  SCREENSHOT_LIST+="- \`$(basename "$f")\`"$'\n'
done
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Label helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Print the id of the repository label called NAME, creating the label with
# COLOR when the lookup finds none. Prints an empty line when both the lookup
# and the creation fail.
# Globals:   FORGE_TOKEN, FORGE_API (read)
# Arguments: $1 - label name, $2 - label colour (e.g. "#0075ca")
# Outputs:   label id (or empty) on stdout
_label_id() {
  local name="$1" color="$2"
  local id
  # `first` keeps a single id even if several labels share the name.
  id=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API}/labels" 2>/dev/null \
    | jq -r --arg n "$name" '[.[] | select(.name == $n) | .id] | first // empty' 2>/dev/null || echo "")
  if [ -z "$id" ]; then
    # Build the payload with jq so quotes/backslashes in the name are escaped.
    id=$(curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}/labels" \
      -d "$(jq -nc --arg name "$name" --arg color "$color" '{name:$name, color:$color}')" 2>/dev/null \
      | jq -r '.id // empty' 2>/dev/null || echo "")
  fi
  echo "$id"
}
|
||||||
|
|
||||||
|
# Attach a label to an issue. Best-effort: a failure is logged, never fatal.
# Globals:   FORGE_TOKEN, FORGE_API (read)
# Arguments: $1 - issue number, $2 - numeric label id (empty = do nothing)
# Returns:   0 always
_add_label() {
  local issue="$1" label_id="$2"
  if [ -z "$label_id" ]; then
    return 0
  fi
  # The id is spliced into a JSON array, so only plain digits are accepted.
  case "$label_id" in
    *[!0-9]*)
      log "WARNING: refusing to add non-numeric label id '${label_id}' to issue #${issue}" || true
      return 0
      ;;
  esac
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API}/issues/${issue}/labels" \
    -d "{\"labels\":[${label_id}]}" >/dev/null 2>&1 \
    || log "WARNING: failed to add label ${label_id} to issue #${issue}" \
    || true
}
|
||||||
|
|
||||||
|
# Detach a label from an issue. Best-effort: a failure is logged, never fatal.
# Globals:   FORGE_TOKEN, FORGE_API (read)
# Arguments: $1 - issue number, $2 - label id (empty = do nothing)
# Returns:   0 always
_remove_label() {
  local issue="$1" label_id="$2"
  if [ -z "$label_id" ]; then
    return 0
  fi
  curl -sf -X DELETE \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1 \
    || log "WARNING: failed to remove label ${label_id} from issue #${issue}" \
    || true
}
|
||||||
|
|
||||||
|
# Post a comment on an issue. Best-effort: a failure is logged, never fatal.
# Globals:   FORGE_TOKEN, FORGE_API (read)
# Arguments: $1 - issue number, $2 - comment body (markdown text)
# Returns:   0 always
_post_comment() {
  local issue="$1" body="$2"
  local payload
  # jq performs the JSON string escaping of the body.
  payload=$(jq -nc --arg b "$body" '{body:$b}') || {
    log "WARNING: could not encode comment for issue #${issue}" || true
    return 0
  }
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API}/issues/${issue}/comments" \
    -d "$payload" >/dev/null 2>&1 \
    || log "WARNING: failed to post comment on issue #${issue}" \
    || true
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Apply labels and post findings
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Map the outcome to its label, colour and comment header. For a reproduced
# bug, also open a backlog issue for the triage/dev agents.
case "$OUTCOME" in
  reproduced)
    LABEL_NAME="reproduced"
    LABEL_COLOR="#0075ca"
    COMMENT_HEADER="## Reproduce-agent: **Reproduced** :white_check_mark:"

    # Create a backlog issue for the triage/dev agents
    ROOT_CAUSE=$(grep -m1 "^ROOT_CAUSE=" "/tmp/reproduce-findings-${ISSUE_NUMBER}.md" 2>/dev/null \
      | sed 's/^ROOT_CAUSE=//' || echo "See findings on issue #${ISSUE_NUMBER}")
    BACKLOG_BODY="## Summary
Bug reproduced from issue #${ISSUE_NUMBER}: ${ISSUE_TITLE}

Root cause (quick log analysis): ${ROOT_CAUSE}

## Dependencies
- #${ISSUE_NUMBER}

## Affected files
- (see findings on issue #${ISSUE_NUMBER})

## Acceptance criteria
- [ ] Root cause confirmed and fixed
- [ ] Issue #${ISSUE_NUMBER} no longer reproducible"

    # Strip an existing bug:/feat:/fix: prefix before adding "fix: ", so the
    # backlog title never comes out as "fix: fix: ...".
    _backlog_title=$(printf '%s' "$ISSUE_TITLE" | sed -E 's/^(bug|feat|fix):[[:space:]]*//')

    log "Creating backlog issue for reproduced bug..."
    curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}/issues" \
      -d "$(jq -nc \
        --arg t "fix: ${_backlog_title}" \
        --arg b "$BACKLOG_BODY" \
        '{title:$t, body:$b}')" >/dev/null 2>&1 || \
      log "WARNING: failed to create backlog issue"
    ;;

  cannot-reproduce)
    LABEL_NAME="cannot-reproduce"
    LABEL_COLOR="#e4e669"
    COMMENT_HEADER="## Reproduce-agent: **Cannot reproduce** :x:"
    ;;

  needs-triage)
    LABEL_NAME="needs-triage"
    LABEL_COLOR="#d93f0b"
    COMMENT_HEADER="## Reproduce-agent: **Needs triage** :mag:"
    ;;
esac

# Apply the outcome label first, so the issue is never left without either
# the bug-report label or an outcome label if a later step fails.
OUTCOME_LABEL_ID=$(_label_id "$LABEL_NAME" "$LABEL_COLOR")
_add_label "$ISSUE_NUMBER" "$OUTCOME_LABEL_ID"
if [ -n "$OUTCOME_LABEL_ID" ]; then
  log "Applied label '${LABEL_NAME}' to issue #${ISSUE_NUMBER}"
else
  log "WARNING: could not resolve label '${LABEL_NAME}' — issue #${ISSUE_NUMBER} not labelled"
fi

# Remove bug-report label (we are resolving it)
BUG_REPORT_ID=$(_label_id "bug-report" "#e4e669")
_remove_label "$ISSUE_NUMBER" "$BUG_REPORT_ID"

COMMENT_BODY="${COMMENT_HEADER}

${FINDINGS}${SCREENSHOT_LIST}

---
*Reproduce-agent run at $(date -u '+%Y-%m-%d %H:%M:%S UTC') — project: ${PROJECT_NAME}*"

_post_comment "$ISSUE_NUMBER" "$COMMENT_BODY"
log "Posted findings to issue #${ISSUE_NUMBER}"

log "Reproduce-agent done. Outcome: ${OUTCOME}"
|
||||||
23
formulas/reproduce.toml
Normal file
23
formulas/reproduce.toml
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
# formulas/reproduce.toml — Reproduce-agent formula
|
||||||
|
#
|
||||||
|
# Declares the reproduce-agent's runtime parameters.
|
||||||
|
# The reproduce entrypoint (docker/reproduce/entrypoint-reproduce.sh) reads this file when the sidecar container starts.
|
||||||
|
#
|
||||||
|
# stack_script: path (relative to PROJECT_REPO_ROOT) of the script used to
|
||||||
|
# restart/rebuild the project stack before reproduction. Omit (or leave
|
||||||
|
# blank) to connect to an existing staging environment instead.
|
||||||
|
#
|
||||||
|
# tools: MCP servers intended for claude's --mcp-server flags (the entrypoint currently hard-codes `--mcp-server playwright` and does not read this key).
|
||||||
|
#
|
||||||
|
# timeout_minutes: hard upper bound on the Claude session.
|
||||||
|
|
||||||
|
name = "reproduce"
|
||||||
|
description = "Navigate the app via Playwright, reproduce a bug-report issue, and do a quick log-based root cause check"
|
||||||
|
version = 1
|
||||||
|
|
||||||
|
# Set stack_script to the restart command for local stacks.
|
||||||
|
# Leave empty ("") to target an existing staging environment.
|
||||||
|
stack_script = ""
|
||||||
|
|
||||||
|
tools = ["playwright"]
|
||||||
|
timeout_minutes = 15
|
||||||
Loading…
Add table
Add a link
Reference in a new issue