fix: feat: extend edge container with Playwright and docker compose for bug reproduction (#256)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 07:45:14 +00:00 · 2026-04-06 07:45:14 +00:00 · 300f335179
commit 300f335179
parent ca3459ec61
5 changed files with 588 additions and 0 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -39,6 +39,23 @@ services:
    depends_on:
      - forgejo
  # Reproduce-agent sidecar image. dispatcher.sh also starts this same image
  # directly with `docker run`, one container per bug-report issue.
  # NOTE(review): the image's entrypoint requires <project_toml> <issue_number>
  # arguments and none are set here — presumably this service is meant for
  # building the image or for manual `docker compose run`; confirm.
  reproduce:
    build:
      context: .
      dockerfile: docker/reproduce/Dockerfile
    image: disinto-reproduce:latest
    # Host networking: the agent is told to reach the application at localhost.
    network_mode: host
    # Opt-in profile, so a plain `docker compose up` leaves this service out.
    profiles: ["reproduce"]
    volumes:
      # Host Docker socket, so the container can talk to the host daemon.
      - /var/run/docker.sock:/var/run/docker.sock
      # Shared agent state: logs, screenshots and stack-lock files.
      - agent-data:/home/agent/data
      # Project checkouts; stack_script runs from /home/agent/repos/<project>.
      - project-repos:/home/agent/repos
      # Host Claude credentials (writable) and the host CLI binary (read-only).
      - ${HOME}/.claude:/home/agent/.claude
      - /usr/local/bin/claude:/usr/local/bin/claude:ro
      # Host SSH directory, read-only.
      - ${HOME}/.ssh:/home/agent/.ssh:ro
    env_file:
      - .env
  forgejo:
    image: codeberg.org/forgejo/forgejo:1
    container_name: disinto-forgejo
--- a/docker/edge/dispatcher.sh
+++ b/docker/edge/dispatcher.sh
@ -451,6 +451,129 @@ launch_runner() {
  return $exit_code
 }
 # -----------------------------------------------------------------------------
 # Reproduce dispatch — launch sidecar for bug-report issues
 # -----------------------------------------------------------------------------
 # Check if a reproduce run is already in-flight for a given issue.
 # Uses a simple pid-file in /tmp so we don't double-launch per dispatcher cycle.
 # Print the path of the pid-file that marks a reproduce run for issue $1
 # as in flight.
 _reproduce_lockfile() {
  local issue_id="$1"
  printf '%s\n' "/tmp/reproduce-inflight-${issue_id}.pid"
 }
 # Report whether a reproduce run for issue $1 is still in flight.
 #
 # Arguments: $1 - issue number
 # Returns:   0 if the pid recorded in the issue's pid-file is alive,
 #            1 otherwise (no pid-file, unreadable/garbage content, dead pid).
 # Side effect: a pid-file that no longer points at a live process is removed,
 # so stale markers do not pile up in /tmp.
 is_reproduce_running() {
  local issue="$1"
  local pidfile pid
  pidfile=$(_reproduce_lockfile "$issue")
  [ -f "$pidfile" ] || return 1
  pid=$(cat "$pidfile" 2>/dev/null || echo "")
  # Only a plain decimal number may reach kill; anything else (empty, text,
  # a negative value that kill would read as a process group) is stale.
  case "$pid" in
    '' | *[!0-9]*) pid="" ;;
  esac
  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
    return 0
  fi
  rm -f "$pidfile"
  return 1
 }
 # Fetch open issues labelled bug-report that have no outcome label yet.
 #
 # Globals:  FORGE_TOKEN, FORGE_URL, FORGE_REPO (read)
 # Outputs:  one issue number per line on stdout; nothing when the forge is
 #           not configured, the request fails, or the response is unusable.
 # Returns:  always 0 — a failed poll is simply "no candidates this cycle".
 fetch_reproduce_candidates() {
  # Require FORGE_TOKEN, FORGE_URL, FORGE_REPO
  [ -n "${FORGE_TOKEN:-}" ] || return 0
  [ -n "${FORGE_URL:-}" ]   || return 0
  [ -n "${FORGE_REPO:-}" ]  || return 0
  local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
  local issues_json
  issues_json=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${api}/issues?type=issues&state=open&labels=bug-report&limit=20" 2>/dev/null) || return 0
  # Filter out issues that already carry an outcome label.
  # The script is assembled line by line and handed to python3 via -c, which
  # leaves stdin free for the JSON: no temp file to clean up, and the Python
  # indentation cannot be disturbed by how this file itself is indented.
  local filter
  filter=$(printf '%s\n' \
    'import sys, json' \
    'skip = {"reproduced", "cannot-reproduce", "needs-triage"}' \
    'for issue in json.load(sys.stdin):' \
    '    labels = {l["name"] for l in (issue.get("labels") or [])}' \
    '    if not labels & skip:' \
    '        print(issue["number"])')
  printf '%s\n' "$issues_json" | python3 -c "$filter" || return 0
 }
 # Launch one reproduce container for a candidate issue, unless a run for
 # that issue is already in flight.
 # The project TOML is the first regular file matching
 # FACTORY_ROOT/projects/*.toml; it is mounted read-only into the container.
 #
 # Arguments: $1 - issue number
 # Globals:   FACTORY_ROOT, FORGE_URL, FORGE_TOKEN, FORGE_REPO (read);
 #            PRIMARY_BRANCH, ANTHROPIC_API_KEY, HOME (read, optional)
 # Returns:   0 when the launch is skipped (already running / no TOML);
 #            otherwise the status of the final log call.
 dispatch_reproduce() {
  local issue_number="$1"
  if is_reproduce_running "$issue_number"; then
    log "Reproduce already running for issue #${issue_number}, skipping"
    return 0
  fi
  # Find first project TOML available (same convention as dev-poll)
  local project_toml="" toml
  for toml in "${FACTORY_ROOT}"/projects/*.toml; do
    # -f also rejects the literal pattern an unmatched glob leaves behind.
    if [ -f "$toml" ]; then
      project_toml="$toml"
      break
    fi
  done
  if [ -z "$project_toml" ]; then
    log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping reproduce for #${issue_number}"
    return 0
  fi
  log "Dispatching reproduce-agent for issue #${issue_number} (project: ${project_toml})"
  # Build docker run command using array (safe from injection).
  # Secrets are given as bare "-e NAME": docker then copies the value from its
  # own environment, so the token never appears in the process argument list.
  # NOTE(review): agent-data / project-repos are plain volume names here —
  # confirm they match the names docker compose gives these volumes.
  local -a cmd=(docker run --rm
    --name "disinto-reproduce-${issue_number}"
    --network host
    -v /var/run/docker.sock:/var/run/docker.sock
    -v agent-data:/home/agent/data
    -v project-repos:/home/agent/repos
    -e "FORGE_URL=${FORGE_URL}"
    -e FORGE_TOKEN
    -e "FORGE_REPO=${FORGE_REPO}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
    -e DISINTO_CONTAINER=1
  )
  # Pass through ANTHROPIC_API_KEY if set
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e ANTHROPIC_API_KEY)
  fi
  # Mount ~/.claude and ~/.ssh from the runtime user's home if available
  local runtime_home="${HOME:-/home/debian}"
  if [ -d "${runtime_home}/.claude" ]; then
    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
  fi
  if [ -d "${runtime_home}/.ssh" ]; then
    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
  fi
  # Mount claude CLI binary if present on host
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi
  # Mount the project TOML into the container at a stable path
  local container_toml="/home/agent/project.toml"
  cmd+=(-v "${project_toml}:${container_toml}:ro")
  cmd+=(disinto-reproduce:latest "$container_toml" "$issue_number")
  # Launch in background; write pid-file so we don't double-launch.
  # The assignment prefix exports the secrets to this one docker client only,
  # whether or not the dispatcher itself has them exported.
  FORGE_TOKEN="${FORGE_TOKEN}" ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}" \
    "${cmd[@]}" &
  local bg_pid=$!
  echo "$bg_pid" > "$(_reproduce_lockfile "$issue_number")"
  log "Reproduce container launched (pid ${bg_pid}) for issue #${issue_number}"
 }
 # -----------------------------------------------------------------------------
 # Main dispatcher loop
 # -----------------------------------------------------------------------------
@ -501,6 +624,16 @@ main() {
      launch_runner "$toml_file" || true
    done
    # Reproduce dispatch: check for bug-report issues needing reproduction
    local candidate_issues
    candidate_issues=$(fetch_reproduce_candidates) || true
    if [ -n "$candidate_issues" ]; then
      while IFS= read -r issue_num; do
        [ -n "$issue_num" ] || continue
        dispatch_reproduce "$issue_num" || true
      done <<< "$candidate_issues"
    fi
    # Wait before next poll
    sleep 60
  done
--- a/docker/reproduce/Dockerfile
+++ b/docker/reproduce/Dockerfile
@ -0,0 +1,11 @@
 # Reproduce-agent sidecar image: Docker CLI, Node/Chromium for the Playwright
 # MCP server, and the shell tooling used by entrypoint-reproduce.sh.
 FROM debian:bookworm-slim
 # python3 (tomllib, used to read the project TOML and the formula) and
 # ca-certificates (HTTPS for curl/npm) are listed explicitly: the entrypoint
 # depends on them directly, so they must not hinge on another package's
 # dependency chain while --no-install-recommends is in effect.
 # NOTE(review): docker-compose-plugin is normally shipped from Docker's own
 # apt repository — confirm it resolves against plain bookworm sources.
 # NOTE(review): confirm the npm package name for the Playwright MCP server.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    bash ca-certificates curl git jq python3 docker.io docker-compose-plugin \
    nodejs npm chromium \
    && npm install -g @anthropic-ai/mcp-playwright \
    && rm -rf /var/lib/apt/lists/*
 RUN useradd -m -u 1000 -s /bin/bash agent
 # NOTE(review): the entrypoint sources ${DISINTO_DIR}/lib/stack-lock.sh
 # (default /home/agent/disinto) and reads formulas/reproduce.toml from the
 # same tree; nothing in this file copies that tree in — confirm it is
 # provided some other way.
 COPY docker/reproduce/entrypoint-reproduce.sh /entrypoint-reproduce.sh
 RUN chmod +x /entrypoint-reproduce.sh
 WORKDIR /home/agent
 ENTRYPOINT ["/entrypoint-reproduce.sh"]
--- a/docker/reproduce/entrypoint-reproduce.sh
+++ b/docker/reproduce/entrypoint-reproduce.sh
@ -0,0 +1,404 @@
 #!/usr/bin/env bash
 # entrypoint-reproduce.sh — Reproduce-agent sidecar entrypoint
 #
 # Acquires the stack lock, boots the project stack (if formula declares
 # stack_script), then drives Claude + Playwright MCP to follow the bug
 # report's repro steps.  Labels the issue based on outcome and posts
 # findings + screenshots.
 #
 # Usage (launched by dispatcher.sh):
 #   entrypoint-reproduce.sh <project_toml> <issue_number>
 #
 # Environment (injected by dispatcher via docker run -e):
 #   FORGE_URL, FORGE_TOKEN, FORGE_REPO, PRIMARY_BRANCH, DISINTO_CONTAINER=1
 #
 # Volumes expected:
 #   /home/agent/data          — agent-data volume (stack-lock files go here)
 #   /home/agent/repos         — project-repos volume
 #   /home/agent/.claude       — host ~/.claude (OAuth credentials)
 #   /home/agent/.ssh          — host ~/.ssh (read-only)
 #   /usr/local/bin/claude     — host claude CLI binary (read-only)
 #   /var/run/docker.sock      — host docker socket
 set -euo pipefail
 # Paths and defaults; DISINTO_DIR and REPRODUCE_TIMEOUT_MINUTES may be
 # overridden from the environment.
 DISINTO_DIR="${DISINTO_DIR:-/home/agent/disinto}"
 REPRODUCE_FORMULA="${DISINTO_DIR}/formulas/reproduce.toml"
 REPRODUCE_TIMEOUT="${REPRODUCE_TIMEOUT_MINUTES:-15}"
 LOGFILE="/home/agent/data/logs/reproduce.log"
 SCREENSHOT_DIR="/home/agent/data/screenshots"
 # ---------------------------------------------------------------------------
 # Logging
 # ---------------------------------------------------------------------------
 # Print a UTC-timestamped line to stdout and append it to $LOGFILE.
 # Logging is best-effort: the log directory is created on demand (the first
 # FATAL messages are emitted before the bootstrap mkdir runs) and a log file
 # that cannot be written never aborts the script under `set -e`/pipefail.
 # Arguments: the message words, joined with spaces.
 log() {
  local ts
  ts=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
  mkdir -p "$(dirname "$LOGFILE")" 2>/dev/null || true
  printf '[%s] reproduce: %s\n' "$ts" "$*" | tee -a "$LOGFILE" 2>/dev/null || true
 }
 # ---------------------------------------------------------------------------
 # Argument validation
 # ---------------------------------------------------------------------------
 # Both positional arguments are mandatory, and the first must name an
 # existing regular file; anything else is fatal.
 PROJECT_TOML="${1:-}"
 ISSUE_NUMBER="${2:-}"
 if [[ -z "$PROJECT_TOML" || -z "$ISSUE_NUMBER" ]]; then
  log "FATAL: usage: entrypoint-reproduce.sh <project_toml> <issue_number>"
  exit 1
 fi
 if ! [[ -f "$PROJECT_TOML" ]]; then
  log "FATAL: project TOML not found: ${PROJECT_TOML}"
  exit 1
 fi
 # ---------------------------------------------------------------------------
 # Bootstrap: directories, env
 # ---------------------------------------------------------------------------
 mkdir -p /home/agent/data/logs /home/agent/data/locks "$SCREENSHOT_DIR"
 export DISINTO_CONTAINER=1
 export HOME="${HOME:-/home/agent}"
 export USER="${USER:-agent}"
 # The forge settings are used in every API call below. Check them up front
 # so a missing one is reported through log() rather than surfacing as a bare
 # "unbound variable" abort from `set -u`.
 for _required in FORGE_URL FORGE_TOKEN FORGE_REPO; do
  if [ -z "${!_required:-}" ]; then
    log "FATAL: ${_required} is not set"
    exit 1
  fi
 done
 FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
 # Load project name from TOML (top-level "name" key). Kept on one line so
 # the Python source does not depend on how this script is indented.
 PROJECT_NAME=$(python3 -c 'import sys, tomllib; print(tomllib.load(open(sys.argv[1], "rb"))["name"])' \
  "$PROJECT_TOML" 2>/dev/null) || {
  log "FATAL: could not read project name from ${PROJECT_TOML}"
  exit 1
 }
 export PROJECT_NAME
 PROJECT_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}"
 log "Starting reproduce-agent for issue #${ISSUE_NUMBER} (project: ${PROJECT_NAME})"
 # ---------------------------------------------------------------------------
 # Verify claude CLI is available (mounted from host)
 # ---------------------------------------------------------------------------
 command -v claude >/dev/null 2>&1 || {
  log "FATAL: claude CLI not found. Mount the host binary at /usr/local/bin/claude"
  exit 1
 }
 # ---------------------------------------------------------------------------
 # Source stack-lock library
 # ---------------------------------------------------------------------------
 # Supplies the stack_lock_* functions used further down; LOCK_HOLDER is the
 # per-issue holder name passed to them.
 # shellcheck source=/home/agent/disinto/lib/stack-lock.sh
 . "${DISINTO_DIR}/lib/stack-lock.sh"
 LOCK_HOLDER="reproduce-agent-${ISSUE_NUMBER}"
 # ---------------------------------------------------------------------------
 # Read formula config
 # ---------------------------------------------------------------------------
 # Print the top-level key $2 of TOML file $1, or the default $3 when the key
 # is absent. All three values travel as arguments, so nothing from the shell
 # is ever spliced into the Python source. Fails if the file cannot be parsed.
 _formula_get() {
  python3 -c 'import sys, tomllib; print(tomllib.load(open(sys.argv[1], "rb")).get(sys.argv[2], sys.argv[3]))' \
    "$1" "$2" "$3" 2>/dev/null
 }
 FORMULA_STACK_SCRIPT=""
 FORMULA_TIMEOUT_MINUTES="${REPRODUCE_TIMEOUT}"
 if [ -f "$REPRODUCE_FORMULA" ]; then
  FORMULA_STACK_SCRIPT=$(_formula_get "$REPRODUCE_FORMULA" stack_script "" || echo "")
  FORMULA_TIMEOUT_MINUTES=$(_formula_get "$REPRODUCE_FORMULA" timeout_minutes "${REPRODUCE_TIMEOUT}" \
    || echo "${REPRODUCE_TIMEOUT}")
 fi
 # The timeout is later multiplied inside $(( )), so it has to be a plain
 # non-negative integer; anything else falls back to 15 minutes.
 case "$FORMULA_TIMEOUT_MINUTES" in
  '' | *[!0-9]*)
    log "WARNING: invalid timeout '${FORMULA_TIMEOUT_MINUTES}' — using 15"
    FORMULA_TIMEOUT_MINUTES=15
    ;;
 esac
 log "Formula stack_script: '${FORMULA_STACK_SCRIPT}'"
 log "Formula timeout: ${FORMULA_TIMEOUT_MINUTES}m"
 # ---------------------------------------------------------------------------
 # Fetch issue details for repro steps
 # ---------------------------------------------------------------------------
 # Title and body feed the Claude prompt; a failed fetch is fatal.
 log "Fetching issue #${ISSUE_NUMBER} from ${FORGE_API}..."
 if ! ISSUE_JSON=$(curl -sf \
  -H "Authorization: token ${FORGE_TOKEN}" \
  "${FORGE_API}/issues/${ISSUE_NUMBER}" 2>/dev/null); then
  log "ERROR: failed to fetch issue #${ISSUE_NUMBER}"
  exit 1
 fi
 ISSUE_TITLE=$(jq -r '.title // "unknown"' <<< "$ISSUE_JSON")
 ISSUE_BODY=$(jq -r '.body // ""' <<< "$ISSUE_JSON")
 log "Issue: ${ISSUE_TITLE}"
 # ---------------------------------------------------------------------------
 # Acquire stack lock
 # ---------------------------------------------------------------------------
 log "Acquiring stack lock for project ${PROJECT_NAME}..."
 # NOTE(review): 900 is presumably a wait timeout in seconds — confirm
 # against lib/stack-lock.sh.
 stack_lock_acquire "$LOCK_HOLDER" "$PROJECT_NAME" 900
 HEARTBEAT_PID=""
 # EXIT handler: stop the heartbeat (once it exists) and release the lock.
 # Each step is guarded so that, with `set -e` active, a failing kill (for
 # example because the heartbeat is already gone) cannot end the handler
 # before the lock has been released.
 _release_stack_lock() {
  if [ -n "$HEARTBEAT_PID" ]; then
    kill "$HEARTBEAT_PID" 2>/dev/null || true
  fi
  if stack_lock_release "$PROJECT_NAME" "$LOCK_HOLDER"; then
    log "Stack lock released (trap)"
  else
    log "WARNING: stack lock release failed (trap)"
  fi
 }
 trap _release_stack_lock EXIT
 log "Stack lock acquired."
 # ---------------------------------------------------------------------------
 # Start heartbeat in background (every 2 minutes)
 # ---------------------------------------------------------------------------
 # Refresh the lock for as long as this script runs; heartbeat errors are
 # ignored so that a single failed refresh does not end the loop.
 heartbeat_loop() {
  while true; do
    sleep 120
    stack_lock_heartbeat "$LOCK_HOLDER" "$PROJECT_NAME" 2>/dev/null || true
  done
 }
 heartbeat_loop &
 HEARTBEAT_PID=$!
 # ---------------------------------------------------------------------------
 # Boot the project stack if formula declares stack_script
 # ---------------------------------------------------------------------------
 if [ -n "$FORMULA_STACK_SCRIPT" ]; then
  if [ -d "$PROJECT_REPO_ROOT" ]; then
    log "Running stack_script: ${FORMULA_STACK_SCRIPT}"
    # The script path is relative to the project repo, so run from its root.
    # Word-split the setting into an array so it may carry arguments
    # (e.g. "scripts/dev.sh restart --full").
    read -ra _stack_cmd <<< "$FORMULA_STACK_SCRIPT"
    if ! (cd "$PROJECT_REPO_ROOT" && bash "${_stack_cmd[@]}"); then
      log "WARNING: stack_script exited non-zero — continuing anyway"
    fi
    # Give the stack a moment to stabilise
    sleep 5
  else
    log "WARNING: PROJECT_REPO_ROOT not found at ${PROJECT_REPO_ROOT} — skipping stack_script"
  fi
 fi
 # ---------------------------------------------------------------------------
 # Build Claude prompt for reproduction
 # ---------------------------------------------------------------------------
 # Screenshots for this run share one prefix: issue number plus UTC timestamp.
 TIMESTAMP=$(date -u '+%Y%m%d-%H%M%S')
 SCREENSHOT_PREFIX="${SCREENSHOT_DIR}/issue-${ISSUE_NUMBER}-${TIMESTAMP}"
 # The prompt is a plain double-quoted string rather than a here-document: a
 # here-document only ends on a line that is exactly its delimiter, so any
 # indentation of that closing line would swallow the rest of the script.
 # Variables expand once; the issue title and body are inserted verbatim.
 CLAUDE_PROMPT=" You are the reproduce-agent. Your task is to reproduce the bug described in issue #${ISSUE_NUMBER} and report your findings.
 ## Issue title
 ${ISSUE_TITLE}
 ## Issue body
 ${ISSUE_BODY}
 ## Your task
 1. **Reproduce the bug** — Use Playwright to navigate the application and follow the reproduction steps from the issue. Take screenshots at each key step and save them to: ${SCREENSHOT_PREFIX}-step-N.png
 2. **Determine outcome** — Did the bug reproduce?
   - YES: Proceed to step 3
   - NO: Write OUTCOME=cannot-reproduce and skip to step 5
 3. **Check logs** — Run: docker compose -f ${PROJECT_REPO_ROOT}/docker-compose.yml logs --tail=200
   Look for: stack traces, error messages, wrong addresses, missing config, HTTP error codes.
 4. **Assess root cause** — Based on logs + browser observations:
   - FOUND: Write OUTCOME=reproduced and ROOT_CAUSE=<one-line summary>
   - INCONCLUSIVE: Write OUTCOME=needs-triage
 5. **Write findings** — Write a markdown report to: /tmp/reproduce-findings-${ISSUE_NUMBER}.md
   Include:
   - Steps you followed
   - What you observed (screenshots referenced by path)
   - Log excerpts (truncated to relevant lines)
   - OUTCOME line (one of: reproduced, cannot-reproduce, needs-triage)
   - ROOT_CAUSE line (if outcome is reproduced)
 6. **Write outcome file** — Write ONLY the outcome word to: /tmp/reproduce-outcome-${ISSUE_NUMBER}.txt
   (one of: reproduced, cannot-reproduce, needs-triage)
 ## Notes
 - The application is accessible at localhost (network_mode: host)
 - Take screenshots liberally — they are evidence
 - If the app is not running or not reachable, write outcome: cannot-reproduce with reason \"stack not reachable\"
 - Timeout: ${FORMULA_TIMEOUT_MINUTES} minutes total
 Begin now."
 # ---------------------------------------------------------------------------
 # Run Claude with Playwright MCP
 # ---------------------------------------------------------------------------
 # The session is capped by timeout(1); stdout and stderr go to a per-issue
 # file that the findings fallback further down reads. A failing session does
 # not abort the script: its status is kept in CLAUDE_EXIT and reported.
 # NOTE(review): confirm the mounted claude CLI accepts `--mcp-server <name>`.
 log "Starting Claude reproduction session (timeout: ${FORMULA_TIMEOUT_MINUTES}m)..."
 CLAUDE_EXIT=0
 timeout "$(( FORMULA_TIMEOUT_MINUTES * 60 ))" \
  claude -p "$CLAUDE_PROMPT" \
    --mcp-server playwright \
    --output-format text \
    --max-turns 40 \
  > "/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>&1 || CLAUDE_EXIT=$?
 # 124 is the status timeout(1) returns when it had to stop the command.
 if [ "$CLAUDE_EXIT" -eq 124 ]; then
  log "WARNING: Claude session timed out after ${FORMULA_TIMEOUT_MINUTES}m"
 elif [ "$CLAUDE_EXIT" -ne 0 ]; then
  log "WARNING: Claude session exited with status ${CLAUDE_EXIT}"
 fi
 # ---------------------------------------------------------------------------
 # Read outcome
 # ---------------------------------------------------------------------------
 # The agent is asked to write a single outcome word to a per-issue file.
 # Whitespace is dropped and case folded before matching; a missing file or
 # an unknown word leaves the default, needs-triage.
 OUTCOME="needs-triage"
 _outcome_file="/tmp/reproduce-outcome-${ISSUE_NUMBER}.txt"
 if [ ! -f "$_outcome_file" ]; then
  log "WARNING: outcome file not found — defaulting to needs-triage"
 else
  _raw=$(tr -d '[:space:]' < "$_outcome_file" | tr '[:upper:]' '[:lower:]')
  if [ "$_raw" = "reproduced" ] || [ "$_raw" = "cannot-reproduce" ] || [ "$_raw" = "needs-triage" ]; then
    OUTCOME="$_raw"
  else
    log "WARNING: unexpected outcome '${_raw}' — defaulting to needs-triage"
  fi
 fi
 log "Outcome: ${OUTCOME}"
 # ---------------------------------------------------------------------------
 # Read findings
 # ---------------------------------------------------------------------------
 # These strings end up in a JSON payload built with `jq --arg`, which passes
 # text through literally. A "\n" typed inside double quotes is a backslash
 # followed by "n", not a line break, so real newlines are used instead.
 _nl=$'\n'
 FINDINGS=""
 if [ -f "/tmp/reproduce-findings-${ISSUE_NUMBER}.md" ]; then
  FINDINGS=$(cat "/tmp/reproduce-findings-${ISSUE_NUMBER}.md")
 else
  # No report written: fall back to the tail of the captured Claude output.
  _claude_tail=$(tail -n 100 "/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>/dev/null || echo '(no output)')
  FINDINGS="Reproduce-agent completed but did not write a findings report. Claude output:${_nl}\`\`\`${_nl}${_claude_tail}${_nl}\`\`\`"
 fi
 # ---------------------------------------------------------------------------
 # Collect screenshot paths for comment
 # ---------------------------------------------------------------------------
 # One bullet per screenshot of this run; the heading is only added when at
 # least one file exists (an unmatched glob stays literal and is skipped).
 SCREENSHOT_LIST=""
 for f in "${SCREENSHOT_PREFIX}"-*.png; do
  [ -e "$f" ] || continue
  if [ -z "$SCREENSHOT_LIST" ]; then
    SCREENSHOT_LIST="${_nl}${_nl}**Screenshots taken:**${_nl}"
  fi
  SCREENSHOT_LIST="${SCREENSHOT_LIST}- \`$(basename "$f")\`${_nl}"
 done
 # ---------------------------------------------------------------------------
 # Label helpers
 # ---------------------------------------------------------------------------
 # Print the id of the repository label named $1, creating it with colour $2
 # when no label of that name is returned by the labels endpoint.
 # Arguments: $1 - label name, $2 - colour (e.g. "#0075ca")
 # Outputs:   the label id, or an empty line if lookup and creation both fail
 # Note: only the first matching id is printed, so a repository holding two
 # labels with the same name still yields a single id for the callers.
 _label_id() {
  local name="$1" color="$2"
  local id
  id=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_API}/labels" 2>/dev/null \
    | jq -r --arg n "$name" '[.[] | select(.name == $n) | .id][0] // empty' 2>/dev/null || echo "")
  if [ -z "$id" ]; then
    # Let jq build the payload so quotes or backslashes in the name or the
    # colour cannot break the JSON.
    local payload
    payload=$(jq -nc --arg name "$name" --arg color "$color" '{name:$name, color:$color}' 2>/dev/null || echo "")
    id=$(curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}/labels" \
      -d "$payload" 2>/dev/null \
      | jq -r '.id // empty' 2>/dev/null || echo "")
  fi
  echo "$id"
 }
 # Attach label id $2 to issue $1. Does nothing for an empty id; request
 # failures are ignored (best-effort).
 _add_label() {
  local issue="$1" label_id="$2"
  if [ -z "$label_id" ]; then
    return 0
  fi
  local endpoint="${FORGE_API}/issues/${issue}/labels"
  local payload="{\"labels\":[${label_id}]}"
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "$endpoint" \
    -d "$payload" >/dev/null 2>&1 || true
 }
 # Detach label id $2 from issue $1. Does nothing for an empty id; request
 # failures are ignored (best-effort).
 _remove_label() {
  local issue="$1" label_id="$2"
  if [ -z "$label_id" ]; then
    return 0
  fi
  local endpoint="${FORGE_API}/issues/${issue}/labels/${label_id}"
  curl -sf -X DELETE \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "$endpoint" >/dev/null 2>&1 || true
 }
 # Post $2 as a comment on issue $1. The body is JSON-encoded by jq; request
 # failures are ignored (best-effort).
 _post_comment() {
  local issue="$1" body="$2"
  local payload
  # "|| true" keeps a jq failure from ending the script under set -e; the
  # request is then still sent, with an empty payload.
  payload=$(jq -nc --arg b "$body" '{body:$b}') || true
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API}/issues/${issue}/comments" \
    -d "$payload" >/dev/null 2>&1 || true
 }
 # ---------------------------------------------------------------------------
 # Apply labels and post findings
 # ---------------------------------------------------------------------------
 # Print the title for the follow-up issue: "fix: <original title>". One
 # leading bug:/feat:/fix: prefix is stripped first, so a title that already
 # carries such a prefix does not come out as "fix: fix: ...".
 _backlog_title() {
  local title="$1"
  title=$(printf '%s\n' "$title" | sed -E 's/^(bug|feat|fix):[[:space:]]*//')
  printf 'fix: %s\n' "$title"
 }
 # Remove bug-report label (we are resolving it)
 BUG_REPORT_ID=$(_label_id "bug-report" "#e4e669")
 _remove_label "$ISSUE_NUMBER" "$BUG_REPORT_ID"
 case "$OUTCOME" in
  reproduced)
    LABEL_NAME="reproduced"
    LABEL_COLOR="#0075ca"
    COMMENT_HEADER="## Reproduce-agent: **Reproduced** :white_check_mark:"
    # Create a backlog issue for the triage/dev agents.
    # ROOT_CAUSE is the first "ROOT_CAUSE=" line of the findings report; with
    # pipefail a grep miss fails the pipeline and selects the fallback text.
    ROOT_CAUSE=$(grep -m1 "^ROOT_CAUSE=" "/tmp/reproduce-findings-${ISSUE_NUMBER}.md" 2>/dev/null \
      | sed 's/^ROOT_CAUSE=//' || echo "See findings on issue #${ISSUE_NUMBER}")
    BACKLOG_BODY="## Summary
 Bug reproduced from issue #${ISSUE_NUMBER}: ${ISSUE_TITLE}
 Root cause (quick log analysis): ${ROOT_CAUSE}
 ## Dependencies
 - #${ISSUE_NUMBER}
 ## Affected files
 - (see findings on issue #${ISSUE_NUMBER})
 ## Acceptance criteria
 - [ ] Root cause confirmed and fixed
 - [ ] Issue #${ISSUE_NUMBER} no longer reproducible"
    log "Creating backlog issue for reproduced bug..."
    curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}/issues" \
      -d "$(jq -nc \
        --arg t "$(_backlog_title "$ISSUE_TITLE")" \
        --arg b "$BACKLOG_BODY" \
        '{title:$t, body:$b}')" >/dev/null 2>&1 || \
      log "WARNING: failed to create backlog issue"
    ;;
  cannot-reproduce)
    LABEL_NAME="cannot-reproduce"
    LABEL_COLOR="#e4e669"
    COMMENT_HEADER="## Reproduce-agent: **Cannot reproduce** :x:"
    ;;
  needs-triage | *)
    # The catch-all keeps LABEL_NAME and friends defined under `set -u` even
    # if OUTCOME ever holds an unexpected value.
    LABEL_NAME="needs-triage"
    LABEL_COLOR="#d93f0b"
    COMMENT_HEADER="## Reproduce-agent: **Needs triage** :mag:"
    ;;
 esac
 OUTCOME_LABEL_ID=$(_label_id "$LABEL_NAME" "$LABEL_COLOR")
 _add_label "$ISSUE_NUMBER" "$OUTCOME_LABEL_ID"
 # _add_label skips silently on an empty id, so say so rather than claim
 # that the label was applied.
 if [ -n "$OUTCOME_LABEL_ID" ]; then
  log "Applied label '${LABEL_NAME}' to issue #${ISSUE_NUMBER}"
 else
  log "WARNING: could not resolve label '${LABEL_NAME}' — issue #${ISSUE_NUMBER} left unlabelled"
 fi
 COMMENT_BODY="${COMMENT_HEADER}
 ${FINDINGS}${SCREENSHOT_LIST}
 ---
 *Reproduce-agent run at $(date -u '+%Y-%m-%d %H:%M:%S UTC') — project: ${PROJECT_NAME}*"
 _post_comment "$ISSUE_NUMBER" "$COMMENT_BODY"
 log "Posted findings to issue #${ISSUE_NUMBER}"
 log "Reproduce-agent done. Outcome: ${OUTCOME}"
--- a/formulas/reproduce.toml
+++ b/formulas/reproduce.toml
@ -0,0 +1,23 @@
 # formulas/reproduce.toml — Reproduce-agent formula
 #
 # Declares the reproduce-agent's runtime parameters.
 # The sidecar entrypoint (docker/reproduce/entrypoint-reproduce.sh) reads
 # stack_script and timeout_minutes from this file when the container starts.
 #
 # stack_script: path (relative to PROJECT_REPO_ROOT) of the script used to
 # restart/rebuild the project stack before reproduction.  Omit (or leave
 # blank) to connect to an existing staging environment instead.
 #
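 # tools: MCP servers intended to be passed to claude via --mcp-server flags.
 # Note: the entrypoint currently hard-codes `--mcp-server playwright` and
 # does not read this key.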
 #
 # timeout_minutes: hard upper bound on the Claude session.
 name            = "reproduce"
 description     = "Navigate the app via Playwright, reproduce a bug-report issue, and do a quick log-based root cause check"
 version         = 1
 # Set stack_script to the restart command for local stacks.
 # Leave empty ("") to target an existing staging environment.
 stack_script    = ""
 tools           = ["playwright"]
 timeout_minutes = 15