#!/usr/bin/env bash # action-agent.sh — Autonomous action agent: tmux + Claude + action formula # # Usage: ./action-agent.sh [project.toml] # # Lifecycle: # 1. Fetch issue body (action formula) + existing comments # 2. Create isolated git worktree: /tmp/action-{issue}-{timestamp} # 3. Create tmux session: action-{project}-{issue_num} with interactive claude in worktree # 4. Inject initial prompt: formula + comments + phase protocol instructions # 5. Monitor phase file via monitor_phase_loop (shared with dev-agent) # Path A (git output): Claude pushes → handler creates PR → CI poll → review # injection → merge → cleanup (same loop as dev-agent via phase-handler.sh) # Path B (no git output): Claude posts results → PHASE:done → cleanup # 6. For human input: Claude asks via Matrix; reply injected via matrix_listener # 7. Cleanup on terminal phase: kill children, destroy worktree, remove temp files # # Key principle: The runtime creates and destroys. The formula preserves. # The formula must push results before signaling done — the worktree is nuked after. # # Session: action-{project}-{issue_num} (tmux) # Log: action/action-poll-{project}.log set -euo pipefail ISSUE="${1:?Usage: action-agent.sh [project.toml]}" export PROJECT_TOML="${2:-${PROJECT_TOML:-}}" source "$(dirname "$0")/../lib/env.sh" source "$(dirname "$0")/../lib/ci-helpers.sh" source "$(dirname "$0")/../lib/agent-session.sh" source "$(dirname "$0")/../lib/formula-session.sh" # shellcheck source=../dev/phase-handler.sh source "$(dirname "$0")/../dev/phase-handler.sh" SESSION_NAME="action-${PROJECT_NAME}-${ISSUE}" LOCKFILE="/tmp/action-agent-${ISSUE}.lock" LOGFILE="${FACTORY_ROOT}/action/action-poll-${PROJECT_NAME:-default}.log" THREAD_FILE="/tmp/action-thread-${ISSUE}" IDLE_TIMEOUT="${ACTION_IDLE_TIMEOUT:-14400}" # 4h default MAX_LIFETIME="${ACTION_MAX_LIFETIME:-28800}" # 8h default wall-clock cap SESSION_START_EPOCH=$(date +%s) # --- Phase handler globals (agent-specific; defaults in phase-handler.sh) --- # shellcheck disable=SC2034 # used by phase-handler.sh API="${FORGE_API}" BRANCH="action/issue-${ISSUE}" # shellcheck disable=SC2034 # used by phase-handler.sh WORKTREE="/tmp/action-${ISSUE}-$(date +%s)" PHASE_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.phase" IMPL_SUMMARY_FILE="/tmp/action-impl-summary-${PROJECT_NAME:-default}-${ISSUE}.txt" PREFLIGHT_RESULT="/tmp/action-preflight-${ISSUE}.json" SCRATCH_FILE="/tmp/action-${ISSUE}-scratch.md" log() { printf '[%s] action#%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$ISSUE" "$*" >> "$LOGFILE" } notify() { local thread_id="" [ -f "${THREAD_FILE:-}" ] && thread_id=$(cat "$THREAD_FILE" 2>/dev/null || true) matrix_send "action" "⚡ #${ISSUE}: $*" "${thread_id}" 2>/dev/null || true } notify_ctx() { local plain="$1" html="$2" thread_id="" [ -f "${THREAD_FILE:-}" ] && thread_id=$(cat "$THREAD_FILE" 2>/dev/null || true) if [ -n "$thread_id" ]; then matrix_send_ctx "action" "⚡ #${ISSUE}: ${plain}" "⚡ #${ISSUE}: ${html}" "${thread_id}" 2>/dev/null || true else matrix_send "action" "⚡ #${ISSUE}: ${plain}" "" "${ISSUE}" 2>/dev/null || true fi } status() { log "$*" } # --- Action-specific helpers for phase-handler.sh --- cleanup_worktree() { cd "${PROJECT_REPO_ROOT}" 2>/dev/null || true git worktree remove "$WORKTREE" --force 2>/dev/null || true rm -rf "$WORKTREE" # Clear Claude Code session history for this worktree to prevent hallucinated "already done" local claude_project_dir claude_project_dir="$HOME/.claude/projects/$(echo "$WORKTREE" | sed 's|/|-|g; s|^-||')" rm -rf "$claude_project_dir" 2>/dev/null || true log "destroyed worktree: ${WORKTREE}" } cleanup_labels() { :; } # action agent doesn't use in-progress labels # --- Concurrency lock (per issue) --- if [ -f "$LOCKFILE" ]; then LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then log "SKIP: action-agent already running for #${ISSUE} (PID ${LOCK_PID})" exit 0 fi rm -f "$LOCKFILE" fi echo $$ > "$LOCKFILE" cleanup() { # Kill lifetime watchdog if running if [ -n "${LIFETIME_WATCHDOG_PID:-}" ] && kill -0 "$LIFETIME_WATCHDOG_PID" 2>/dev/null; then kill "$LIFETIME_WATCHDOG_PID" 2>/dev/null || true wait "$LIFETIME_WATCHDOG_PID" 2>/dev/null || true fi rm -f "$LOCKFILE" agent_kill_session "$SESSION_NAME" # Kill any remaining child processes spawned during the run local children children=$(jobs -p 2>/dev/null) || true if [ -n "$children" ]; then # shellcheck disable=SC2086 # intentional word splitting kill $children 2>/dev/null || true # shellcheck disable=SC2086 wait $children 2>/dev/null || true fi # Best-effort docker cleanup for containers started during this action (cd "${WORKTREE}" 2>/dev/null && docker compose down 2>/dev/null) || true # Destroy the worktree — the runtime owns the lifecycle cleanup_worktree rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$PREFLIGHT_RESULT" } trap cleanup EXIT # --- Memory guard --- AVAIL_MB=$(awk '/MemAvailable/ {printf "%d", $2/1024}' /proc/meminfo) if [ "$AVAIL_MB" -lt 2000 ]; then log "SKIP: only ${AVAIL_MB}MB available (need 2000MB)" exit 0 fi # --- Fetch issue --- log "fetching issue #${ISSUE}" ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/issues/${ISSUE}") || true if [ -z "$ISSUE_JSON" ] || ! printf '%s' "$ISSUE_JSON" | jq -e '.id' >/dev/null 2>&1; then log "ERROR: failed to fetch issue #${ISSUE}" exit 1 fi ISSUE_TITLE=$(printf '%s' "$ISSUE_JSON" | jq -r '.title') ISSUE_BODY=$(printf '%s' "$ISSUE_JSON" | jq -r '.body // ""') ISSUE_STATE=$(printf '%s' "$ISSUE_JSON" | jq -r '.state') if [ "$ISSUE_STATE" != "open" ]; then log "SKIP: issue #${ISSUE} is ${ISSUE_STATE}" exit 0 fi log "Issue: ${ISSUE_TITLE}" # --- Extract model from YAML front matter (if present) --- YAML_MODEL=$(printf '%s' "$ISSUE_BODY" | \ sed -n '/^---$/,/^---$/p' | grep '^model:' | awk '{print $2}' | tr -d '"' || true) if [ -n "$YAML_MODEL" ]; then export CLAUDE_MODEL="$YAML_MODEL" log "model from front matter: ${YAML_MODEL}" fi # --- Resolve bot username(s) for comment filtering --- _bot_login=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API%%/repos*}/user" | jq -r '.login // empty' 2>/dev/null || true) # Build list: token owner + any extra names from FORGE_BOT_USERNAMES (comma-separated) _bot_logins="${_bot_login}" if [ -n "${FORGE_BOT_USERNAMES:-}" ]; then _bot_logins="${_bot_logins:+${_bot_logins},}${FORGE_BOT_USERNAMES}" fi # --- Fetch existing comments (resume context, excluding bot comments) --- COMMENTS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/issues/${ISSUE}/comments?limit=50") || true PRIOR_COMMENTS="" if [ -n "$COMMENTS_JSON" ] && [ "$COMMENTS_JSON" != "null" ] && [ "$COMMENTS_JSON" != "[]" ]; then PRIOR_COMMENTS=$(printf '%s' "$COMMENTS_JSON" | \ jq -r --arg bots "$_bot_logins" \ '($bots | split(",") | map(select(. != ""))) as $bl | .[] | select(.user.login as $u | $bl | index($u) | not) | "[\(.user.login) at \(.created_at[:19])]\n\(.body)\n---"' 2>/dev/null || true) fi # --- Create Matrix thread for this issue --- ISSUE_URL="${FORGE_WEB}/issues/${ISSUE}" _thread_id=$(matrix_send_ctx "action" \ "⚡ Action #${ISSUE}: ${ISSUE_TITLE} — ${ISSUE_URL}" \ "⚡ Action #${ISSUE}: ${ISSUE_TITLE}") || true THREAD_ID="" if [ -n "${_thread_id:-}" ]; then printf '%s' "$_thread_id" > "$THREAD_FILE" THREAD_ID="$_thread_id" # Export for on-stop-matrix.sh hook (streams Claude output to thread) export MATRIX_THREAD_ID="$_thread_id" # Register thread root in map for listener dispatch (column 4 = issue number) printf '%s\t%s\t%s\t%s\t%s\n' "$_thread_id" "action" "$(date +%s)" "${ISSUE}" "${PROJECT_NAME}" \ >> "${MATRIX_THREAD_MAP:-/tmp/matrix-thread-map}" 2>/dev/null || true fi # --- Create isolated worktree --- log "creating worktree: ${WORKTREE}" cd "${PROJECT_REPO_ROOT}" git fetch origin "${PRIMARY_BRANCH}" 2>/dev/null || true if ! git worktree add "$WORKTREE" "origin/${PRIMARY_BRANCH}" 2>&1; then log "ERROR: worktree creation failed" notify "failed to create worktree for #${ISSUE}" exit 1 fi log "worktree ready: ${WORKTREE}" # --- Read scratch file (compaction survival) --- SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") # --- Build initial prompt --- PRIOR_SECTION="" if [ -n "$PRIOR_COMMENTS" ]; then PRIOR_SECTION="## Prior comments (resume context) ${PRIOR_COMMENTS} " fi THREAD_HINT="" if [ -n "$THREAD_ID" ]; then THREAD_HINT=" The Matrix thread ID for this issue is: ${THREAD_ID} Use it as the thread_event_id when sending Matrix messages so replies are routed back to this session." fi # Build phase protocol from shared function (Path B covered in Instructions section above) PHASE_PROTOCOL_INSTRUCTIONS="$(build_phase_protocol_prompt "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "$BRANCH")" # Write phase protocol to context file for compaction survival write_compact_context "$PHASE_FILE" "$PHASE_PROTOCOL_INSTRUCTIONS" INITIAL_PROMPT="You are an action agent. Your job is to execute the action formula in the issue below. ## Issue #${ISSUE}: ${ISSUE_TITLE} ${ISSUE_BODY} ${SCRATCH_CONTEXT} ${PRIOR_SECTION}## Instructions 1. Read the action formula steps in the issue body carefully. 2. Execute each step in order using your Bash tool and any other tools available. 3. Post progress as comments on issue #${ISSUE} after significant steps: curl -sf -X POST \\ -H \"Authorization: token \${FORGE_TOKEN}\" \\ -H 'Content-Type: application/json' \\ \"${FORGE_API}/issues/${ISSUE}/comments\" \\ -d \"{\\\"body\\\": \\\"your comment here\\\"}\" 4. If a step requires human input or approval, send a Matrix message explaining what you need, then wait. A human will reply and the reply will be injected into this session automatically.${THREAD_HINT} ### Path A: If this action produces code changes (e.g. config updates, baselines): - You are already in an isolated worktree at: ${WORKTREE} - Create and switch to branch: git checkout -b ${BRANCH} - Make your changes, commit, and push: git push origin ${BRANCH} - **IMPORTANT:** The worktree is destroyed after completion. Push all results before signaling done — unpushed work will be lost. - Follow the phase protocol below — the orchestrator handles PR creation, CI monitoring, and review injection. ### Path B: If this action produces no code changes (investigation, report): - Post results as a comment on issue #${ISSUE}. - **IMPORTANT:** The worktree is destroyed after completion. Copy any files you need to persistent paths before signaling done. - Close the issue: curl -sf -X PATCH \\ -H \"Authorization: token \${FORGE_TOKEN}\" \\ -H 'Content-Type: application/json' \\ \"${FORGE_API}/issues/${ISSUE}\" \\ -d '{\"state\": \"closed\"}' - Signal completion: echo \"PHASE:done\" > \"${PHASE_FILE}\" 5. Environment variables available in your bash sessions: FORGE_TOKEN, FORGE_API, FORGE_REPO, FORGE_WEB, PROJECT_NAME (all sourced from ${FACTORY_ROOT}/.env) ### CRITICAL: Never embed secrets in issue bodies, comments, or PR descriptions - NEVER put API keys, tokens, passwords, or private keys in issue text or comments. - Always reference secrets via env var names (e.g. \\\$BASE_RPC_URL, \\\${FORGE_TOKEN}). - If a formula step needs a secret, read it from .env or the environment at runtime. - Before posting any comment, verify it contains no credentials, hex keys > 32 chars, or URLs with embedded API keys. If the prior comments above show work already completed, resume from where it left off. ${SCRATCH_INSTRUCTION} ${PHASE_PROTOCOL_INSTRUCTIONS}" # --- Create tmux session --- log "creating tmux session: ${SESSION_NAME}" if ! create_agent_session "${SESSION_NAME}" "${WORKTREE}" "${PHASE_FILE}"; then log "ERROR: failed to create tmux session" exit 1 fi # --- Inject initial prompt --- inject_formula "${SESSION_NAME}" "${INITIAL_PROMPT}" log "initial prompt injected into session" matrix_send "action" "⚡ #${ISSUE}: session started — ${ISSUE_TITLE}" \ "${THREAD_ID}" 2>/dev/null || true # --- Wall-clock lifetime watchdog (background) --- # Caps total session time independently of idle timeout. When the cap is # hit the watchdog kills the tmux session, posts a summary comment on the # issue, and writes PHASE:failed so monitor_phase_loop exits. _lifetime_watchdog() { local remaining=$(( MAX_LIFETIME - ($(date +%s) - SESSION_START_EPOCH) )) [ "$remaining" -le 0 ] && remaining=1 sleep "$remaining" local hours=$(( MAX_LIFETIME / 3600 )) log "MAX_LIFETIME (${hours}h) reached — killing session" agent_kill_session "$SESSION_NAME" # Post summary comment on issue local body="Action session killed: wall-clock lifetime cap (${hours}h) reached." curl -sf -X POST \ -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${ISSUE}/comments" \ -d "{\"body\": \"${body}\"}" >/dev/null 2>&1 || true printf 'PHASE:failed\nReason: max_lifetime (%sh) reached\n' "$hours" > "$PHASE_FILE" # Touch phase-changed marker so monitor_phase_loop picks up immediately touch "/tmp/phase-changed-${SESSION_NAME}.marker" } _lifetime_watchdog & LIFETIME_WATCHDOG_PID=$! # --- Monitor phase loop (shared with dev-agent) --- status "monitoring phase: ${PHASE_FILE} (action agent)" monitor_phase_loop "$PHASE_FILE" "$IDLE_TIMEOUT" _on_phase_change "$SESSION_NAME" # Handle exit reason from monitor_phase_loop case "${_MONITOR_LOOP_EXIT:-}" in idle_timeout) notify_ctx \ "session idle for $((IDLE_TIMEOUT / 3600))h — killed" \ "session idle for $((IDLE_TIMEOUT / 3600))h — killed" # Post diagnostic comment + label blocked post_blocked_diagnostic "idle_timeout" rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$THREAD_FILE" "$SCRATCH_FILE" ;; idle_prompt) # Notification + blocked label already handled by _on_phase_change(PHASE:failed) callback rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$THREAD_FILE" "$SCRATCH_FILE" ;; PHASE:failed) # Check if this was a max_lifetime kill (phase file contains the reason) if grep -q 'max_lifetime' "$PHASE_FILE" 2>/dev/null; then notify_ctx \ "session killed — wall-clock cap ($((MAX_LIFETIME / 3600))h) reached" \ "session killed — wall-clock cap ($((MAX_LIFETIME / 3600))h) reached" post_blocked_diagnostic "max_lifetime" fi rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$THREAD_FILE" "$SCRATCH_FILE" ;; done) # Belt-and-suspenders: callback handles primary cleanup, # but ensure sentinel files are removed if callback was interrupted rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$THREAD_FILE" "$SCRATCH_FILE" ;; esac log "action-agent finished for issue #${ISSUE}"