fix: Migrate action-agent.sh to SDK + shared libraries (#5) #13

Merged
dev-bot merged 2 commits from fix/issue-5 into main 2026-03-28 11:25:53 +00:00
3 changed files with 140 additions and 164 deletions

View file

@ -199,9 +199,9 @@ check_script lib/ci-debug.sh
check_script lib/parse-deps.sh
# Agent scripts — list cross-sourced files where function scope flows across files.
# phase-handler.sh calls helpers defined by its sourcing agent (action-agent.sh).
# phase-handler.sh defines default callback stubs; sourcing agents may override.
check_script dev/dev-agent.sh
check_script dev/phase-handler.sh action/action-agent.sh lib/secret-scan.sh
check_script dev/phase-handler.sh lib/secret-scan.sh
check_script dev/dev-poll.sh
check_script dev/phase-test.sh
check_script gardener/gardener-run.sh
@ -215,7 +215,7 @@ check_script vault/vault-fire.sh
check_script vault/vault-poll.sh
check_script vault/vault-reject.sh
check_script action/action-poll.sh
check_script action/action-agent.sh dev/phase-handler.sh
check_script action/action-agent.sh
check_script supervisor/supervisor-run.sh
check_script supervisor/preflight.sh
check_script predictor/predictor-run.sh

View file

@ -1,73 +1,72 @@
#!/usr/bin/env bash
# action-agent.sh — Autonomous action agent: tmux + Claude + action formula
# =============================================================================
# action-agent.sh — Synchronous action agent: SDK + shared libraries
#
# Synchronous bash loop using claude -p (one-shot invocation).
# No tmux sessions, no phase files — the bash script IS the state machine.
#
# Usage: ./action-agent.sh <issue-number> [project.toml]
#
# Lifecycle:
# 1. Fetch issue body (action formula) + existing comments
# 2. Create isolated git worktree: /tmp/action-{issue}-{timestamp}
# 3. Create tmux session: action-{project}-{issue_num} with interactive claude in worktree
# 4. Inject initial prompt: formula + comments + phase protocol instructions
# 5. Monitor phase file via monitor_phase_loop (shared with dev-agent)
# Path A (git output): Claude pushes → handler creates PR → CI poll → review
# injection → merge → cleanup (same loop as dev-agent via phase-handler.sh)
# Path B (no git output): Claude posts results → PHASE:done → cleanup
# 6. For human input: Claude writes PHASE:escalate; human responds via vault/forge
# 7. Cleanup on terminal phase: kill children, destroy worktree, remove temp files
# Flow:
# 1. Preflight: issue_check_deps(), memory guard, concurrency lock
# 2. Parse model from YAML front matter in issue body (custom model selection)
# 3. Worktree: worktree_create() for action isolation
# 4. Load formula from issue body
# 5. Build prompt: formula + prior non-bot comments (resume context)
# 6. agent_run(worktree, prompt) → Claude executes action, may push
# 7. If pushed: pr_walk_to_merge() from lib/pr-lifecycle.sh
# 8. Cleanup: worktree_cleanup(), issue_close()
#
# Key principle: The runtime creates and destroys. The formula preserves.
# The formula must push results before signaling done — the worktree is nuked after.
# Action-specific (stays in runner):
# - YAML front matter parsing (model selection)
# - Bot username filtering for prior comments
# - Lifetime watchdog (MAX_LIFETIME=8h wall-clock cap)
# - Child process cleanup (docker compose, background jobs)
#
# From shared libraries:
# - Issue lifecycle: lib/issue-lifecycle.sh
# - Worktree: lib/worktree.sh
# - PR lifecycle: lib/pr-lifecycle.sh
# - Agent SDK: lib/agent-sdk.sh
#
# Session: action-{project}-{issue_num} (tmux)
# Log: action/action-poll-{project}.log
# =============================================================================
set -euo pipefail
ISSUE="${1:?Usage: action-agent.sh <issue-number> [project.toml]}"
export PROJECT_TOML="${2:-${PROJECT_TOML:-}}"
source "$(dirname "$0")/../lib/env.sh"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
# shellcheck source=../lib/env.sh
source "$FACTORY_ROOT/lib/env.sh"
# Use action-bot's own Forgejo identity (#747)
FORGE_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}"
source "$(dirname "$0")/../lib/ci-helpers.sh"
source "$(dirname "$0")/../lib/agent-session.sh"
source "$(dirname "$0")/../lib/formula-session.sh"
source "$(dirname "$0")/../lib/worktree.sh"
# shellcheck source=../dev/phase-handler.sh
source "$(dirname "$0")/../dev/phase-handler.sh"
SESSION_NAME="action-${PROJECT_NAME}-${ISSUE}"
# shellcheck source=../lib/ci-helpers.sh
source "$FACTORY_ROOT/lib/ci-helpers.sh"
# shellcheck source=../lib/worktree.sh
source "$FACTORY_ROOT/lib/worktree.sh"
# shellcheck source=../lib/issue-lifecycle.sh
source "$FACTORY_ROOT/lib/issue-lifecycle.sh"
# shellcheck source=../lib/agent-sdk.sh
source "$FACTORY_ROOT/lib/agent-sdk.sh"
# shellcheck source=../lib/pr-lifecycle.sh
source "$FACTORY_ROOT/lib/pr-lifecycle.sh"
BRANCH="action/issue-${ISSUE}"
WORKTREE="/tmp/action-${ISSUE}-$(date +%s)"
LOCKFILE="/tmp/action-agent-${ISSUE}.lock"
LOGFILE="${DISINTO_LOG_DIR}/action/action-poll-${PROJECT_NAME:-default}.log"
IDLE_TIMEOUT="${ACTION_IDLE_TIMEOUT:-14400}" # 4h default
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
SID_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.sid"
MAX_LIFETIME="${ACTION_MAX_LIFETIME:-28800}" # 8h default wall-clock cap
SESSION_START_EPOCH=$(date +%s)
# --- Phase handler globals (agent-specific; defaults in phase-handler.sh) ---
# shellcheck disable=SC2034 # used by phase-handler.sh
API="${FORGE_API}"
BRANCH="action/issue-${ISSUE}"
# shellcheck disable=SC2034 # used by phase-handler.sh
WORKTREE="/tmp/action-${ISSUE}-$(date +%s)"
PHASE_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.phase"
IMPL_SUMMARY_FILE="/tmp/action-impl-summary-${PROJECT_NAME:-default}-${ISSUE}.txt"
PREFLIGHT_RESULT="/tmp/action-preflight-${ISSUE}.json"
SCRATCH_FILE="/tmp/action-${ISSUE}-scratch.md"
log() {
printf '[%s] action#%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$ISSUE" "$*" >> "$LOGFILE"
}
status() {
log "$*"
}
# --- Action-specific helpers for phase-handler.sh ---
cleanup_worktree() {
worktree_cleanup "$WORKTREE"
log "destroyed worktree: ${WORKTREE}"
}
cleanup_labels() { :; } # action agent doesn't use in-progress labels
# --- Concurrency lock (per issue) ---
if [ -f "$LOCKFILE" ]; then
LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "")
@ -87,7 +86,6 @@ cleanup() {
wait "$LIFETIME_WATCHDOG_PID" 2>/dev/null || true
fi
rm -f "$LOCKFILE"
agent_kill_session "$SESSION_NAME"
# Kill any remaining child processes spawned during the run
local children
children=$(jobs -p 2>/dev/null) || true
@ -100,23 +98,17 @@ cleanup() {
# Best-effort docker cleanup for containers started during this action
(cd "${WORKTREE}" 2>/dev/null && docker compose down 2>/dev/null) || true
# Preserve worktree on crash for debugging; clean up on success
local final_phase=""
[ -f "$PHASE_FILE" ] && final_phase=$(head -1 "$PHASE_FILE" 2>/dev/null || true)
if [ "${final_phase:-}" = "PHASE:crashed" ] || [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ] || [ "$exit_code" -ne 0 ]; then
worktree_preserve "$WORKTREE" "crashed (exit=$exit_code, phase=${final_phase:-unknown})"
if [ "$exit_code" -ne 0 ]; then
worktree_preserve "$WORKTREE" "crashed (exit=$exit_code)"
else
cleanup_worktree
worktree_cleanup "$WORKTREE"
fi
rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$PREFLIGHT_RESULT"
rm -f "$SID_FILE"
}
trap cleanup EXIT
# --- Memory guard ---
AVAIL_MB=$(awk '/MemAvailable/ {printf "%d", $2/1024}' /proc/meminfo)
if [ "$AVAIL_MB" -lt 2000 ]; then
log "SKIP: only ${AVAIL_MB}MB available (need 2000MB)"
exit 0
fi
memory_guard 2000
# --- Fetch issue ---
log "fetching issue #${ISSUE}"
@ -139,25 +131,10 @@ fi
log "Issue: ${ISSUE_TITLE}"
# --- Dependency check (skip before spawning Claude) ---
DEPS=$(printf '%s' "$ISSUE_BODY" | bash "${FACTORY_ROOT}/lib/parse-deps.sh")
if [ -n "$DEPS" ]; then
ALL_MET=true
while IFS= read -r dep; do
[ -z "$dep" ] && continue
DEP_STATE=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${dep}" | jq -r '.state // "open"') || DEP_STATE="open"
if [ "$DEP_STATE" != "closed" ]; then
log "SKIP: dependency #${dep} still open — not spawning session"
ALL_MET=false
break
fi
done <<< "$DEPS"
if [ "$ALL_MET" = false ]; then
rm -f "$LOCKFILE"
# --- Dependency check (shared library) ---
if ! issue_check_deps "$ISSUE"; then
log "SKIP: issue #${ISSUE} blocked by: ${_ISSUE_BLOCKED_BY[*]}"
exit 0
fi
log "all dependencies met"
fi
# --- Extract model from YAML front matter (if present) ---
@ -191,28 +168,23 @@ if [ -n "$COMMENTS_JSON" ] && [ "$COMMENTS_JSON" != "null" ] && [ "$COMMENTS_JSO
"[\(.user.login) at \(.created_at[:19])]\n\(.body)\n---"' 2>/dev/null || true)
fi
# --- Create isolated worktree ---
log "creating worktree: ${WORKTREE}"
# --- Determine git remote ---
cd "${PROJECT_REPO_ROOT}"
# Determine which git remote corresponds to FORGE_URL
_forge_host=$(echo "$FORGE_URL" | sed 's|https\?://||; s|/.*||')
FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}')
FORGE_REMOTE="${FORGE_REMOTE:-origin}"
export FORGE_REMOTE
# --- Create isolated worktree ---
log "creating worktree: ${WORKTREE}"
git fetch "${FORGE_REMOTE}" "${PRIMARY_BRANCH}" 2>/dev/null || true
if ! git worktree add "$WORKTREE" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" 2>&1; then
if ! worktree_create "$WORKTREE" "$BRANCH"; then
log "ERROR: worktree creation failed"
exit 1
fi
log "worktree ready: ${WORKTREE}"
# --- Read scratch file (compaction survival) ---
SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE")
SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE")
# --- Build initial prompt ---
# --- Build prompt ---
PRIOR_SECTION=""
if [ -n "$PRIOR_COMMENTS" ]; then
PRIOR_SECTION="## Prior comments (resume context)
@ -222,19 +194,15 @@ ${PRIOR_COMMENTS}
"
fi
# Build phase protocol from shared function (Path B covered in Instructions section above)
PHASE_PROTOCOL_INSTRUCTIONS="$(build_phase_protocol_prompt "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "$BRANCH")"
GIT_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE")
# Write phase protocol to context file for compaction survival
write_compact_context "$PHASE_FILE" "$PHASE_PROTOCOL_INSTRUCTIONS"
INITIAL_PROMPT="You are an action agent. Your job is to execute the action formula
PROMPT="You are an action agent. Your job is to execute the action formula
in the issue below.
## Issue #${ISSUE}: ${ISSUE_TITLE}
${ISSUE_BODY}
${SCRATCH_CONTEXT}
${PRIOR_SECTION}## Instructions
1. Read the action formula steps in the issue body carefully.
@ -248,29 +216,20 @@ ${PRIOR_SECTION}## Instructions
\"${FORGE_API}/issues/${ISSUE}/comments\" \\
-d \"{\\\"body\\\": \\\"your comment here\\\"}\"
4. If a step requires human input or approval, write PHASE:escalate with a reason.
A human will review and respond via the forge.
4. If a step requires human input or approval, post a comment explaining what
is needed and stop — the orchestrator will block the issue.
### Path A: If this action produces code changes (e.g. config updates, baselines):
- You are already in an isolated worktree at: ${WORKTREE}
- Create and switch to branch: git checkout -b ${BRANCH}
- You are on branch: ${BRANCH}
- Make your changes, commit, and push: git push ${FORGE_REMOTE} ${BRANCH}
- **IMPORTANT:** The worktree is destroyed after completion. Push all
results before signaling done — unpushed work will be lost.
- Follow the phase protocol below — the orchestrator handles PR creation,
CI monitoring, and review injection.
results before finishing — unpushed work will be lost.
### Path B: If this action produces no code changes (investigation, report):
- Post results as a comment on issue #${ISSUE}.
- **IMPORTANT:** The worktree is destroyed after completion. Copy any
files you need to persistent paths before signaling done.
- Close the issue:
curl -sf -X PATCH \\
-H \"Authorization: token \${FORGE_TOKEN}\" \\
-H 'Content-Type: application/json' \\
\"${FORGE_API}/issues/${ISSUE}\" \\
-d '{\"state\": \"closed\"}'
- Signal completion: echo \"PHASE:done\" > \"${PHASE_FILE}\"
files you need to persistent paths before finishing.
5. Environment variables available in your bash sessions:
FORGE_TOKEN, FORGE_API, FORGE_REPO, FORGE_WEB, PROJECT_NAME
@ -286,73 +245,79 @@ ${PRIOR_SECTION}## Instructions
If the prior comments above show work already completed, resume from where it
left off.
${SCRATCH_INSTRUCTION}
${PHASE_PROTOCOL_INSTRUCTIONS}"
# --- Create tmux session ---
log "creating tmux session: ${SESSION_NAME}"
if ! create_agent_session "${SESSION_NAME}" "${WORKTREE}" "${PHASE_FILE}"; then
log "ERROR: failed to create tmux session"
exit 1
fi
# --- Inject initial prompt ---
inject_formula "${SESSION_NAME}" "${INITIAL_PROMPT}"
log "initial prompt injected into session"
${GIT_INSTRUCTIONS}"
# --- Wall-clock lifetime watchdog (background) ---
# Caps total session time independently of idle timeout. When the cap is
# hit the watchdog kills the tmux session, posts a summary comment on the
# issue, and writes PHASE:failed so monitor_phase_loop exits.
# Caps total run time independently of claude -p timeout. When the cap is
# hit the watchdog kills the main process, which triggers cleanup via trap.
_lifetime_watchdog() {
local remaining=$(( MAX_LIFETIME - ($(date +%s) - SESSION_START_EPOCH) ))
[ "$remaining" -le 0 ] && remaining=1
sleep "$remaining"
local hours=$(( MAX_LIFETIME / 3600 ))
log "MAX_LIFETIME (${hours}h) reached — killing session"
agent_kill_session "$SESSION_NAME"
log "MAX_LIFETIME (${hours}h) reached — killing agent"
# Post summary comment on issue
local body="Action session killed: wall-clock lifetime cap (${hours}h) reached."
local body="Action agent killed: wall-clock lifetime cap (${hours}h) reached."
curl -sf -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \
"${FORGE_API}/issues/${ISSUE}/comments" \
-d "{\"body\": \"${body}\"}" >/dev/null 2>&1 || true
printf 'PHASE:failed\nReason: max_lifetime (%sh) reached\n' "$hours" > "$PHASE_FILE"
# Touch phase-changed marker so monitor_phase_loop picks up immediately
touch "/tmp/phase-changed-${SESSION_NAME}.marker"
kill $$ 2>/dev/null || true
}
_lifetime_watchdog &
LIFETIME_WATCHDOG_PID=$!
# --- Monitor phase loop (shared with dev-agent) ---
status "monitoring phase: ${PHASE_FILE} (action agent)"
monitor_phase_loop "$PHASE_FILE" "$IDLE_TIMEOUT" _on_phase_change "$SESSION_NAME"
# --- Run agent ---
log "running agent (worktree: ${WORKTREE})"
agent_run --worktree "$WORKTREE" "$PROMPT"
log "agent_run complete"
# Handle exit reason from monitor_phase_loop
case "${_MONITOR_LOOP_EXIT:-}" in
idle_timeout)
# Post diagnostic comment + label blocked
post_blocked_diagnostic "idle_timeout"
rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE"
;;
idle_prompt)
# Notification + blocked label already handled by _on_phase_change(PHASE:failed) callback
rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE"
;;
PHASE:failed)
# Check if this was a max_lifetime kill (phase file contains the reason)
if grep -q 'max_lifetime' "$PHASE_FILE" 2>/dev/null; then
post_blocked_diagnostic "max_lifetime"
# --- Detect if branch was pushed (Path A vs Path B) ---
PUSHED=false
# Check if remote branch exists
git fetch "${FORGE_REMOTE}" "$BRANCH" 2>/dev/null || true
if git rev-parse --verify "${FORGE_REMOTE}/${BRANCH}" >/dev/null 2>&1; then
PUSHED=true
fi
# Fallback: check local commits ahead of base
if [ "$PUSHED" = false ]; then
if git -C "$WORKTREE" log "${FORGE_REMOTE}/${PRIMARY_BRANCH}..${BRANCH}" --oneline 2>/dev/null | grep -q .; then
PUSHED=true
fi
rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE"
fi
if [ "$PUSHED" = true ]; then
# --- Path A: code changes pushed — create PR and walk to merge ---
log "branch pushed — creating PR"
PR_NUMBER=""
PR_NUMBER=$(pr_create "$BRANCH" "action: ${ISSUE_TITLE}" \
"Closes #${ISSUE}
Automated action execution by action-agent.") || true
if [ -n "$PR_NUMBER" ]; then
log "walking PR #${PR_NUMBER} to merge"
pr_walk_to_merge "$PR_NUMBER" "$_AGENT_SESSION_ID" "$WORKTREE" || true
case "${_PR_WALK_EXIT_REASON:-}" in
merged)
log "PR #${PR_NUMBER} merged — closing issue"
issue_close "$ISSUE"
;;
done)
# Belt-and-suspenders: callback handles primary cleanup,
# but ensure sentinel files are removed if callback was interrupted
rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$SCRATCH_FILE"
*)
log "PR #${PR_NUMBER} not merged (reason: ${_PR_WALK_EXIT_REASON:-unknown})"
issue_block "$ISSUE" "pr_not_merged: ${_PR_WALK_EXIT_REASON:-unknown}"
;;
esac
esac
else
log "ERROR: failed to create PR"
issue_block "$ISSUE" "pr_creation_failed"
fi
else
# --- Path B: no code changes — close issue directly ---
log "no branch pushed — closing issue (Path B)"
issue_close "$ISSUE"
fi
log "action-agent finished for issue #${ISSUE}"

View file

@ -34,6 +34,17 @@ source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh"
# shellcheck source=../lib/mirrors.sh
source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh"
# --- Default callback stubs (agents can override after sourcing) ---
# cleanup_worktree and cleanup_labels are called during phase transitions.
# Provide no-op defaults so phase-handler.sh is self-contained; sourcing
# agents override these with real implementations.
if ! declare -f cleanup_worktree >/dev/null 2>&1; then
cleanup_worktree() { :; }
fi
if ! declare -f cleanup_labels >/dev/null 2>&1; then
cleanup_labels() { :; }
fi
# --- Default globals (agents can override after sourcing) ---
: "${CI_POLL_TIMEOUT:=1800}"
: "${REVIEW_POLL_TIMEOUT:=10800}"