disinto/lib/agent-sdk.sh
Agent d653680d64
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline was successful
fix: fix: standardize logging across all agents — capture errors, log exit codes, consistent format (#367)
2026-04-07 21:15:36 +00:00

116 lines
4.9 KiB
Bash

#!/usr/bin/env bash
# agent-sdk.sh — Shared SDK for synchronous Claude agent invocations
#
# Provides agent_run(): one-shot `claude -p` with session persistence.
# Source this from any agent script after defining:
#   SID_FILE — path to persist session ID (e.g. /tmp/dev-session-proj-123.sid)
#   LOGFILE  — path for log output (stderr of `claude` is appended to it)
#   log()    — logging function
#
# Optional variables read by agent_run():
#   CLAUDE_MODEL    — passed as `claude --model` when non-empty
#   CLAUDE_TIMEOUT  — seconds handed to `timeout` (default: 7200)
#   DISINTO_LOG_DIR — base directory for dev/agent-run-last.json (default: /tmp)
#   FORGE_REMOTE    — git remote used for the "unpushed commits" check (default: origin)
#   PRIMARY_BRANCH  — branch used for the "unpushed commits" check (default: main)
#
# Usage:
#   source "$(dirname "$0")/../lib/agent-sdk.sh"
#   agent_run [--resume SESSION_ID] [--worktree DIR] PROMPT
#
# After each call, _AGENT_SESSION_ID holds the session ID (also saved to SID_FILE)
# and _AGENT_LAST_OUTPUT holds the raw output captured from `claude`.
# Call agent_recover_session() on startup to restore a previous session.

# NOTE: this file is meant to be sourced, so these options take effect in the
# sourcing script's shell, not just inside this library.
set -euo pipefail

# Session ID of the most recent run; empty until agent_run or
# agent_recover_session sets it.
_AGENT_SESSION_ID=""
# agent_recover_session — restore session_id from SID_FILE if it exists.
# Call this before agent_run --resume to enable session continuity.
#
# Globals:
#   SID_FILE          (read)    path of the persisted session ID
#   _AGENT_SESSION_ID (written) set to the file's contents when they are non-empty
#   log()             (called)  reports the recovered ID (first 12 characters)
# Returns:
#   0 when there is nothing to recover; otherwise the status of log().
#
# A missing, unreadable or empty SID file leaves _AGENT_SESSION_ID untouched so
# a stale/corrupt file can neither abort the caller (set -e) nor wipe an ID
# that is already loaded.
agent_recover_session() {
  local sid_path="${SID_FILE:-}"
  local saved_sid=""

  if [ -z "$sid_path" ] || [ ! -f "$sid_path" ]; then
    return 0
  fi

  if ! saved_sid=$(cat -- "$sid_path" 2>/dev/null); then
    log "agent_recover_session: cannot read ${sid_path} — continuing without a saved session"
    return 0
  fi

  # An empty file carries no session; do not report a bogus recovery.
  if [ -z "$saved_sid" ]; then
    return 0
  fi

  _AGENT_SESSION_ID="$saved_sid"
  log "agent_recover_session: ${_AGENT_SESSION_ID:0:12}..."
}
# agent_run — synchronous Claude invocation (one-shot `claude -p`).
#
# Usage: agent_run [--resume SESSION_ID] [--worktree DIR] [--] PROMPT
#
# Options:
#   --resume SESSION_ID  resume the given session (ignored when empty)
#   --worktree DIR       directory to run `claude` and the git checks in
#                        (default: current directory)
#   --                   end of options; the next argument is the PROMPT
#   Other arguments starting with "--" are skipped, except that a sole
#   remaining argument is always taken as the PROMPT, so prompts that begin
#   with "--" are not lost.
#
# Globals read:    SID_FILE, LOGFILE, HOME, CLAUDE_MODEL, CLAUDE_TIMEOUT,
#                  DISINTO_LOG_DIR, FORGE_REMOTE, PRIMARY_BRANCH, log()
# Globals written: _AGENT_SESSION_ID (updated each call, persisted to SID_FILE),
#                  _AGENT_LAST_OUTPUT (raw output of the last claude call)
# Files written:   SID_FILE, ${DISINTO_LOG_DIR:-/tmp}/dev/agent-run-last.json
# Returns:         2 when --resume/--worktree is given without a value.
#                  The exit code of `claude` is logged, not returned.
agent_run() {
  local resume_id="" worktree_dir=""
  while [[ "${1:-}" == --* ]]; do
    case "$1" in
      --resume | --worktree)
        if [ "$#" -lt 2 ]; then
          log "agent_run: option $1 requires a value"
          return 2
        fi
        if [ "$1" = "--resume" ]; then
          resume_id="$2"
        else
          worktree_dir="$2"
        fi
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        # The last remaining argument is the PROMPT, even if it looks like
        # an option (e.g. a prompt starting with a markdown rule "---").
        if [ "$#" -eq 1 ]; then
          break
        fi
        shift
        ;;
    esac
  done
  local prompt="${1:-}"

  local -a args=(-p "$prompt" --output-format json --dangerously-skip-permissions --max-turns 200)
  if [ -n "$resume_id" ]; then
    args+=(--resume "$resume_id")
  fi
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    args+=(--model "$CLAUDE_MODEL")
  fi

  local run_dir="${worktree_dir:-$PWD}"
  local lock_file="${HOME}/.claude/session.lock"
  mkdir -p "$(dirname "$lock_file")"
  local timeout_s="${CLAUDE_TIMEOUT:-7200}"

  # flock waits up to 600s for the lock file before running claude;
  # `timeout` exits with 124 when claude exceeds its time budget.
  local output rc=0
  log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})"
  output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "$timeout_s" claude "${args[@]}" 2>>"$LOGFILE") || rc=$?
  if [ "$rc" -eq 124 ]; then
    log "agent_run: timeout after ${timeout_s}s (exit code $rc)"
  elif [ "$rc" -ne 0 ]; then
    log "agent_run: claude exited with code $rc"
    # Log last 3 lines of output for diagnostics
    if [ -n "$output" ]; then
      log "agent_run: last output lines: $(printf '%s\n' "$output" | tail -3)"
    fi
  fi
  if [ -z "$output" ]; then
    log "agent_run: empty output (claude may have crashed or failed, exit code: $rc)"
  fi

  # Extract and persist session_id (non-JSON output simply yields no ID)
  local new_sid
  new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
  if [ -n "$new_sid" ]; then
    _AGENT_SESSION_ID="$new_sid"
    printf '%s' "$new_sid" > "$SID_FILE"
    log "agent_run: session_id=${new_sid:0:12}..."
  fi

  # Save output for diagnostics (no_push, crashes). Best effort: create the
  # directory first so the write does not fail silently on a fresh host.
  _AGENT_LAST_OUTPUT="$output"
  local diag_file="${DISINTO_LOG_DIR:-/tmp}/dev/agent-run-last.json"
  mkdir -p "$(dirname "$diag_file")" 2>/dev/null || true
  printf '%s' "$output" > "$diag_file" 2>/dev/null || true

  # Nudge: if the model stopped without pushing, resume with encouragement.
  # Some models emit end_turn prematurely when confused. A nudge often unsticks them.
  if [ -z "${_AGENT_SESSION_ID:-}" ] || [ -z "$output" ]; then
    return 0
  fi

  local has_changes has_pushed
  # First line of `git status --porcelain`: non-empty when the tree is dirty.
  has_changes=$(cd "$run_dir" && git status --porcelain 2>/dev/null | head -1) || true
  # First commit on HEAD that is not on the remote primary branch, if any.
  has_pushed=$(cd "$run_dir" && git log --oneline "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH:-main}..HEAD" 2>/dev/null | head -1) || true
  if [ -n "$has_pushed" ]; then
    return 0
  fi
  if [ -z "$has_changes" ]; then
    log "agent_run: no push and no changes — skipping nudge"
    return 0
  fi

  # Nudge: there are uncommitted changes
  local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push."
  local -a nudge_args=(-p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50)
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    nudge_args+=(--model "$CLAUDE_MODEL")
  fi
  log "agent_run: nudging (uncommitted changes)"
  local nudge_rc=0
  output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "$timeout_s" claude "${nudge_args[@]}" 2>>"$LOGFILE") || nudge_rc=$?
  if [ "$nudge_rc" -eq 124 ]; then
    log "agent_run: nudge timeout after ${timeout_s}s (exit code $nudge_rc)"
  elif [ "$nudge_rc" -ne 0 ]; then
    log "agent_run: nudge claude exited with code $nudge_rc"
    # Log last 3 lines of output for diagnostics
    if [ -n "$output" ]; then
      log "agent_run: nudge last output lines: $(printf '%s\n' "$output" | tail -3)"
    fi
  fi
  new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
  if [ -n "$new_sid" ]; then
    _AGENT_SESSION_ID="$new_sid"
    printf '%s' "$new_sid" > "$SID_FILE"
  fi
  printf '%s' "$output" > "$diag_file" 2>/dev/null || true
  _AGENT_LAST_OUTPUT="$output"
}