#!/usr/bin/env bash
# lib/agent-session.sh — Reusable tmux + Claude agent runtime
#
# Source this in any agent script after lib/env.sh.
#
# Required globals (set by the caller before using functions):
#   SESSION_NAME   — tmux session name (e.g., "dev-harb-935")
#   PHASE_FILE     — path to phase file
#   LOGFILE        — path to log file
#   ISSUE          — issue/context identifier (used in log prefix)
#   STATUSFILE     — path to status file
#   THREAD_FILE    — path to Matrix thread ID file
#   WORKTREE       — agent working directory (for crash recovery)
#   PRIMARY_BRANCH — primary git branch (for crash recovery diff)
#
# Optional globals:
#   PHASE_POLL_INTERVAL — seconds between phase polls (default: 30)
#
# Globals exported by monitor_phase_loop (readable by phase callbacks):
#   LAST_PHASE_MTIME   — mtime of the phase file when the current phase was dispatched
#   _MONITOR_LOOP_EXIT — set on return: "idle_timeout", "crash_recovery_failed",
#                        or "callback_break"

# log — Timestamped logging to LOGFILE
# Appends one line "[<UTC timestamp>] #<ISSUE> <message>" to LOGFILE.
# Falls back to /dev/null when LOGFILE is unset and to "?" when ISSUE is unset.
# Usage: log MESSAGE...
log() {
  printf '[%s] #%s %s\n' \
    "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "${ISSUE:-?}" "$*" \
    >> "${LOGFILE:-/dev/null}"
}

# status — Log + write current status to STATUSFILE
# STATUSFILE is overwritten (not appended) so it always holds only the latest
# status line; the same message is then appended to LOGFILE via log.
# Usage: status MESSAGE...
status() {
  printf '[%s] agent #%s: %s\n' \
    "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "${ISSUE:-?}" "$*" \
    > "${STATUSFILE:-/dev/null}"
  log "$*"
}

# notify — Send plain-text Matrix notification into the issue thread
# Reads the thread ID from THREAD_FILE when that file exists; otherwise an
# empty thread ID is passed. Delivery is best-effort: any matrix_send failure
# (including matrix_send not being defined) is deliberately ignored so that a
# notification problem never stops the agent.
# Usage: notify MESSAGE...
notify() {
  local thread_id=""
  if [ -f "${THREAD_FILE:-}" ]; then
    thread_id=$(cat "$THREAD_FILE" 2>/dev/null || true)
  fi
  # ${ISSUE:-?} matches log/status and keeps this safe under `set -u`.
  matrix_send "dev" "🔧 #${ISSUE:-?}: $*" "${thread_id}" 2>/dev/null || true
}

# notify_ctx — Send rich Matrix notification with HTML context into the issue thread
# Falls back to plain send (registering a thread root) when no thread exists.
# Usage: notify_ctx PLAIN_TEXT HTML_TEXT
#   PLAIN_TEXT — plain-text body
#   HTML_TEXT  — HTML body (only used when a thread ID is available)
# Delivery is best-effort: send failures are deliberately ignored.
notify_ctx() {
  local plain="$1" html="$2"
  local thread_id=""
  if [ -f "${THREAD_FILE:-}" ]; then
    thread_id=$(cat "$THREAD_FILE" 2>/dev/null || true)
  fi
  if [ -n "$thread_id" ]; then
    matrix_send_ctx "dev" "🔧 #${ISSUE:-?}: ${plain}" "🔧 #${ISSUE:-?}: ${html}" \
      "${thread_id}" 2>/dev/null || true
  else
    # No thread — fall back to plain send so a thread root is registered
    # (the issue identifier is passed as the fourth argument for that purpose).
    matrix_send "dev" "🔧 #${ISSUE:-?}: ${plain}" "" "${ISSUE:-}" 2>/dev/null || true
  fi
}

# read_phase — Read current value from PHASE_FILE, stripping whitespace
# Prints the first line of PHASE_FILE with all whitespace removed; prints
# nothing when the file is missing or unreadable.
# Usage: read_phase
read_phase() {
  { cat "${PHASE_FILE:-}" 2>/dev/null || true; } | head -n 1 | tr -d '[:space:]'
}

# wait_for_claude_ready — Poll SESSION_NAME tmux pane until Claude shows ❯ prompt
# Polls every 2 seconds. On timeout a warning is logged and 1 is returned;
# the caller decides whether to proceed.
# Usage: wait_for_claude_ready [timeout_seconds]   (default: 120)
# Returns: 0 if ready, 1 if timeout
wait_for_claude_ready() {
  local timeout="${1:-120}"
  local elapsed=0
  while [ "$elapsed" -lt "$timeout" ]; do
    # Claude Code shows ❯ when ready for input
    if tmux capture-pane -t "${SESSION_NAME}" -p 2>/dev/null | grep -q '❯'; then
      return 0
    fi
    sleep 2
    elapsed=$((elapsed + 2))
  done
  log "WARNING: claude not ready after ${timeout}s — proceeding anyway"
  return 1
}

# inject_into_session — Paste text into the tmux session via tmux buffer
# The text is written to a temp file, loaded into a named tmux buffer, pasted
# into SESSION_NAME, and submitted with Enter. The temp file and buffer are
# removed on every path.
# Usage: inject_into_session TEXT
# Returns: 0 on success, non-zero if the temp file or the tmux buffer could
#          not be created, or if pasting/submitting failed
inject_into_session() {
  local text="$1"
  local buffer="inject-${ISSUE:-}"
  local tmpfile rc=0

  # A readiness timeout is already logged by wait_for_claude_ready and we
  # proceed anyway; the explicit `|| true` keeps that true under `set -e`.
  wait_for_claude_ready 120 || true

  if ! tmpfile=$(mktemp /tmp/tmux-inject-XXXXXX); then
    log "ERROR: could not create temp file for tmux injection"
    return 1
  fi
  printf '%s' "$text" > "$tmpfile"

  if tmux load-buffer -b "$buffer" "$tmpfile"; then
    tmux paste-buffer -t "${SESSION_NAME}" -b "$buffer" || rc=$?
    # Give the pane a moment to receive the paste before submitting it.
    sleep 0.5
    tmux send-keys -t "${SESSION_NAME}" "" Enter || rc=$?
    tmux delete-buffer -b "$buffer" 2>/dev/null || true
  else
    # Nothing was loaded, so pasting would target a missing buffer.
    log "ERROR: could not load tmux buffer ${buffer}"
    rc=1
  fi

  rm -f "$tmpfile"
  return "$rc"
}

# kill_tmux_session — Kill SESSION_NAME tmux session
# A missing session is not an error.
# Usage: kill_tmux_session
kill_tmux_session() {
  tmux kill-session -t "${SESSION_NAME}" 2>/dev/null || true
}

# create_agent_session — Create (or reuse) a detached tmux session running claude
# Sets SESSION_NAME to $1 and uses $2 as the working directory.
# Usage: create_agent_session [session_name] [workdir]
#   session_name — defaults to the current SESSION_NAME
#   workdir      — defaults to WORKTREE
# An already-running session with that name is reused as-is (no readiness
# wait). A freshly created session is killed again if Claude does not show
# its prompt within 120 seconds.
# Returns: 0 on success, 1 on failure
create_agent_session() {
  SESSION_NAME="${1:-${SESSION_NAME:-}}"
  local workdir="${2:-${WORKTREE:-}}"

  # Refuse an empty name rather than hand tmux an empty -t/-s target.
  if [ -z "${SESSION_NAME}" ]; then
    log "ERROR: create_agent_session called without a session name"
    return 1
  fi

  if tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then
    log "reusing existing tmux session: ${SESSION_NAME}"
    return 0
  fi

  # Kill any stale entry before creating
  tmux kill-session -t "${SESSION_NAME}" 2>/dev/null || true

  # Check the exit status explicitly so a failure is logged and reported via
  # the return code instead of aborting a caller that runs under `set -e`.
  if ! tmux new-session -d -s "${SESSION_NAME}" -c "${workdir}" \
    "claude --dangerously-skip-permissions"; then
    log "ERROR: failed to create tmux session ${SESSION_NAME}"
    return 1
  fi

  # new-session can succeed and the session still be gone (e.g. the command
  # exited immediately), so verify it is really there.
  if ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then
    log "ERROR: failed to create tmux session ${SESSION_NAME}"
    return 1
  fi
  log "tmux session created: ${SESSION_NAME}"

  if ! wait_for_claude_ready 120; then
    log "ERROR: claude did not become ready in ${SESSION_NAME}"
    kill_tmux_session
    return 1
  fi
  return 0
}

# inject_formula — Send a formula/prompt into the agent session
# Sets SESSION_NAME to $1 (when given) and pastes $2 into that session.
# Usage: inject_formula session_name formula_text [context]
inject_formula() {
  SESSION_NAME="${1:-${SESSION_NAME:-}}"
  local formula_text="${2:-}"
  # $3 context is available for future use by callers
  inject_into_session "$formula_text"
}

# Globals exported by monitor_phase_loop for use by phase callbacks.
# LAST_PHASE_MTIME: mtime of phase file at the time the current phase was dispatched.
# _MONITOR_LOOP_EXIT: reason monitor_phase_loop returned — check after the call.
LAST_PHASE_MTIME=0
_MONITOR_LOOP_EXIT=""

# monitor_phase_loop — Watch PHASE_FILE and dispatch phase changes to a callback
#
# Handles: phase change detection, idle timeout, and session crash recovery.
# The phase callback receives the current phase string as $1.
# Return 1 from the callback to break the loop; return 0 (or default) to continue.
#
# On idle timeout: kills the session, sets _MONITOR_LOOP_EXIT=idle_timeout, breaks.
# On crash recovery failure: sets _MONITOR_LOOP_EXIT=crash_recovery_failed, breaks.
# On callback return 1: sets _MONITOR_LOOP_EXIT=callback_break, breaks.
#
# LAST_PHASE_MTIME is updated before each callback invocation so callbacks can
# detect subsequent phase file changes (e.g., during inner polling loops).
#
# Usage: monitor_phase_loop [phase_file] [idle_timeout_secs] [callback_fn]
#   phase_file        — defaults to PHASE_FILE; PHASE_FILE is set to this value
#   idle_timeout_secs — defaults to 7200
#   callback_fn       — optional function name, called as: callback_fn PHASE
# The loop sleeps PHASE_POLL_INTERVAL seconds (default 30) before every check,
# including the first one.
monitor_phase_loop() {
  local phase_file="${1:-${PHASE_FILE:-}}"
  local idle_timeout="${2:-7200}"
  local callback_fn="${3:-}"
  local poll_interval="${PHASE_POLL_INTERVAL:-30}"
  local current_phase phase_mtime crash_diff recovery_msg

  PHASE_FILE="$phase_file"
  LAST_PHASE_MTIME=0
  _MONITOR_LOOP_EXIT=""
  local idle_elapsed=0

  while true; do
    sleep "$poll_interval"
    idle_elapsed=$(( idle_elapsed + poll_interval ))

    # --- Session health check ---
    if ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then
      current_phase=$(read_phase)
      case "$current_phase" in
        PHASE:done|PHASE:failed)
          # Expected terminal phases — fall through to phase dispatch below
          ;;
        *)
          log "WARNING: tmux session died unexpectedly (phase: ${current_phase:-none})"
          notify "session crashed (phase: ${current_phase:-none}), attempting recovery"

          # Attempt crash recovery: restart session with recovery context.
          # NOTE(review): nothing here bounds the number of consecutive
          # recoveries; a session that dies right after every restart is
          # restarted (and notified about) on every poll.
          crash_diff=$(git -C "${WORKTREE}" diff "origin/${PRIMARY_BRANCH}..HEAD" \
            --stat 2>/dev/null | head -n 20) || true
          # Substitute the placeholder whenever there is nothing to show —
          # both when git fails and when the diff is simply empty.
          if [ -z "$crash_diff" ]; then
            crash_diff="(no diff)"
          fi

          recovery_msg="## Session Recovery

Your Claude Code session for issue #${ISSUE:-?} was interrupted unexpectedly.
The git worktree at ${WORKTREE} is intact — your changes survived.

Last known phase: ${current_phase:-unknown}

Work so far:
${crash_diff}

Run: git log --oneline -5 && git status
Then resume from the last phase following the original phase protocol.
Phase file: ${PHASE_FILE}"

          if tmux new-session -d -s "${SESSION_NAME}" -c "${WORKTREE}" \
            "claude --dangerously-skip-permissions" 2>/dev/null; then
            inject_into_session "$recovery_msg"
            log "recovery session started"
            # The restarted agent gets a full idle window.
            idle_elapsed=0
          else
            log "ERROR: could not restart session after crash"
            notify "session crashed and could not recover — needs human attention"
            _MONITOR_LOOP_EXIT="crash_recovery_failed"
            break
          fi
          continue
          ;;
      esac
    fi

    # --- Check phase file for changes ---
    # GNU stat uses `-c %Y`, BSD/macOS stat uses `-f %m`; fall back to 0 when
    # the file is missing or neither form works.
    phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null \
      || stat -f %m "$phase_file" 2>/dev/null \
      || echo 0)
    case "$phase_mtime" in
      ''|*[!0-9]*) phase_mtime=0 ;;  # guard the numeric comparison below
    esac
    current_phase=$(read_phase)

    if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$LAST_PHASE_MTIME" ]; then
      # No phase change — check idle timeout
      if [ "$idle_elapsed" -ge "$idle_timeout" ]; then
        log "TIMEOUT: no phase update for ${idle_timeout}s — killing session"
        kill_tmux_session
        _MONITOR_LOOP_EXIT="idle_timeout"
        break
      fi
      continue
    fi

    # Phase changed — update tracking state and dispatch to callback
    LAST_PHASE_MTIME="$phase_mtime"
    idle_elapsed=0
    log "phase: ${current_phase}"
    status "${current_phase}"

    if [ -n "$callback_fn" ]; then
      if ! "$callback_fn" "$current_phase"; then
        _MONITOR_LOOP_EXIT="callback_break"
        break
      fi
    fi
  done
}