disinto/lib/agent-session.sh

#!/usr/bin/env bash
# agent-session.sh — Shared tmux + Claude interactive session helpers
#
# Source this into agent orchestrator scripts for reusable session management.
#
# Functions:
#   agent_wait_for_claude_ready SESSION_NAME [TIMEOUT_SECS]
#   agent_inject_into_session   SESSION_NAME TEXT
#   create_agent_session        SESSION_NAME [WORKDIR]
#   inject_formula              SESSION_NAME TEXT
#   monitor_phase_loop          PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME]
#   agent_kill_session          SESSION_NAME
#   read_phase                  [PHASE_FILE]

# Poll a tmux pane until the Claude ❯ prompt character shows up in it.
#
# Args:
#   $1 — tmux session (pane target) to inspect
#   $2 — maximum time to wait in seconds (default 120)
# Returns: 0 as soon as ❯ is seen in the captured pane, 1 once the time
#          budget is used up without seeing it.
agent_wait_for_claude_ready() {
  local target="$1"
  local limit="${2:-120}"
  local step=2
  local waited=0
  while [ "$waited" -lt "$limit" ]; do
    # tmux errors are silenced: a pane that cannot be captured produces no
    # output and therefore just counts as "not ready yet".
    tmux capture-pane -t "$target" -p 2>/dev/null | grep -q '❯' && return 0
    sleep "$step"
    waited=$((waited + step))
  done
  return 1
}

# Paste TEXT into SESSION via a tmux paste buffer, then press Enter.
#
# Args:
#   $1 — tmux session to paste into
#   $2 — text to inject (written verbatim; no printf/escape interpretation)
# Returns: 0 when the text was pasted and Enter was sent, 1 if the temp file
#          could not be created or any tmux step failed.
#
# Waits up to 120s for the Claude prompt first; that wait is best-effort and a
# timeout does not prevent the injection attempt.
agent_inject_into_session() {
  local session="$1"
  local text="$2"
  # Buffer name is keyed on the shell PID so unrelated processes do not clash.
  local buffer="agent-inject-$$"
  local tmpfile
  local rc=0
  agent_wait_for_claude_ready "$session" 120 || true
  tmpfile=$(mktemp "${TMPDIR:-/tmp}/agent-inject-XXXXXX") || return 1
  if printf '%s' "$text" > "$tmpfile" \
    && tmux load-buffer -b "$buffer" "$tmpfile" \
    && tmux paste-buffer -t "$session" -b "$buffer"; then
    # Give the pane a moment to absorb the paste before submitting it.
    sleep 0.5
    tmux send-keys -t "$session" "" Enter || rc=1
  else
    # Nothing (or only part) was pasted: do NOT press Enter, otherwise we
    # would submit whatever happens to be sitting at the prompt.
    rc=1
  fi
  # Cleanup runs on both the success and the failure path.
  tmux delete-buffer -b "$buffer" 2>/dev/null || true
  rm -f -- "$tmpfile"
  return "$rc"
}

# Start a detached tmux session that runs Claude, then wait for its prompt.
#
# Args:
#   $1 — name for the new tmux session
#   $2 — working directory for the session (default: current directory)
# Returns: 0 once the session exists and the Claude prompt was seen within
#          120s; 1 if the session is missing after launch or never got ready.
create_agent_session() {
  local name="$1"
  local dir="${2:-.}"
  # Launch errors are silenced here; the has-session probe below is what
  # decides whether the launch worked.
  tmux new-session -d -s "$name" -c "$dir" \
    "claude --dangerously-skip-permissions" 2>/dev/null
  sleep 1
  if ! tmux has-session -t "$name" 2>/dev/null; then
    return 1
  fi
  if ! agent_wait_for_claude_ready "$name" 120; then
    return 1
  fi
  return 0
}

# Alias for agent_inject_into_session: every argument (SESSION_NAME TEXT) is
# forwarded unchanged and the callee's exit status is returned as-is.
inject_formula() { agent_inject_into_session "$@"; }

# Monitor a phase file, calling a callback on changes and handling idle timeout.
#
# Args: phase_file idle_timeout_secs callback_fn [session_name]
#   session_name — tmux session to health-check; falls back to $SESSION_NAME global
#
# Globals read:
#   PHASE_POLL_INTERVAL — seconds between polls (default 10)
#   SESSION_NAME        — used when no session_name argument is given
# Globals written:
#   _MONITOR_SESSION   — (exported) resolved session name (arg 4 or $SESSION_NAME).
#                        Callbacks should reference _MONITOR_SESSION instead of
#                        $SESSION_NAME directly.
#   _MONITOR_LOOP_EXIT — exit reason, one of:
#                          idle_timeout       no phase change for idle_timeout_secs
#                          idle_prompt        Claude idle at ❯ without any phase written
#                          crashed            session gone and callback did not restore it
#                          done               PHASE:done or PHASE:merged was written
#                          PHASE:failed       literal phase string
#                          PHASE:needs_human  literal phase string
#   LAST_PHASE_MTIME   — mtime of the phase file at the latest detected change
#
# Callback invocations (skipped when callback_fn is not a known command):
#   "<phase>"        on every detected phase change, terminal or not
#   "PHASE:crashed"  when the session is gone and no terminal phase is on file
#   "PHASE:failed"   on idle-prompt detection (the session is killed first)
#   The callback is NOT invoked on idle timeout.
#
# Returns: 1 on "crashed", 0 for every other exit reason.
#
# Idle prompt detection: if Claude returns to the ❯ prompt for 3 consecutive polls
# WITHOUT having written any phase signal, the session is killed and the callback is
# invoked with "PHASE:failed".  This handles the case where Claude completes its work
# but skips the phase protocol entirely.
monitor_phase_loop() {
  local phase_file="$1"
  local idle_timeout="$2"
  local callback="$3"
  local _session="${4:-${SESSION_NAME:-}}"
  # Export resolved session name so callbacks can reference it regardless of
  # which session was passed to monitor_phase_loop (analogous to _MONITOR_LOOP_EXIT).
  export _MONITOR_SESSION="$_session"
  local poll_interval="${PHASE_POLL_INTERVAL:-10}"
  local last_mtime=0
  local idle_elapsed=0
  local idle_pane_count=0
  local current_phase
  local phase_mtime

  while true; do
    sleep "$poll_interval"
    idle_elapsed=$(( idle_elapsed + poll_interval ))

    # Session health check
    if ! tmux has-session -t "${_session}" 2>/dev/null; then
      current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)
      case "$current_phase" in
        PHASE:done|PHASE:failed|PHASE:merged|PHASE:needs_human)
          # Terminal — fall through to the phase handler below.  This list must
          # match the terminal phases handled there; otherwise a session that
          # exits right after writing its final phase is misreported as crashed.
          ;;
        *)
          # Call callback with "crashed" — let agent-specific code handle recovery
          if type "${callback}" &>/dev/null; then
            "$callback" "PHASE:crashed"
          fi
          # If callback didn't restart session, break
          if ! tmux has-session -t "${_session}" 2>/dev/null; then
            _MONITOR_LOOP_EXIT="crashed"
            return 1
          fi
          # Session was restored: start a fresh idle window.
          idle_elapsed=0
          idle_pane_count=0
          continue
          ;;
      esac
    fi

    # Check phase file for changes.  `stat -c %Y` is the GNU coreutils form; if
    # stat fails (missing file, unsupported option) the mtime reads as 0 and no
    # change is detected.
    phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null || echo 0)
    current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)

    if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$last_mtime" ]; then
      # No phase change — check idle timeout
      if [ "$idle_elapsed" -ge "$idle_timeout" ]; then
        _MONITOR_LOOP_EXIT="idle_timeout"
        agent_kill_session "${_session}"
        return 0
      fi
      # Idle prompt detection: Claude finished without writing a phase signal.
      # Only fires when current_phase is empty (no phase ever written).
      # Note: tmux capture-pane captures the full visible pane area, not just the
      # last line.  Prior tool output containing ❯ (e.g. a zsh subshell prompt in
      # Claude's output) could trigger a false positive — the same risk exists in
      # agent_wait_for_claude_ready().  Requiring 3 consecutive polls (≥2 poll
      # intervals of sustained idle) reduces but does not eliminate this risk.
      if [ -z "$current_phase" ] && tmux has-session -t "${_session}" 2>/dev/null && \
         tmux capture-pane -t "${_session}" -p 2>/dev/null | grep -q '❯'; then
        idle_pane_count=$(( idle_pane_count + 1 ))
        if [ "$idle_pane_count" -ge 3 ]; then
          _MONITOR_LOOP_EXIT="idle_prompt"
          # Session is already killed before the callback is invoked.
          # Callbacks that handle PHASE:failed must not assume the session is alive.
          agent_kill_session "${_session}"
          if type "${callback}" &>/dev/null; then
            "$callback" "PHASE:failed"
          fi
          return 0
        fi
      else
        # Streak broken: the three idle polls must be consecutive.
        idle_pane_count=0
      fi
      continue
    fi

    # Phase changed
    last_mtime="$phase_mtime"
    # shellcheck disable=SC2034  # read by phase-handler.sh callback
    LAST_PHASE_MTIME="$phase_mtime"
    idle_elapsed=0

    # Terminal phases
    case "$current_phase" in
      PHASE:done|PHASE:merged)
        _MONITOR_LOOP_EXIT="done"
        if type "${callback}" &>/dev/null; then
          "$callback" "$current_phase"
        fi
        return 0
        ;;
      PHASE:failed|PHASE:needs_human)
        # Exit reason carries the literal phase string for these two.
        _MONITOR_LOOP_EXIT="$current_phase"
        if type "${callback}" &>/dev/null; then
          "$callback" "$current_phase"
        fi
        return 0
        ;;
    esac

    # Non-terminal phase — call callback
    if type "${callback}" &>/dev/null; then
      "$callback" "$current_phase"
    fi
  done
}

# Kill the named tmux session.
# Always returns 0: an empty/missing name is skipped, and tmux errors (such as
# the session not existing) are silenced.
agent_kill_session() {
  local target="${1:-}"
  if [ -z "$target" ]; then
    return 0
  fi
  tmux kill-session -t "$target" 2>/dev/null || true
}

# Print the first line of a phase file with all whitespace removed.
# Usage: read_phase [file]  — defaults to $PHASE_FILE
# An unreadable or missing file prints nothing; read errors are silenced and
# do not produce a failing status.
read_phase() {
  local phase_path="${1:-${PHASE_FILE:-}}"
  { head -n 1 "$phase_path" 2>/dev/null || true; } | tr -d '[:space:]'
}