refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
#!/usr/bin/env bash
|
2026-03-18 16:21:07 +01:00
|
|
|
|
# agent-session.sh — Shared tmux + Claude interactive session helpers
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
#
|
2026-03-18 16:21:07 +01:00
|
|
|
|
# Source this into agent orchestrator scripts for reusable session management.
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
#
|
2026-03-18 16:21:07 +01:00
|
|
|
|
# Functions:
|
|
|
|
|
|
# agent_wait_for_claude_ready SESSION_NAME [TIMEOUT_SECS]
|
|
|
|
|
|
# agent_inject_into_session SESSION_NAME TEXT
|
|
|
|
|
|
# agent_kill_session SESSION_NAME
|
2026-03-18 20:15:14 +00:00
|
|
|
|
# monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME]
|
2026-03-26 10:11:50 +00:00
|
|
|
|
# session_lock_acquire [TIMEOUT_SECS]
|
|
|
|
|
|
# session_lock_release
|
|
|
|
|
|
|
|
|
|
|
|
# --- Cooperative session lock (fd-based) ---
|
|
|
|
|
|
# File descriptor for the session lock. Set by create_agent_session().
|
|
|
|
|
|
# Callers can release/re-acquire via session_lock_release/session_lock_acquire
|
|
|
|
|
|
# to allow other Claude sessions during idle phases (awaiting_review/awaiting_ci).
|
|
|
|
|
|
SESSION_LOCK_FD=""
|
|
|
|
|
|
|
|
|
|
|
|
# Release the session lock without closing the file descriptor.
|
|
|
|
|
|
# The fd stays open so it can be re-acquired later.
|
|
|
|
|
|
session_lock_release() {
|
|
|
|
|
|
if [ -n "${SESSION_LOCK_FD:-}" ]; then
|
|
|
|
|
|
flock -u "$SESSION_LOCK_FD"
|
|
|
|
|
|
fi
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# Re-acquire the session lock. Blocks until available or timeout.
|
|
|
|
|
|
# Opens the lock fd if not already open (for use by external callers).
|
|
|
|
|
|
# Args: [timeout_secs] (default 300)
|
|
|
|
|
|
# Returns 0 on success, 1 on timeout/error.
|
|
|
|
|
|
# shellcheck disable=SC2120 # timeout arg is used by external callers
|
|
|
|
|
|
session_lock_acquire() {
|
|
|
|
|
|
local timeout="${1:-300}"
|
|
|
|
|
|
if [ -z "${SESSION_LOCK_FD:-}" ]; then
|
|
|
|
|
|
local lock_dir="${HOME}/.claude"
|
|
|
|
|
|
mkdir -p "$lock_dir"
|
|
|
|
|
|
exec {SESSION_LOCK_FD}>>"${lock_dir}/session.lock"
|
|
|
|
|
|
fi
|
|
|
|
|
|
flock -w "$timeout" "$SESSION_LOCK_FD"
|
|
|
|
|
|
}
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
|
2026-03-18 16:21:07 +01:00
|
|
|
|
# Wait for the Claude ❯ ready prompt in a tmux pane.
|
|
|
|
|
|
# Returns 0 if ready within TIMEOUT_SECS (default 120), 1 otherwise.
|
|
|
|
|
|
agent_wait_for_claude_ready() {
|
|
|
|
|
|
local session="$1"
|
|
|
|
|
|
local timeout="${2:-120}"
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
local elapsed=0
|
|
|
|
|
|
while [ "$elapsed" -lt "$timeout" ]; do
|
2026-03-18 16:21:07 +01:00
|
|
|
|
if tmux capture-pane -t "$session" -p 2>/dev/null | grep -q '❯'; then
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
return 0
|
|
|
|
|
|
fi
|
|
|
|
|
|
sleep 2
|
|
|
|
|
|
elapsed=$((elapsed + 2))
|
|
|
|
|
|
done
|
|
|
|
|
|
return 1
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-18 16:21:07 +01:00
|
|
|
|
# Paste TEXT into SESSION (waits for Claude to be ready first), then press Enter.
|
|
|
|
|
|
agent_inject_into_session() {
|
|
|
|
|
|
local session="$1"
|
|
|
|
|
|
local text="$2"
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
local tmpfile
|
2026-03-26 10:11:50 +00:00
|
|
|
|
# Re-acquire session lock before injecting — Claude will resume working
|
|
|
|
|
|
# shellcheck disable=SC2119 # using default timeout
|
|
|
|
|
|
session_lock_acquire || true
|
2026-03-18 16:21:07 +01:00
|
|
|
|
agent_wait_for_claude_ready "$session" 120 || true
|
2026-03-19 14:57:54 +01:00
|
|
|
|
# Clear idle marker — new work incoming
|
|
|
|
|
|
rm -f "/tmp/claude-idle-${session}.ts"
|
2026-03-18 16:21:07 +01:00
|
|
|
|
tmpfile=$(mktemp /tmp/agent-inject-XXXXXX)
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
printf '%s' "$text" > "$tmpfile"
|
2026-03-18 16:21:07 +01:00
|
|
|
|
tmux load-buffer -b "agent-inject-$$" "$tmpfile"
|
|
|
|
|
|
tmux paste-buffer -t "$session" -b "agent-inject-$$"
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
sleep 0.5
|
2026-03-18 16:21:07 +01:00
|
|
|
|
tmux send-keys -t "$session" "" Enter
|
|
|
|
|
|
tmux delete-buffer -b "agent-inject-$$" 2>/dev/null || true
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
rm -f "$tmpfile"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-18 16:21:05 +00:00
|
|
|
|
# Create a tmux session running Claude in the given workdir.
|
2026-03-19 14:57:54 +01:00
|
|
|
|
# Installs a Stop hook for idle detection (see monitor_phase_loop).
|
2026-03-20 00:10:27 +00:00
|
|
|
|
# Installs a PreToolUse hook to guard destructive Bash operations.
|
2026-03-19 17:55:06 +00:00
|
|
|
|
# Optionally installs a PostToolUse hook for phase file write detection.
|
2026-03-20 01:43:00 +00:00
|
|
|
|
# Optionally installs a StopFailure hook for immediate phase file update on API error.
|
2026-03-19 17:55:06 +00:00
|
|
|
|
# Args: session workdir [phase_file]
|
2026-03-18 16:21:05 +00:00
|
|
|
|
# Returns 0 if session is ready, 1 otherwise.
|
|
|
|
|
|
create_agent_session() {
|
|
|
|
|
|
local session="$1"
|
|
|
|
|
|
local workdir="${2:-.}"
|
2026-03-19 17:55:06 +00:00
|
|
|
|
local phase_file="${3:-}"
|
|
|
|
|
|
|
|
|
|
|
|
# Prepare settings directory for hooks
|
|
|
|
|
|
mkdir -p "${workdir}/.claude"
|
|
|
|
|
|
local settings="${workdir}/.claude/settings.json"
|
2026-03-19 14:57:54 +01:00
|
|
|
|
|
|
|
|
|
|
# Install Stop hook for idle detection: when Claude finishes a response,
|
|
|
|
|
|
# the hook writes a timestamp to a marker file. monitor_phase_loop checks
|
|
|
|
|
|
# this marker instead of fragile tmux pane scraping.
|
|
|
|
|
|
local idle_marker="/tmp/claude-idle-${session}.ts"
|
|
|
|
|
|
local hook_script="${FACTORY_ROOT}/lib/hooks/on-idle-stop.sh"
|
|
|
|
|
|
if [ -x "$hook_script" ]; then
|
|
|
|
|
|
local hook_cmd="${hook_script} ${idle_marker}"
|
2026-03-23 09:56:49 +00:00
|
|
|
|
# When a phase file is available, pass it and the session name so the
|
|
|
|
|
|
# hook can nudge Claude if it returns to the prompt without signalling.
|
|
|
|
|
|
if [ -n "$phase_file" ]; then
|
|
|
|
|
|
hook_cmd="${hook_script} ${idle_marker} ${phase_file} ${session}"
|
|
|
|
|
|
fi
|
2026-03-19 14:57:54 +01:00
|
|
|
|
if [ -f "$settings" ]; then
|
|
|
|
|
|
# Append our Stop hook to existing project settings
|
|
|
|
|
|
jq --arg cmd "$hook_cmd" '
|
2026-03-19 21:19:58 +00:00
|
|
|
|
if (.hooks.Stop // [] | any(.[]; .hooks[]?.command == $cmd))
|
|
|
|
|
|
then .
|
|
|
|
|
|
else .hooks.Stop = (.hooks.Stop // []) + [{
|
2026-03-19 14:57:54 +01:00
|
|
|
|
matcher: "",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
2026-03-19 21:19:58 +00:00
|
|
|
|
end
|
2026-03-19 14:57:54 +01:00
|
|
|
|
' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings"
|
|
|
|
|
|
else
|
|
|
|
|
|
jq -n --arg cmd "$hook_cmd" '{
|
|
|
|
|
|
hooks: {
|
|
|
|
|
|
Stop: [{
|
|
|
|
|
|
matcher: "",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
}
|
|
|
|
|
|
}' > "$settings"
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-03-19 17:55:06 +00:00
|
|
|
|
# Install PostToolUse hook for phase file write detection: when Claude
|
|
|
|
|
|
# writes to the phase file via Bash or Write, the hook writes a marker
|
|
|
|
|
|
# so monitor_phase_loop can react immediately instead of waiting for
|
|
|
|
|
|
# the next mtime-based poll cycle.
|
|
|
|
|
|
if [ -n "$phase_file" ]; then
|
|
|
|
|
|
local phase_marker="/tmp/phase-changed-${session}.marker"
|
|
|
|
|
|
local phase_hook_script="${FACTORY_ROOT}/lib/hooks/on-phase-change.sh"
|
|
|
|
|
|
if [ -x "$phase_hook_script" ]; then
|
|
|
|
|
|
local phase_hook_cmd="${phase_hook_script} ${phase_file} ${phase_marker}"
|
|
|
|
|
|
if [ -f "$settings" ]; then
|
|
|
|
|
|
jq --arg cmd "$phase_hook_cmd" '
|
2026-03-19 21:19:58 +00:00
|
|
|
|
if (.hooks.PostToolUse // [] | any(.[]; .hooks[]?.command == $cmd))
|
|
|
|
|
|
then .
|
|
|
|
|
|
else .hooks.PostToolUse = (.hooks.PostToolUse // []) + [{
|
2026-03-19 17:55:06 +00:00
|
|
|
|
matcher: "Bash|Write",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
2026-03-19 21:19:58 +00:00
|
|
|
|
end
|
2026-03-19 17:55:06 +00:00
|
|
|
|
' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings"
|
|
|
|
|
|
else
|
|
|
|
|
|
jq -n --arg cmd "$phase_hook_cmd" '{
|
|
|
|
|
|
hooks: {
|
|
|
|
|
|
PostToolUse: [{
|
|
|
|
|
|
matcher: "Bash|Write",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
}
|
|
|
|
|
|
}' > "$settings"
|
|
|
|
|
|
fi
|
2026-03-19 18:14:49 +00:00
|
|
|
|
rm -f "$phase_marker"
|
2026-03-19 17:55:06 +00:00
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-03-20 01:43:00 +00:00
|
|
|
|
# Install StopFailure hook for immediate phase file update on API error:
|
|
|
|
|
|
# when Claude hits a rate limit, server error, billing error, or auth failure,
|
|
|
|
|
|
# the hook writes PHASE:failed to the phase file and touches the phase-changed
|
|
|
|
|
|
# marker so monitor_phase_loop picks it up within one poll cycle instead of
|
|
|
|
|
|
# waiting for idle timeout (up to 2 hours).
|
|
|
|
|
|
if [ -n "$phase_file" ]; then
|
|
|
|
|
|
local stop_failure_hook_script="${FACTORY_ROOT}/lib/hooks/on-stop-failure.sh"
|
|
|
|
|
|
if [ -x "$stop_failure_hook_script" ]; then
|
2026-03-20 01:52:46 +00:00
|
|
|
|
# phase_marker is defined in the PostToolUse block above; redeclare so
|
|
|
|
|
|
# this block is self-contained if that block is ever removed.
|
|
|
|
|
|
local sf_phase_marker="/tmp/phase-changed-${session}.marker"
|
|
|
|
|
|
local stop_failure_hook_cmd="${stop_failure_hook_script} ${phase_file} ${sf_phase_marker}"
|
2026-03-20 01:43:00 +00:00
|
|
|
|
if [ -f "$settings" ]; then
|
|
|
|
|
|
jq --arg cmd "$stop_failure_hook_cmd" '
|
|
|
|
|
|
if (.hooks.StopFailure // [] | any(.[]; .hooks[]?.command == $cmd))
|
|
|
|
|
|
then .
|
|
|
|
|
|
else .hooks.StopFailure = (.hooks.StopFailure // []) + [{
|
|
|
|
|
|
matcher: "rate_limit|server_error|authentication_failed|billing_error",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
end
|
|
|
|
|
|
' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings"
|
|
|
|
|
|
else
|
|
|
|
|
|
jq -n --arg cmd "$stop_failure_hook_cmd" '{
|
|
|
|
|
|
hooks: {
|
|
|
|
|
|
StopFailure: [{
|
|
|
|
|
|
matcher: "rate_limit|server_error|authentication_failed|billing_error",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
}
|
|
|
|
|
|
}' > "$settings"
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-03-20 00:10:27 +00:00
|
|
|
|
# Install PreToolUse hook for destructive operation guard: blocks force push
|
|
|
|
|
|
# to primary branch, rm -rf outside worktree, direct API merge calls, and
|
|
|
|
|
|
# checkout/switch to primary branch. Claude sees the denial reason on exit 2
|
|
|
|
|
|
# and can self-correct.
|
|
|
|
|
|
local guard_hook_script="${FACTORY_ROOT}/lib/hooks/on-pretooluse-guard.sh"
|
|
|
|
|
|
if [ -x "$guard_hook_script" ]; then
|
|
|
|
|
|
local abs_workdir
|
|
|
|
|
|
abs_workdir=$(cd "$workdir" 2>/dev/null && pwd) || abs_workdir="$workdir"
|
2026-03-21 18:09:28 +00:00
|
|
|
|
local guard_hook_cmd="${guard_hook_script} ${PRIMARY_BRANCH:-main} ${abs_workdir} ${session}"
|
2026-03-20 00:10:27 +00:00
|
|
|
|
if [ -f "$settings" ]; then
|
|
|
|
|
|
jq --arg cmd "$guard_hook_cmd" '
|
|
|
|
|
|
if (.hooks.PreToolUse // [] | any(.[]; .hooks[]?.command == $cmd))
|
|
|
|
|
|
then .
|
|
|
|
|
|
else .hooks.PreToolUse = (.hooks.PreToolUse // []) + [{
|
|
|
|
|
|
matcher: "Bash",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
end
|
|
|
|
|
|
' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings"
|
|
|
|
|
|
else
|
|
|
|
|
|
jq -n --arg cmd "$guard_hook_cmd" '{
|
|
|
|
|
|
hooks: {
|
|
|
|
|
|
PreToolUse: [{
|
|
|
|
|
|
matcher: "Bash",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
}
|
|
|
|
|
|
}' > "$settings"
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-03-20 01:11:26 +00:00
|
|
|
|
# Install SessionEnd hook for guaranteed cleanup: when the Claude session
|
|
|
|
|
|
# exits (clean or crash), write a termination marker so monitor_phase_loop
|
|
|
|
|
|
# detects the exit faster than tmux has-session polling alone.
|
|
|
|
|
|
local exit_marker="/tmp/claude-exited-${session}.ts"
|
|
|
|
|
|
local session_end_hook_script="${FACTORY_ROOT}/lib/hooks/on-session-end.sh"
|
|
|
|
|
|
if [ -x "$session_end_hook_script" ]; then
|
|
|
|
|
|
local session_end_hook_cmd="${session_end_hook_script} ${exit_marker}"
|
|
|
|
|
|
if [ -f "$settings" ]; then
|
|
|
|
|
|
jq --arg cmd "$session_end_hook_cmd" '
|
|
|
|
|
|
if (.hooks.SessionEnd // [] | any(.[]; .hooks[]?.command == $cmd))
|
|
|
|
|
|
then .
|
|
|
|
|
|
else .hooks.SessionEnd = (.hooks.SessionEnd // []) + [{
|
|
|
|
|
|
matcher: "",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
end
|
|
|
|
|
|
' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings"
|
|
|
|
|
|
else
|
|
|
|
|
|
jq -n --arg cmd "$session_end_hook_cmd" '{
|
|
|
|
|
|
hooks: {
|
|
|
|
|
|
SessionEnd: [{
|
|
|
|
|
|
matcher: "",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
}
|
|
|
|
|
|
}' > "$settings"
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
rm -f "$exit_marker"
|
|
|
|
|
|
|
2026-03-20 23:27:32 +00:00
|
|
|
|
# Install SessionStart hook for context re-injection after compaction:
|
|
|
|
|
|
# when Claude Code compacts context during long sessions, the phase protocol
|
|
|
|
|
|
# instructions are lost. This hook fires after each compaction and outputs
|
|
|
|
|
|
# the content of a context file so Claude retains critical instructions.
|
|
|
|
|
|
# The context file is written by callers via write_compact_context().
|
|
|
|
|
|
if [ -n "$phase_file" ]; then
|
|
|
|
|
|
local compact_hook_script="${FACTORY_ROOT}/lib/hooks/on-compact-reinject.sh"
|
|
|
|
|
|
if [ -x "$compact_hook_script" ]; then
|
|
|
|
|
|
local context_file="${phase_file%.phase}.context"
|
|
|
|
|
|
local compact_hook_cmd="${compact_hook_script} ${context_file}"
|
|
|
|
|
|
if [ -f "$settings" ]; then
|
|
|
|
|
|
jq --arg cmd "$compact_hook_cmd" '
|
|
|
|
|
|
if (.hooks.SessionStart // [] | any(.[]; .hooks[]?.command == $cmd))
|
|
|
|
|
|
then .
|
|
|
|
|
|
else .hooks.SessionStart = (.hooks.SessionStart // []) + [{
|
|
|
|
|
|
matcher: "compact",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
end
|
|
|
|
|
|
' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings"
|
|
|
|
|
|
else
|
|
|
|
|
|
jq -n --arg cmd "$compact_hook_cmd" '{
|
|
|
|
|
|
hooks: {
|
|
|
|
|
|
SessionStart: [{
|
|
|
|
|
|
matcher: "compact",
|
|
|
|
|
|
hooks: [{type: "command", command: $cmd}]
|
|
|
|
|
|
}]
|
|
|
|
|
|
}
|
|
|
|
|
|
}' > "$settings"
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-03-19 14:57:54 +01:00
|
|
|
|
rm -f "$idle_marker"
|
2026-03-20 09:08:17 +00:00
|
|
|
|
local model_flag=""
|
|
|
|
|
|
if [ -n "${CLAUDE_MODEL:-}" ]; then
|
|
|
|
|
|
model_flag="--model ${CLAUDE_MODEL}"
|
|
|
|
|
|
fi
|
2026-03-25 17:48:21 +00:00
|
|
|
|
|
2026-03-26 10:11:50 +00:00
|
|
|
|
# Acquire a session-level mutex via fd-based flock to prevent concurrent
|
|
|
|
|
|
# Claude sessions from racing on OAuth token refresh. Unlike the previous
|
|
|
|
|
|
# command-wrapper flock, the fd approach allows callers to release the lock
|
|
|
|
|
|
# during idle phases (awaiting_review/awaiting_ci) and re-acquire before
|
|
|
|
|
|
# injecting the next prompt. See #724.
|
2026-03-25 17:48:21 +00:00
|
|
|
|
# Use ~/.claude/session.lock so the lock is shared across containers when
|
|
|
|
|
|
# the host ~/.claude directory is bind-mounted.
|
|
|
|
|
|
local lock_dir="${HOME}/.claude"
|
|
|
|
|
|
mkdir -p "$lock_dir"
|
|
|
|
|
|
local claude_lock="${lock_dir}/session.lock"
|
2026-03-26 10:11:50 +00:00
|
|
|
|
if [ -z "${SESSION_LOCK_FD:-}" ]; then
|
|
|
|
|
|
exec {SESSION_LOCK_FD}>>"${claude_lock}"
|
|
|
|
|
|
fi
|
|
|
|
|
|
if ! flock -w 300 "$SESSION_LOCK_FD"; then
|
|
|
|
|
|
return 1
|
|
|
|
|
|
fi
|
|
|
|
|
|
local claude_cmd="claude --dangerously-skip-permissions ${model_flag}"
|
2026-03-25 17:48:21 +00:00
|
|
|
|
|
2026-03-18 16:21:05 +00:00
|
|
|
|
tmux new-session -d -s "$session" -c "$workdir" \
|
2026-03-25 17:48:21 +00:00
|
|
|
|
"$claude_cmd" 2>/dev/null
|
2026-03-18 16:21:05 +00:00
|
|
|
|
sleep 1
|
|
|
|
|
|
tmux has-session -t "$session" 2>/dev/null || return 1
|
|
|
|
|
|
agent_wait_for_claude_ready "$session" 120 || return 1
|
|
|
|
|
|
return 0
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# Inject a prompt/formula into a session (alias for agent_inject_into_session).
|
|
|
|
|
|
inject_formula() {
|
|
|
|
|
|
agent_inject_into_session "$@"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-18 16:40:51 +00:00
|
|
|
|
# Monitor a phase file, calling a callback on changes and handling idle timeout.
|
2026-03-23 09:39:48 +00:00
|
|
|
|
# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, idle_prompt, done, crashed, PHASE:failed, PHASE:escalate).
|
2026-03-19 01:05:21 +00:00
|
|
|
|
# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME).
|
|
|
|
|
|
# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly.
|
2026-03-18 20:15:14 +00:00
|
|
|
|
# Args: phase_file idle_timeout_secs callback_fn [session_name]
|
|
|
|
|
|
# session_name — tmux session to health-check; falls back to $SESSION_NAME global
|
2026-03-19 13:47:10 +01:00
|
|
|
|
#
|
2026-03-19 14:57:54 +01:00
|
|
|
|
# Idle detection: uses a Stop hook marker file (written by lib/hooks/on-idle-stop.sh)
|
|
|
|
|
|
# to detect when Claude finishes responding without writing a phase signal.
|
|
|
|
|
|
# If the marker exists for 3 consecutive polls with no phase written, the session
|
|
|
|
|
|
# is killed and the callback invoked with "PHASE:failed".
|
2026-03-18 16:40:51 +00:00
|
|
|
|
monitor_phase_loop() {
|
|
|
|
|
|
local phase_file="$1"
|
|
|
|
|
|
local idle_timeout="$2"
|
|
|
|
|
|
local callback="$3"
|
2026-03-18 20:15:14 +00:00
|
|
|
|
local _session="${4:-${SESSION_NAME:-}}"
|
2026-03-19 01:05:21 +00:00
|
|
|
|
# Export resolved session name so callbacks can reference it regardless of
|
|
|
|
|
|
# which session was passed to monitor_phase_loop (analogous to _MONITOR_LOOP_EXIT).
|
|
|
|
|
|
export _MONITOR_SESSION="$_session"
|
2026-03-18 16:40:51 +00:00
|
|
|
|
local poll_interval="${PHASE_POLL_INTERVAL:-10}"
|
|
|
|
|
|
local last_mtime=0
|
|
|
|
|
|
local idle_elapsed=0
|
2026-03-19 13:47:10 +01:00
|
|
|
|
local idle_pane_count=0
|
2026-03-18 16:40:51 +00:00
|
|
|
|
|
|
|
|
|
|
while true; do
|
|
|
|
|
|
sleep "$poll_interval"
|
|
|
|
|
|
idle_elapsed=$(( idle_elapsed + poll_interval ))
|
|
|
|
|
|
|
2026-03-20 01:11:26 +00:00
|
|
|
|
# Session health check: SessionEnd hook marker provides fast detection,
|
|
|
|
|
|
# tmux has-session is the fallback for unclean exits (e.g. tmux crash).
|
|
|
|
|
|
local exit_marker="/tmp/claude-exited-${_session}.ts"
|
|
|
|
|
|
if [ -f "$exit_marker" ] || ! tmux has-session -t "${_session}" 2>/dev/null; then
|
2026-03-18 16:40:51 +00:00
|
|
|
|
local current_phase
|
|
|
|
|
|
current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)
|
|
|
|
|
|
case "$current_phase" in
|
2026-03-21 19:39:04 +00:00
|
|
|
|
PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate)
|
2026-03-18 16:40:51 +00:00
|
|
|
|
;; # terminal — fall through to phase handler
|
|
|
|
|
|
*)
|
|
|
|
|
|
# Call callback with "crashed" — let agent-specific code handle recovery
|
|
|
|
|
|
if type "${callback}" &>/dev/null; then
|
|
|
|
|
|
"$callback" "PHASE:crashed"
|
|
|
|
|
|
fi
|
|
|
|
|
|
# If callback didn't restart session, break
|
2026-03-18 20:15:14 +00:00
|
|
|
|
if ! tmux has-session -t "${_session}" 2>/dev/null; then
|
2026-03-18 16:40:51 +00:00
|
|
|
|
_MONITOR_LOOP_EXIT="crashed"
|
|
|
|
|
|
return 1
|
|
|
|
|
|
fi
|
|
|
|
|
|
idle_elapsed=0
|
2026-03-19 13:47:10 +01:00
|
|
|
|
idle_pane_count=0
|
2026-03-18 16:40:51 +00:00
|
|
|
|
continue
|
|
|
|
|
|
;;
|
|
|
|
|
|
esac
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-03-19 17:55:06 +00:00
|
|
|
|
# Check phase-changed marker from PostToolUse hook — if present, the hook
|
|
|
|
|
|
# detected a phase file write so we reset last_mtime to force processing
|
|
|
|
|
|
# this cycle instead of waiting for the next mtime change.
|
|
|
|
|
|
local phase_marker="/tmp/phase-changed-${_session}.marker"
|
|
|
|
|
|
if [ -f "$phase_marker" ]; then
|
|
|
|
|
|
rm -f "$phase_marker"
|
|
|
|
|
|
last_mtime=0
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-03-18 16:40:51 +00:00
|
|
|
|
# Check phase file for changes
|
|
|
|
|
|
local phase_mtime
|
|
|
|
|
|
phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null || echo 0)
|
|
|
|
|
|
local current_phase
|
|
|
|
|
|
current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)
|
|
|
|
|
|
|
|
|
|
|
|
if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$last_mtime" ]; then
|
|
|
|
|
|
# No phase change — check idle timeout
|
|
|
|
|
|
if [ "$idle_elapsed" -ge "$idle_timeout" ]; then
|
|
|
|
|
|
_MONITOR_LOOP_EXIT="idle_timeout"
|
2026-03-18 20:15:14 +00:00
|
|
|
|
agent_kill_session "${_session}"
|
2026-03-18 16:40:51 +00:00
|
|
|
|
return 0
|
|
|
|
|
|
fi
|
2026-03-19 14:57:54 +01:00
|
|
|
|
# Idle detection via Stop hook: the on-idle-stop.sh hook writes a marker
|
|
|
|
|
|
# file when Claude finishes a response. If the marker exists and no phase
|
|
|
|
|
|
# has been written, Claude returned to the prompt without following the
|
|
|
|
|
|
# phase protocol. 3 consecutive polls = confirmed idle (not mid-turn).
|
|
|
|
|
|
local idle_marker="/tmp/claude-idle-${_session}.ts"
|
|
|
|
|
|
if [ -z "$current_phase" ] && [ -f "$idle_marker" ]; then
|
2026-03-19 13:47:10 +01:00
|
|
|
|
idle_pane_count=$(( idle_pane_count + 1 ))
|
|
|
|
|
|
if [ "$idle_pane_count" -ge 3 ]; then
|
|
|
|
|
|
_MONITOR_LOOP_EXIT="idle_prompt"
|
2026-03-19 14:57:54 +01:00
|
|
|
|
# Session is killed before the callback is invoked.
|
2026-03-19 13:47:10 +01:00
|
|
|
|
# Callbacks that handle PHASE:failed must not assume the session is alive.
|
|
|
|
|
|
agent_kill_session "${_session}"
|
|
|
|
|
|
if type "${callback}" &>/dev/null; then
|
|
|
|
|
|
"$callback" "PHASE:failed"
|
|
|
|
|
|
fi
|
|
|
|
|
|
return 0
|
|
|
|
|
|
fi
|
|
|
|
|
|
else
|
|
|
|
|
|
idle_pane_count=0
|
|
|
|
|
|
fi
|
2026-03-18 16:40:51 +00:00
|
|
|
|
continue
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
# Phase changed
|
|
|
|
|
|
last_mtime="$phase_mtime"
|
2026-03-18 17:26:54 +00:00
|
|
|
|
# shellcheck disable=SC2034 # read by phase-handler.sh callback
|
|
|
|
|
|
LAST_PHASE_MTIME="$phase_mtime"
|
2026-03-18 16:40:51 +00:00
|
|
|
|
idle_elapsed=0
|
2026-03-21 17:14:54 +00:00
|
|
|
|
idle_pane_count=0
|
2026-03-18 16:40:51 +00:00
|
|
|
|
|
|
|
|
|
|
# Terminal phases
|
|
|
|
|
|
case "$current_phase" in
|
|
|
|
|
|
PHASE:done|PHASE:merged)
|
|
|
|
|
|
_MONITOR_LOOP_EXIT="done"
|
|
|
|
|
|
if type "${callback}" &>/dev/null; then
|
|
|
|
|
|
"$callback" "$current_phase"
|
|
|
|
|
|
fi
|
|
|
|
|
|
return 0
|
|
|
|
|
|
;;
|
2026-03-21 19:39:04 +00:00
|
|
|
|
PHASE:failed|PHASE:escalate)
|
2026-03-18 16:40:51 +00:00
|
|
|
|
_MONITOR_LOOP_EXIT="$current_phase"
|
|
|
|
|
|
if type "${callback}" &>/dev/null; then
|
|
|
|
|
|
"$callback" "$current_phase"
|
|
|
|
|
|
fi
|
|
|
|
|
|
return 0
|
|
|
|
|
|
;;
|
|
|
|
|
|
esac
|
|
|
|
|
|
|
|
|
|
|
|
# Non-terminal phase — call callback
|
|
|
|
|
|
if type "${callback}" &>/dev/null; then
|
|
|
|
|
|
"$callback" "$current_phase"
|
|
|
|
|
|
fi
|
|
|
|
|
|
done
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-20 23:27:32 +00:00
|
|
|
|
# Write context to a file for re-injection after context compaction.
|
|
|
|
|
|
# The SessionStart compact hook reads this file and outputs it to stdout.
|
|
|
|
|
|
# Args: phase_file content
|
|
|
|
|
|
write_compact_context() {
|
|
|
|
|
|
local phase_file="$1"
|
|
|
|
|
|
local content="$2"
|
|
|
|
|
|
local context_file="${phase_file%.phase}.context"
|
|
|
|
|
|
printf '%s\n' "$content" > "$context_file"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-18 16:21:07 +01:00
|
|
|
|
# Kill a tmux session gracefully (no-op if not found).
|
|
|
|
|
|
agent_kill_session() {
|
2026-03-18 16:24:58 +00:00
|
|
|
|
local session="${1:-}"
|
|
|
|
|
|
[ -n "$session" ] && tmux kill-session -t "$session" 2>/dev/null || true
|
2026-03-19 14:57:54 +01:00
|
|
|
|
rm -f "/tmp/claude-idle-${session}.ts"
|
2026-03-19 17:55:06 +00:00
|
|
|
|
rm -f "/tmp/phase-changed-${session}.marker"
|
2026-03-20 01:11:26 +00:00
|
|
|
|
rm -f "/tmp/claude-exited-${session}.ts"
|
2026-03-23 09:56:49 +00:00
|
|
|
|
rm -f "/tmp/claude-nudge-${session}.count"
|
refactor: extract lib/agent-session.sh — reusable tmux + Claude agent runtime (#158)
Move generic agent infrastructure from dev/dev-agent.sh into lib/agent-session.sh:
- log, status, notify, notify_ctx, read_phase, wait_for_claude_ready,
inject_into_session, kill_tmux_session extracted verbatim
- create_agent_session(session_name, workdir) — new: tmux session creation
- inject_formula(session_name, formula_text, context) — new: prompt injection
- monitor_phase_loop(phase_file, idle_timeout, callback_fn) — new: phase loop
with session health check, crash recovery, and idle timeout detection
dev-agent.sh: sources the library, implements _on_phase_change() callback,
calls monitor_phase_loop(); idle-timeout and crash-recovery-failed cleanup
handled via _MONITOR_LOOP_EXIT signal variable. Behavior unchanged.
2026-03-18 14:36:36 +00:00
|
|
|
|
}
|
2026-03-18 17:11:02 +00:00
|
|
|
|
|
|
|
|
|
|
# Read the current phase from a phase file, stripped of whitespace.
|
|
|
|
|
|
# Usage: read_phase [file] — defaults to $PHASE_FILE
|
|
|
|
|
|
read_phase() {
|
|
|
|
|
|
local file="${1:-${PHASE_FILE:-}}"
|
|
|
|
|
|
{ cat "$file" 2>/dev/null || true; } | head -1 | tr -d '[:space:]'
|
|
|
|
|
|
}
|