fix: Session lock must not block during idle phases (awaiting_review/awaiting_ci) (#724)

Restructure session.lock from command-wrapper flock to fd-based flock so
the lock can be released when Claude is idle and re-acquired before
injecting the next prompt.

- agent-session.sh: add session_lock_acquire/release helpers, open fd in
  create_agent_session instead of wrapping claude with flock, auto-acquire
  in agent_inject_into_session before injecting
- phase-handler.sh: call session_lock_release at start of awaiting_ci and
  awaiting_review handlers (Claude is idle during CI polling / review wait)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-26 10:11:50 +00:00
parent cc4c6d7efa
commit af39b833af
2 changed files with 50 additions and 5 deletions

View file

@ -261,6 +261,8 @@ _on_phase_change() {
# ── PHASE: awaiting_ci ────────────────────────────────────────────────────── # ── PHASE: awaiting_ci ──────────────────────────────────────────────────────
if [ "$phase" = "PHASE:awaiting_ci" ]; then if [ "$phase" = "PHASE:awaiting_ci" ]; then
# Release session lock — Claude is idle during CI polling (#724)
session_lock_release
# Create PR if not yet created # Create PR if not yet created
if [ -z "${PR_NUMBER:-}" ]; then if [ -z "${PR_NUMBER:-}" ]; then
@ -458,6 +460,8 @@ Instructions:
# ── PHASE: awaiting_review ────────────────────────────────────────────────── # ── PHASE: awaiting_review ──────────────────────────────────────────────────
elif [ "$phase" = "PHASE:awaiting_review" ]; then elif [ "$phase" = "PHASE:awaiting_review" ]; then
# Release session lock — Claude is idle during review wait (#724)
session_lock_release
status "waiting for review on PR #${PR_NUMBER:-?}" status "waiting for review on PR #${PR_NUMBER:-?}"
CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle

View file

@ -8,6 +8,37 @@
# agent_inject_into_session SESSION_NAME TEXT # agent_inject_into_session SESSION_NAME TEXT
# agent_kill_session SESSION_NAME # agent_kill_session SESSION_NAME
# monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME] # monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME]
# session_lock_acquire [TIMEOUT_SECS]
# session_lock_release
# --- Cooperative session lock (fd-based) ---
# File descriptor for the session lock. Set by create_agent_session().
# Callers can release/re-acquire via session_lock_release/session_lock_acquire
# to allow other Claude sessions during idle phases (awaiting_review/awaiting_ci).
SESSION_LOCK_FD=""
# Release the session lock without closing the file descriptor.
# The fd stays open so it can be re-acquired later.
session_lock_release() {
if [ -n "${SESSION_LOCK_FD:-}" ]; then
flock -u "$SESSION_LOCK_FD"
fi
}
# Re-acquire the session lock. Blocks until available or timeout.
# Opens the lock fd if not already open (for use by external callers).
# Args: [timeout_secs] (default 300)
# Returns 0 on success, 1 on timeout/error.
# shellcheck disable=SC2120 # timeout arg is used by external callers
session_lock_acquire() {
local timeout="${1:-300}"
if [ -z "${SESSION_LOCK_FD:-}" ]; then
local lock_dir="${HOME}/.claude"
mkdir -p "$lock_dir"
exec {SESSION_LOCK_FD}>>"${lock_dir}/session.lock"
fi
flock -w "$timeout" "$SESSION_LOCK_FD"
}
# Wait for the Claude ready prompt in a tmux pane. # Wait for the Claude ready prompt in a tmux pane.
# Returns 0 if ready within TIMEOUT_SECS (default 120), 1 otherwise. # Returns 0 if ready within TIMEOUT_SECS (default 120), 1 otherwise.
@ -30,6 +61,9 @@ agent_inject_into_session() {
local session="$1" local session="$1"
local text="$2" local text="$2"
local tmpfile local tmpfile
# Re-acquire session lock before injecting — Claude will resume working
# shellcheck disable=SC2119 # using default timeout
session_lock_acquire || true
agent_wait_for_claude_ready "$session" 120 || true agent_wait_for_claude_ready "$session" 120 || true
# Clear idle marker — new work incoming # Clear idle marker — new work incoming
rm -f "/tmp/claude-idle-${session}.ts" rm -f "/tmp/claude-idle-${session}.ts"
@ -288,16 +322,23 @@ create_agent_session() {
model_flag="--model ${CLAUDE_MODEL}" model_flag="--model ${CLAUDE_MODEL}"
fi fi
# Acquire a session-level mutex via flock to prevent concurrent Claude # Acquire a session-level mutex via fd-based flock to prevent concurrent
# sessions from racing on OAuth token refresh (rotating the refresh token # Claude sessions from racing on OAuth token refresh. Unlike the previous
# invalidates it for other sessions). The lock is held for the lifetime # command-wrapper flock, the fd approach allows callers to release the lock
# of the Claude process inside the tmux session. # during idle phases (awaiting_review/awaiting_ci) and re-acquire before
# injecting the next prompt. See #724.
# Use ~/.claude/session.lock so the lock is shared across containers when # Use ~/.claude/session.lock so the lock is shared across containers when
# the host ~/.claude directory is bind-mounted. # the host ~/.claude directory is bind-mounted.
local lock_dir="${HOME}/.claude" local lock_dir="${HOME}/.claude"
mkdir -p "$lock_dir" mkdir -p "$lock_dir"
local claude_lock="${lock_dir}/session.lock" local claude_lock="${lock_dir}/session.lock"
local claude_cmd="flock -w 300 '${claude_lock}' claude --dangerously-skip-permissions ${model_flag}" if [ -z "${SESSION_LOCK_FD:-}" ]; then
exec {SESSION_LOCK_FD}>>"${claude_lock}"
fi
if ! flock -w 300 "$SESSION_LOCK_FD"; then
return 1
fi
local claude_cmd="claude --dangerously-skip-permissions ${model_flag}"
tmux new-session -d -s "$session" -c "$workdir" \ tmux new-session -d -s "$session" -c "$workdir" \
"$claude_cmd" 2>/dev/null "$claude_cmd" 2>/dev/null