Fixes #261

## Changes

Fixed the gardener hanging forever when Claude skips the phase protocol. Three changes:

1. `gardener-agent.sh`: replaced the 999999s timeout with 7200s (2h, matching dev-agent).
2. `lib/agent-session.sh`: added idle-prompt detection to `monitor_phase_loop` — if Claude returns to the ❯ prompt for 3 consecutive polls with no phase file written, the loop exits immediately with `_MONITOR_LOOP_EXIT=idle_prompt`. It only fires when the phase file is empty, so awaiting_ci/review waits are unaffected.
3. Gardener prompt: removed the 'no time limit' wording and replaced it with an explicit phase-write requirement.

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/disinto/pulls/263
Reviewed-by: Disinto_bot <disinto_bot@noreply.codeberg.org>
This commit is contained in:
parent
e024b0de03
commit
d5c2c213a3
5 changed files with 58 additions and 13 deletions
|
|
@ -60,9 +60,9 @@ get_candidates() {
|
||||||
if (match(p, /^[a-z][a-zA-Z0-9_]*_[a-zA-Z0-9_]+/)) {
|
if (match(p, /^[a-z][a-zA-Z0-9_]*_[a-zA-Z0-9_]+/)) {
|
||||||
word = substr(p, RSTART, RLENGTH)
|
word = substr(p, RSTART, RLENGTH)
|
||||||
rest = substr(p, RSTART + RLENGTH, 1)
|
rest = substr(p, RSTART + RLENGTH, 1)
|
||||||
# Skip: case labels (word)), Python/jq patterns (word:),
|
# Skip: case labels (word) or word|), Python/jq patterns (word:),
|
||||||
# object method calls (word.method), assignments (word=)
|
# object method calls (word.method), assignments (word=)
|
||||||
if (rest == ")" || rest == ":" || rest == "." || rest == "=") continue
|
if (rest == ")" || rest == "|" || rest == ":" || rest == "." || rest == "=") continue
|
||||||
print word
|
print word
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -232,7 +232,7 @@ sourced as needed.
|
||||||
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `CODEBERG_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, Matrix config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) |
|
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `CODEBERG_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, Matrix config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) |
|
||||||
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` patterns. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
|
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` patterns. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
|
||||||
| `lib/matrix_listener.sh` | Long-poll Matrix sync daemon. Dispatches thread replies to the correct agent via well-known files (`/tmp/{agent}-escalation-reply`). Handles supervisor, gardener, dev, review, vault, and action reply routing. Run as systemd service. | Standalone daemon |
|
| `lib/matrix_listener.sh` | Long-poll Matrix sync daemon. Dispatches thread replies to the correct agent via well-known files (`/tmp/{agent}-escalation-reply`). Handles supervisor, gardener, dev, review, vault, and action reply routing. Run as systemd service. | Standalone daemon |
|
||||||
| `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`. | dev-agent.sh, gardener-agent.sh |
|
| `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `❯` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or a `PHASE:*` string. Agents must handle `idle_prompt` in both their callback and their post-loop exit handler. | dev-agent.sh, gardener-agent.sh |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -714,12 +714,18 @@ monitor_phase_loop "$PHASE_FILE" "$IDLE_TIMEOUT" _on_phase_change
|
||||||
|
|
||||||
# Handle exit reason from monitor_phase_loop
|
# Handle exit reason from monitor_phase_loop
|
||||||
case "${_MONITOR_LOOP_EXIT:-}" in
|
case "${_MONITOR_LOOP_EXIT:-}" in
|
||||||
idle_timeout)
|
idle_timeout|idle_prompt)
|
||||||
|
if [ "${_MONITOR_LOOP_EXIT:-}" = "idle_prompt" ]; then
|
||||||
|
notify_ctx \
|
||||||
|
"session finished without phase signal — killed. Escalating to gardener." \
|
||||||
|
"session finished without phase signal — killed. Escalating to gardener.${PR_NUMBER:+ PR <a href='${CODEBERG_WEB}/pulls/${PR_NUMBER}'>#${PR_NUMBER}</a>}"
|
||||||
|
else
|
||||||
notify_ctx \
|
notify_ctx \
|
||||||
"session idle for 2h — killed. Escalating to gardener." \
|
"session idle for 2h — killed. Escalating to gardener." \
|
||||||
"session idle for 2h — killed. Escalating to gardener.${PR_NUMBER:+ PR <a href='${CODEBERG_WEB}/pulls/${PR_NUMBER}'>#${PR_NUMBER}</a>}"
|
"session idle for 2h — killed. Escalating to gardener.${PR_NUMBER:+ PR <a href='${CODEBERG_WEB}/pulls/${PR_NUMBER}'>#${PR_NUMBER}</a>}"
|
||||||
|
fi
|
||||||
# Escalate: write to project-suffixed escalation file so gardener picks it up
|
# Escalate: write to project-suffixed escalation file so gardener picks it up
|
||||||
echo "{\"issue\":${ISSUE},\"pr\":${PR_NUMBER:-0},\"reason\":\"idle_timeout\",\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
|
echo "{\"issue\":${ISSUE},\"pr\":${PR_NUMBER:-0},\"reason\":\"${_MONITOR_LOOP_EXIT:-idle_timeout}\",\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" \
|
||||||
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
|
>> "${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"
|
||||||
# Restore labels: remove in-progress, add backlog
|
# Restore labels: remove in-progress, add backlog
|
||||||
cleanup_labels
|
cleanup_labels
|
||||||
|
|
|
||||||
|
|
@ -222,7 +222,7 @@ fi
|
||||||
# ── Build prompt from formula + dynamic context ────────────────────────────
|
# ── Build prompt from formula + dynamic context ────────────────────────────
|
||||||
log "Building gardener prompt from formula"
|
log "Building gardener prompt from formula"
|
||||||
|
|
||||||
PROMPT="You are the issue gardener for ${CODEBERG_REPO}. Work through the formula below — there is no time limit, run until PHASE:done.
|
PROMPT="You are the issue gardener for ${CODEBERG_REPO}. Work through the formula below. You MUST write PHASE:done to '${PHASE_FILE}' when finished — the orchestrator will time you out if you return to the prompt without signalling.
|
||||||
|
|
||||||
${CONTEXT_SECTION}
|
${CONTEXT_SECTION}
|
||||||
## Formula
|
## Formula
|
||||||
|
|
@ -303,14 +303,23 @@ gardener_phase_callback() {
|
||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
# No idle timeout — gardener runs until PHASE:done or PHASE:failed
|
monitor_phase_loop "$PHASE_FILE" 7200 "gardener_phase_callback"
|
||||||
monitor_phase_loop "$PHASE_FILE" 999999 "gardener_phase_callback"
|
|
||||||
|
|
||||||
FINAL_PHASE=$(read_phase)
|
FINAL_PHASE=$(read_phase)
|
||||||
log "Final phase: ${FINAL_PHASE:-none}"
|
log "Final phase: ${FINAL_PHASE:-none}"
|
||||||
|
|
||||||
if [ "$FINAL_PHASE" != "PHASE:done" ]; then
|
if [ "$FINAL_PHASE" != "PHASE:done" ]; then
|
||||||
log "gardener-agent finished without PHASE:done (phase: ${FINAL_PHASE:-none})"
|
case "${_MONITOR_LOOP_EXIT:-}" in
|
||||||
|
idle_prompt)
|
||||||
|
log "gardener-agent: Claude returned to prompt without writing phase signal — no phase file written"
|
||||||
|
;;
|
||||||
|
idle_timeout)
|
||||||
|
log "gardener-agent: timed out after 2h with no phase signal"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
log "gardener-agent finished without PHASE:done (phase: ${FINAL_PHASE:-none}, exit: ${_MONITOR_LOOP_EXIT:-})"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,11 +60,16 @@ inject_formula() {
|
||||||
}
|
}
|
||||||
|
|
||||||
# Monitor a phase file, calling a callback on changes and handling idle timeout.
|
# Monitor a phase file, calling a callback on changes and handling idle timeout.
|
||||||
# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, done, failed, break).
|
# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, idle_prompt, done, failed, break).
|
||||||
# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME).
|
# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME).
|
||||||
# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly.
|
# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly.
|
||||||
# Args: phase_file idle_timeout_secs callback_fn [session_name]
|
# Args: phase_file idle_timeout_secs callback_fn [session_name]
|
||||||
# session_name — tmux session to health-check; falls back to $SESSION_NAME global
|
# session_name — tmux session to health-check; falls back to $SESSION_NAME global
|
||||||
|
#
|
||||||
|
# Idle prompt detection: if Claude returns to the ❯ prompt for 3 consecutive polls
|
||||||
|
# WITHOUT having written any phase signal, the session is killed and the callback is
|
||||||
|
# invoked with "PHASE:failed". This handles the case where Claude completes its work
|
||||||
|
# but skips the phase protocol entirely.
|
||||||
monitor_phase_loop() {
|
monitor_phase_loop() {
|
||||||
local phase_file="$1"
|
local phase_file="$1"
|
||||||
local idle_timeout="$2"
|
local idle_timeout="$2"
|
||||||
|
|
@ -76,6 +81,7 @@ monitor_phase_loop() {
|
||||||
local poll_interval="${PHASE_POLL_INTERVAL:-10}"
|
local poll_interval="${PHASE_POLL_INTERVAL:-10}"
|
||||||
local last_mtime=0
|
local last_mtime=0
|
||||||
local idle_elapsed=0
|
local idle_elapsed=0
|
||||||
|
local idle_pane_count=0
|
||||||
|
|
||||||
while true; do
|
while true; do
|
||||||
sleep "$poll_interval"
|
sleep "$poll_interval"
|
||||||
|
|
@ -99,6 +105,7 @@ monitor_phase_loop() {
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
idle_elapsed=0
|
idle_elapsed=0
|
||||||
|
idle_pane_count=0
|
||||||
continue
|
continue
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
@ -117,6 +124,29 @@ monitor_phase_loop() {
|
||||||
agent_kill_session "${_session}"
|
agent_kill_session "${_session}"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
# Idle prompt detection: Claude finished without writing a phase signal.
|
||||||
|
# Only fires when current_phase is empty (no phase ever written).
|
||||||
|
# Note: tmux capture-pane captures the full visible pane area, not just the
|
||||||
|
# last line. Prior tool output containing ❯ (e.g. a zsh subshell prompt in
|
||||||
|
# Claude's output) could trigger a false positive — the same risk exists in
|
||||||
|
# agent_wait_for_claude_ready(). Requiring 3 consecutive polls (≥2 poll
|
||||||
|
# intervals of sustained idle) reduces but does not eliminate this risk.
|
||||||
|
if [ -z "$current_phase" ] && tmux has-session -t "${_session}" 2>/dev/null && \
|
||||||
|
tmux capture-pane -t "${_session}" -p 2>/dev/null | grep -q '❯'; then
|
||||||
|
idle_pane_count=$(( idle_pane_count + 1 ))
|
||||||
|
if [ "$idle_pane_count" -ge 3 ]; then
|
||||||
|
_MONITOR_LOOP_EXIT="idle_prompt"
|
||||||
|
# Session is already killed before the callback is invoked.
|
||||||
|
# Callbacks that handle PHASE:failed must not assume the session is alive.
|
||||||
|
agent_kill_session "${_session}"
|
||||||
|
if type "${callback}" &>/dev/null; then
|
||||||
|
"$callback" "PHASE:failed"
|
||||||
|
fi
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
idle_pane_count=0
|
||||||
|
fi
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue