fix: feat: unified escalation — single PHASE:escalate path for all agents (#510)
Replace PHASE:needs_human with PHASE:escalate across all agent types. Consolidates 6 overlapping escalation mechanisms into one unified path: detect → notify via Matrix → session stays alive → human reply injected → resume. Key changes: - PHASE:escalate replaces PHASE:needs_human everywhere (16 files) - CI exhausted now escalates instead of immediately marking blocked - Matrix listener routes free-text replies to vault tmux sessions - Vault agent writes PHASE:escalate files for procurement requests - Supervisor monitors PHASE:escalate sessions in health checks - 24h timeout on escalation → blocked label + session killed - All 38 phase protocol tests updated and passing Supersedes #462, #458, #465.
This commit is contained in:
parent
725c4d7334
commit
5822dc89d9
18 changed files with 138 additions and 95 deletions
|
|
@ -599,7 +599,13 @@ curl -sf -X PATCH \\
|
|||
echo \"PHASE:done\" > \"${PHASE_FILE}\"
|
||||
\`\`\`
|
||||
If merge fails due to conflicts, rebase first then retry the merge.
|
||||
If merge repeatedly fails, write PHASE:needs_human.
|
||||
If merge repeatedly fails, write PHASE:escalate with a reason.
|
||||
|
||||
**When you need human help (CI exhausted, merge blocked, stuck on a decision):**
|
||||
\`\`\`bash
|
||||
printf 'PHASE:escalate\nReason: %s\n' \"describe what you need\" > \"${PHASE_FILE}\"
|
||||
\`\`\`
|
||||
Then STOP and wait. A human will reply via Matrix and the response will be injected.
|
||||
|
||||
**If refusing (too large, unmet dep, already done):**
|
||||
\`\`\`bash
|
||||
|
|
@ -779,7 +785,7 @@ case "${_MONITOR_LOOP_EXIT:-}" in
|
|||
"session idle for 2h — killed. Marking blocked." \
|
||||
"session idle for 2h — killed. Marking blocked.${PR_NUMBER:+ PR <a href='${CODEBERG_WEB}/pulls/${PR_NUMBER}'>#${PR_NUMBER}</a>}"
|
||||
fi
|
||||
# Post diagnostic comment + label issue blocked (replaces escalation JSONL)
|
||||
# Post diagnostic comment + label issue blocked
|
||||
post_blocked_diagnostic "${_MONITOR_LOOP_EXIT:-idle_timeout}"
|
||||
if [ -n "${PR_NUMBER:-}" ]; then
|
||||
log "keeping worktree (PR #${PR_NUMBER} still open)"
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh"
|
|||
: "${PHASE_POLL_INTERVAL:=30}"
|
||||
|
||||
# --- Post diagnostic comment + label issue as blocked ---
|
||||
# Replaces the old escalation JSONL write path.
|
||||
# Captures tmux pane output, posts a structured comment on the issue, removes
|
||||
# in-progress label, and adds the "blocked" label.
|
||||
#
|
||||
|
|
@ -160,6 +159,12 @@ echo "PHASE:awaiting_ci" > "${_pf}"
|
|||
\`\`\`
|
||||
(CI runs again after each push — always write awaiting_ci, not awaiting_review)
|
||||
|
||||
**When you need human help (CI exhausted, merge blocked, stuck on a decision):**
|
||||
\`\`\`bash
|
||||
printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}"
|
||||
\`\`\`
|
||||
Then STOP and wait. A human will reply via Matrix and the response will be injected.
|
||||
|
||||
**On unrecoverable failure:**
|
||||
\`\`\`bash
|
||||
printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}"
|
||||
|
|
@ -173,7 +178,7 @@ _PHASE_PROTOCOL_EOF_
|
|||
# Returns:
|
||||
# 0 = merged successfully
|
||||
# 1 = other failure (conflict, network error, etc.)
|
||||
# 2 = not enough approvals (HTTP 405) — PHASE:needs_human already written
|
||||
# 2 = not enough approvals (HTTP 405) — PHASE:escalate already written
|
||||
do_merge() {
|
||||
local pr_num="$1"
|
||||
local merge_response merge_http_code merge_body
|
||||
|
|
@ -193,7 +198,7 @@ do_merge() {
|
|||
# HTTP 405 — merge requirements not met (approvals, branch protection); structural, not transient
|
||||
if [ "$merge_http_code" = "405" ]; then
|
||||
log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}"
|
||||
printf 'PHASE:needs_human\nReason: %s\n' \
|
||||
printf 'PHASE:escalate\nReason: %s\n' \
|
||||
"PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \
|
||||
> "$PHASE_FILE"
|
||||
return 2
|
||||
|
|
@ -345,7 +350,7 @@ Write PHASE:awaiting_review to the phase file, then stop and wait for review fee
|
|||
if ! $CI_DONE; then
|
||||
log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s"
|
||||
notify "CI timeout on PR #${PR_NUMBER}"
|
||||
agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:needs_human if you cannot proceed."
|
||||
agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed."
|
||||
return 0
|
||||
fi
|
||||
|
||||
|
|
@ -395,13 +400,12 @@ Write PHASE:awaiting_review to the phase file, then stop and wait for review fee
|
|||
CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 ))
|
||||
_ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}"
|
||||
if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then
|
||||
log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — marking blocked"
|
||||
post_blocked_diagnostic "ci_exhausted after ${CI_FIX_COUNT} attempts (step: ${FAILED_STEP:-unknown})"
|
||||
log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating"
|
||||
notify_ctx \
|
||||
"CI exhausted after ${CI_FIX_COUNT} attempts — issue marked blocked" \
|
||||
"CI exhausted after ${CI_FIX_COUNT} attempts on PR <a href='${PR_URL:-${CODEBERG_WEB}/pulls/${PR_NUMBER}}'>#${PR_NUMBER}</a> | <a href='${_ci_pipeline_url}'>Pipeline</a><br>Step: <code>${FAILED_STEP:-unknown}</code> — issue marked blocked"
|
||||
printf 'PHASE:failed\nReason: ci_exhausted after %d attempts\n' "$CI_FIX_COUNT" > "$PHASE_FILE"
|
||||
# Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:failed
|
||||
"CI exhausted after ${CI_FIX_COUNT} attempts — escalating for human help" \
|
||||
"CI exhausted after ${CI_FIX_COUNT} attempts on PR <a href='${PR_URL:-${CODEBERG_WEB}/pulls/${PR_NUMBER}}'>#${PR_NUMBER}</a> | <a href='${_ci_pipeline_url}'>Pipeline</a><br>Step: <code>${FAILED_STEP:-unknown}</code> — escalating for human help"
|
||||
printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE"
|
||||
# Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate
|
||||
return 0
|
||||
fi
|
||||
|
||||
|
|
@ -551,9 +555,9 @@ Rebase onto ${PRIMARY_BRANCH} and push:
|
|||
echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
|
||||
|
||||
Do NOT merge or close the issue — the orchestrator handles that after CI passes.
|
||||
If rebase repeatedly fails, write PHASE:needs_human with a reason."
|
||||
If rebase repeatedly fails, write PHASE:escalate with a reason."
|
||||
fi
|
||||
# _merge_rc=2: PHASE:needs_human already written by do_merge()
|
||||
# _merge_rc=2: PHASE:escalate already written by do_merge()
|
||||
break
|
||||
|
||||
elif [ "$VERDICT" = "REQUEST_CHANGES" ] || [ "$VERDICT" = "DISCUSS" ]; then
|
||||
|
|
@ -618,21 +622,21 @@ Instructions:
|
|||
if ! $REVIEW_FOUND && [ "$REVIEW_POLL_ELAPSED" -ge "$REVIEW_POLL_TIMEOUT" ]; then
|
||||
log "TIMEOUT: no review after 3h"
|
||||
notify "no review received for PR #${PR_NUMBER} after 3h"
|
||||
agent_inject_into_session "$SESSION_NAME" "TIMEOUT: No review received after 3 hours for PR #${PR_NUMBER}. Write PHASE:needs_human to escalate to a human reviewer."
|
||||
agent_inject_into_session "$SESSION_NAME" "TIMEOUT: No review received after 3 hours for PR #${PR_NUMBER}. Write PHASE:escalate to escalate to a human reviewer."
|
||||
fi
|
||||
|
||||
# ── PHASE: needs_human ──────────────────────────────────────────────────────
|
||||
elif [ "$phase" = "PHASE:needs_human" ]; then
|
||||
status "needs human input on issue #${ISSUE}"
|
||||
HUMAN_REASON=$(sed -n '2p' "$PHASE_FILE" 2>/dev/null | sed 's/^Reason: //' || echo "")
|
||||
# ── PHASE: escalate ──────────────────────────────────────────────────────
|
||||
elif [ "$phase" = "PHASE:escalate" ]; then
|
||||
status "escalated — waiting for human input on issue #${ISSUE}"
|
||||
ESCALATE_REASON=$(sed -n '2p' "$PHASE_FILE" 2>/dev/null | sed 's/^Reason: //' || echo "")
|
||||
_issue_url="${CODEBERG_WEB}/issues/${ISSUE}"
|
||||
_pr_link=""
|
||||
[ -n "${PR_NUMBER:-}" ] && _pr_link=" | PR <a href='${CODEBERG_WEB}/pulls/${PR_NUMBER}'>#${PR_NUMBER}</a>"
|
||||
notify_ctx \
|
||||
"⚠️ Issue #${ISSUE} (PR #${PR_NUMBER:-none}) needs human input.${HUMAN_REASON:+ Reason: ${HUMAN_REASON}}" \
|
||||
"⚠️ <a href='${_issue_url}'>Issue #${ISSUE}</a>${_pr_link} needs human input.${HUMAN_REASON:+ Reason: ${HUMAN_REASON}}<br>Reply in this thread to send guidance to the dev agent."
|
||||
log "phase: needs_human — notified via Matrix, waiting for external injection"
|
||||
# Don't inject anything — supervisor-run.sh (#81) injects human replies
|
||||
"⚠️ Issue #${ISSUE} (PR #${PR_NUMBER:-none}) escalated — needs human input.${ESCALATE_REASON:+ Reason: ${ESCALATE_REASON}}" \
|
||||
"⚠️ <a href='${_issue_url}'>Issue #${ISSUE}</a>${_pr_link} escalated — needs human input.${ESCALATE_REASON:+ Reason: ${ESCALATE_REASON}}<br>Reply in this thread to send guidance to the agent."
|
||||
log "phase: escalate — notified via Matrix, session stays alive waiting for reply"
|
||||
# Session stays alive — matrix_listener injects human reply directly
|
||||
|
||||
# ── PHASE: done ─────────────────────────────────────────────────────────────
|
||||
# PR merged and issue closed (by orchestrator or Claude). Just clean up local state.
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ check_phase() {
|
|||
|
||||
check_phase "PHASE:awaiting_ci"
|
||||
check_phase "PHASE:awaiting_review"
|
||||
check_phase "PHASE:needs_human"
|
||||
check_phase "PHASE:escalate"
|
||||
check_phase "PHASE:done"
|
||||
check_phase "PHASE:failed"
|
||||
|
||||
|
|
@ -109,14 +109,14 @@ fi
|
|||
is_valid_phase() {
|
||||
local p="$1"
|
||||
case "$p" in
|
||||
PHASE:awaiting_ci|PHASE:awaiting_review|PHASE:needs_human|PHASE:done|PHASE:failed)
|
||||
PHASE:awaiting_ci|PHASE:awaiting_review|PHASE:escalate|PHASE:done|PHASE:failed)
|
||||
return 0 ;;
|
||||
*)
|
||||
return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
for p in "PHASE:awaiting_ci" "PHASE:awaiting_review" "PHASE:needs_human" \
|
||||
for p in "PHASE:awaiting_ci" "PHASE:awaiting_review" "PHASE:escalate" \
|
||||
"PHASE:done" "PHASE:failed"; do
|
||||
if is_valid_phase "$p"; then
|
||||
ok "is_valid_phase: $p"
|
||||
|
|
@ -131,14 +131,14 @@ else
|
|||
fail "is_valid_phase should reject PHASE:unknown"
|
||||
fi
|
||||
|
||||
# ── Test 8: needs_human mtime guard — no duplicate notify on second poll ─────
|
||||
# ── Test 8: escalate mtime guard — no duplicate notify on second poll ─────
|
||||
# Simulates the LAST_PHASE_MTIME guard from dev-agent.sh: after the orchestrator
|
||||
# handles PHASE:needs_human once, subsequent poll cycles must not re-trigger
|
||||
# handles PHASE:escalate once, subsequent poll cycles must not re-trigger
|
||||
# notify() if the phase file was not rewritten.
|
||||
NOTIFY_COUNT=0
|
||||
mock_notify() { NOTIFY_COUNT=$((NOTIFY_COUNT + 1)); }
|
||||
|
||||
echo "PHASE:needs_human" > "$PHASE_FILE"
|
||||
echo "PHASE:escalate" > "$PHASE_FILE"
|
||||
LAST_PHASE_MTIME=0
|
||||
|
||||
# --- First poll cycle: phase file is newer than LAST_PHASE_MTIME ---
|
||||
|
|
@ -162,9 +162,9 @@ if [ -n "$CURRENT_PHASE" ] && [ "$PHASE_MTIME" -gt "$LAST_PHASE_MTIME" ]; then
|
|||
fi
|
||||
|
||||
if [ "$NOTIFY_COUNT" -eq 1 ]; then
|
||||
ok "needs_human mtime guard: notify called once, blocked on second poll"
|
||||
ok "escalate mtime guard: notify called once, blocked on second poll"
|
||||
else
|
||||
fail "needs_human mtime guard: expected 1 notify call, got $NOTIFY_COUNT"
|
||||
fail "escalate mtime guard: expected 1 notify call, got $NOTIFY_COUNT"
|
||||
fi
|
||||
|
||||
# ── Test 9: PostToolUse hook detects writes, ignores reads ────────────────
|
||||
|
|
@ -317,15 +317,15 @@ if [ -x "$STOP_FAILURE_HOOK" ]; then
|
|||
fi
|
||||
rm -f "$SF_MARKER" "$PHASE_FILE"
|
||||
|
||||
# 10i: terminal phase guard — does not overwrite PHASE:needs_human
|
||||
echo "PHASE:needs_human" > "$PHASE_FILE"
|
||||
# 10i: terminal phase guard — does not overwrite PHASE:escalate
|
||||
echo "PHASE:escalate" > "$PHASE_FILE"
|
||||
printf '{"stop_reason":"rate_limit"}' \
|
||||
| "$STOP_FAILURE_HOOK" "$PHASE_FILE" "$SF_MARKER"
|
||||
sf_first=$(head -1 "$PHASE_FILE" 2>/dev/null)
|
||||
if [ "$sf_first" = "PHASE:needs_human" ] && [ ! -f "$SF_MARKER" ]; then
|
||||
ok "StopFailure hook does not overwrite terminal PHASE:needs_human"
|
||||
if [ "$sf_first" = "PHASE:escalate" ] && [ ! -f "$SF_MARKER" ]; then
|
||||
ok "StopFailure hook does not overwrite terminal PHASE:escalate"
|
||||
else
|
||||
fail "StopFailure hook overwrote PHASE:needs_human: first='$sf_first'"
|
||||
fail "StopFailure hook overwrote PHASE:escalate: first='$sf_first'"
|
||||
fi
|
||||
rm -f "$SF_MARKER" "$PHASE_FILE"
|
||||
else
|
||||
|
|
@ -360,31 +360,31 @@ else
|
|||
fail "phase-changed marker did not reset mtime guard"
|
||||
fi
|
||||
|
||||
# ── Test 12: crash handler treats PHASE:needs_human as terminal ───────────
|
||||
# ── Test 12: crash handler treats PHASE:escalate as terminal ───────────
|
||||
# Simulates the monitor_phase_loop crash handler: when a session exits while
|
||||
# the phase file holds PHASE:needs_human, it must be treated as terminal
|
||||
# the phase file holds PHASE:escalate, it must be treated as terminal
|
||||
# (fall through to the phase handler) rather than invoking callback with
|
||||
# PHASE:crashed, which would lose the escalation intent.
|
||||
CRASH_CALLBACK_PHASE=""
|
||||
mock_crash_callback() { CRASH_CALLBACK_PHASE="$1"; }
|
||||
|
||||
echo "PHASE:needs_human" > "$PHASE_FILE"
|
||||
echo "PHASE:escalate" > "$PHASE_FILE"
|
||||
current_phase=$(head -1 "$PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true)
|
||||
case "$current_phase" in
|
||||
PHASE:done|PHASE:failed|PHASE:merged|PHASE:needs_human)
|
||||
PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate)
|
||||
# terminal — fall through to phase handler (correct behavior)
|
||||
mock_crash_callback "$current_phase"
|
||||
;;
|
||||
*)
|
||||
# would invoke callback with PHASE:crashed (incorrect for needs_human)
|
||||
# would invoke callback with PHASE:crashed (incorrect for escalate)
|
||||
mock_crash_callback "PHASE:crashed"
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ "$CRASH_CALLBACK_PHASE" = "PHASE:needs_human" ]; then
|
||||
ok "crash handler preserves PHASE:needs_human (not replaced by PHASE:crashed)"
|
||||
if [ "$CRASH_CALLBACK_PHASE" = "PHASE:escalate" ]; then
|
||||
ok "crash handler preserves PHASE:escalate (not replaced by PHASE:crashed)"
|
||||
else
|
||||
fail "crash handler lost escalation intent: expected PHASE:needs_human, got $CRASH_CALLBACK_PHASE"
|
||||
fail "crash handler lost escalation intent: expected PHASE:escalate, got $CRASH_CALLBACK_PHASE"
|
||||
fi
|
||||
|
||||
# Also verify the other terminal phases still work in crash handler
|
||||
|
|
@ -392,7 +392,7 @@ for tp in "PHASE:done" "PHASE:failed" "PHASE:merged"; do
|
|||
echo "$tp" > "$PHASE_FILE"
|
||||
current_phase=$(head -1 "$PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true)
|
||||
case "$current_phase" in
|
||||
PHASE:done|PHASE:failed|PHASE:merged|PHASE:needs_human)
|
||||
PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate)
|
||||
ok "crash handler treats $tp as terminal"
|
||||
;;
|
||||
*)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue