fix: feat: unified escalation — single PHASE:escalate path for all agents (#510)

Replace PHASE:needs_human with PHASE:escalate across all agent types.
Consolidates 6 overlapping escalation mechanisms into one unified path:
detect → notify via Matrix → session stays alive → human reply injected → resume.

Key changes:
- PHASE:escalate replaces PHASE:needs_human everywhere (16 files)
- CI exhausted now escalates instead of immediately marking blocked
- Matrix listener routes free-text replies to vault tmux sessions
- Vault agent writes PHASE:escalate files for procurement requests
- Supervisor monitors PHASE:escalate sessions in health checks
- 24h timeout on escalation → blocked label + session killed
- All 38 phase protocol tests updated and passing

Supersedes #462, #458, #465.
This commit is contained in:
openhands 2026-03-21 19:39:04 +00:00
parent 725c4d7334
commit 5822dc89d9
18 changed files with 138 additions and 95 deletions

View file

@ -11,7 +11,7 @@ sourced as needed.
| `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) |
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `CODEBERG_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, Matrix config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) |
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` patterns. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
| `lib/matrix_listener.sh` | Long-poll Matrix sync daemon. Dispatches thread replies to the correct agent via well-known files (`/tmp/{agent}-escalation-reply`). Handles supervisor, gardener, dev, review, vault, and action reply routing. Run as systemd service. | Standalone daemon |
| `lib/matrix_listener.sh` | Long-poll Matrix sync daemon. Dispatches thread replies to the correct agent via tmux session injection (dev, action, vault, review) or well-known files (`/tmp/{agent}-escalation-reply` for supervisor/gardener). Handles all agent reply routing. Run as systemd service. | Standalone daemon |
| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `run_formula_and_monitor()` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh, action-agent.sh |
| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh |
| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | (available for future use) |

View file

@ -330,7 +330,7 @@ monitor_phase_loop() {
local current_phase
current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)
case "$current_phase" in
PHASE:done|PHASE:failed|PHASE:merged|PHASE:needs_human)
PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate)
;; # terminal — fall through to phase handler
*)
# Call callback with "crashed" — let agent-specific code handle recovery
@ -410,7 +410,7 @@ monitor_phase_loop() {
fi
return 0
;;
PHASE:failed|PHASE:needs_human)
PHASE:failed|PHASE:escalate)
_MONITOR_LOOP_EXIT="$current_phase"
if type "${callback}" &>/dev/null; then
"$callback" "$current_phase"

View file

@ -168,7 +168,7 @@ formula_phase_callback() {
log "ERROR: could not restart session after crash"
fi
;;
PHASE:done|PHASE:failed|PHASE:needs_human|PHASE:merged)
PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged)
agent_kill_session "${_MONITOR_SESSION:-$SESSION_NAME}"
;;
esac

View file

@ -30,7 +30,7 @@ reason=$(printf '%s' "$input" | jq -r '
# the PostToolUse hook already recorded the correct terminal phase.
existing=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]')
case "$existing" in
PHASE:done|PHASE:merged|PHASE:needs_human) exit 0 ;;
PHASE:done|PHASE:merged|PHASE:escalate) exit 0 ;;
esac
# Write phase file immediately — orchestrator reads first line as phase sentinel

View file

@ -164,7 +164,7 @@ while true; do
DEV_PHASE_FILE="/tmp/dev-session-${DEV_PROJECT}-${DEV_ISSUE}.phase"
if tmux has-session -t "$DEV_SESSION" 2>/dev/null; then
DEV_CUR_PHASE=$(head -1 "$DEV_PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true)
if [ "$DEV_CUR_PHASE" = "PHASE:needs_human" ] || [ "$DEV_CUR_PHASE" = "PHASE:awaiting_review" ]; then
if [ "$DEV_CUR_PHASE" = "PHASE:escalate" ] || [ "$DEV_CUR_PHASE" = "PHASE:awaiting_review" ]; then
DEV_INJECT_MSG="Human guidance from ${SENDER} in Matrix:
${BODY}
@ -283,26 +283,56 @@ Continue with the action formula based on this response."
fi
;;
vault)
# Parse APPROVE <id> or REJECT <id> from reply
VAULT_CMD=$(echo "$BODY" | tr '[:lower:]' '[:upper:]' | grep -oP '^\s*(APPROVE|REJECT)\s+\S+' | head -1 || true)
if [ -n "$VAULT_CMD" ]; then
VAULT_ACTION=$(echo "$VAULT_CMD" | awk '{print $1}')
VAULT_ID=$(echo "$BODY" | awk '{print $2}') # preserve original case for ID
log "vault dispatch: $VAULT_ACTION $VAULT_ID"
VAULT_DIR="${FACTORY_ROOT}/vault"
if [ "$VAULT_ACTION" = "APPROVE" ]; then
if bash "${VAULT_DIR}/vault-fire.sh" "$VAULT_ID" >> "${VAULT_DIR}/vault.log" 2>&1; then
matrix_send "vault" "✓ approved and fired: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true
# Route reply to vault tmux session if one exists (unified escalation path)
VAULT_ISSUE=$(awk -F'\t' -v id="$THREAD_ROOT" '$1 == id {print $4}' "$THREAD_MAP" 2>/dev/null || true)
VAULT_PROJECT=$(awk -F'\t' -v id="$THREAD_ROOT" '$1 == id {print $5}' "$THREAD_MAP" 2>/dev/null || true)
VAULT_INJECTED=false
if [ -n "$VAULT_ISSUE" ]; then
VAULT_SESSION="vault-${VAULT_PROJECT:-default}-${VAULT_ISSUE}"
if tmux has-session -t "$VAULT_SESSION" 2>/dev/null; then
VAULT_INJECT_MSG="Human reply from ${SENDER} in Matrix:
${BODY}
Interpret this response and decide how to proceed."
VAULT_INJECT_TMP=$(mktemp /tmp/vault-q-inject-XXXXXX)
printf '%s' "$VAULT_INJECT_MSG" > "$VAULT_INJECT_TMP"
tmux load-buffer -b "vault-q-${VAULT_ISSUE}" "$VAULT_INJECT_TMP" || true
tmux paste-buffer -t "$VAULT_SESSION" -b "vault-q-${VAULT_ISSUE}" || true
sleep 0.5
tmux send-keys -t "$VAULT_SESSION" "" Enter || true
tmux delete-buffer -b "vault-q-${VAULT_ISSUE}" 2>/dev/null || true
rm -f "$VAULT_INJECT_TMP"
VAULT_INJECTED=true
log "human reply from ${SENDER} injected into ${VAULT_SESSION}"
if ! grep -qF "$THREAD_ROOT" "$ACKED_FILE" 2>/dev/null; then
matrix_send "vault" "✓ Reply forwarded to vault session" "$THREAD_ROOT" >/dev/null 2>&1 || true
printf '%s\n' "$THREAD_ROOT" >> "$ACKED_FILE"
fi
fi
fi
# Fallback: parse APPROVE/REJECT for non-session vault actions
if [ "$VAULT_INJECTED" = false ]; then
VAULT_CMD=$(echo "$BODY" | tr '[:lower:]' '[:upper:]' | grep -oP '^\s*(APPROVE|REJECT)\s+\S+' | head -1 || true)
if [ -n "$VAULT_CMD" ]; then
VAULT_ACTION=$(echo "$VAULT_CMD" | awk '{print $1}')
VAULT_ID=$(echo "$BODY" | awk '{print $2}') # preserve original case for ID
log "vault dispatch: $VAULT_ACTION $VAULT_ID"
VAULT_DIR="${FACTORY_ROOT}/vault"
if [ "$VAULT_ACTION" = "APPROVE" ]; then
if bash "${VAULT_DIR}/vault-fire.sh" "$VAULT_ID" >> "${VAULT_DIR}/vault.log" 2>&1; then
matrix_send "vault" "✓ approved and fired: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true
else
matrix_send "vault" "✓ approved but fire failed — will retry: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true
fi
else
matrix_send "vault" "✓ approved but fire failed — will retry: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true
bash "${VAULT_DIR}/vault-reject.sh" "$VAULT_ID" "rejected by ${SENDER}" >> "${VAULT_DIR}/vault.log" 2>&1 || true
matrix_send "vault" "✓ rejected: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true
fi
else
bash "${VAULT_DIR}/vault-reject.sh" "$VAULT_ID" "rejected by ${SENDER}" >> "${VAULT_DIR}/vault.log" 2>&1 || true
matrix_send "vault" "✓ rejected: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true
log "vault: free-text reply (no session, no APPROVE/REJECT): ${BODY:0:100}"
matrix_send "vault" "⚠️ No active vault session. Reply with APPROVE <id> or REJECT <id>, or wait for a vault session to start." "$THREAD_ROOT" >/dev/null 2>&1 || true
fi
else
log "vault: unrecognized reply format: ${BODY:0:100}"
matrix_send "vault" "⚠️ Reply with APPROVE <id> or REJECT <id>" "$THREAD_ROOT" >/dev/null 2>&1 || true
fi
;;
*)