From 5822dc89d97cc3298ea8a9129c7025752264268e Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 21 Mar 2026 19:39:04 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20feat:=20unified=20escalation=20=E2=80=94?= =?UTF-8?q?=20single=20PHASE:escalate=20path=20for=20all=20agents=20(#510)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace PHASE:needs_human with PHASE:escalate across all agent types. Consolidates 6 overlapping escalation mechanisms into one unified path: detect → notify via Matrix → session stays alive → human reply injected → resume. Key changes: - PHASE:escalate replaces PHASE:needs_human everywhere (16 files) - CI exhausted now escalates instead of immediately marking blocked - Matrix listener routes free-text replies to vault tmux sessions - Vault agent writes PHASE:escalate files for procurement requests - Supervisor monitors PHASE:escalate sessions in health checks - 24h timeout on escalation → blocked label + session killed - All 38 phase protocol tests updated and passing Supersedes #462, #458, #465. --- AGENTS.md | 2 +- action/action-agent.sh | 2 +- dev/dev-agent.sh | 10 ++++-- dev/phase-handler.sh | 46 +++++++++++++------------ dev/phase-test.sh | 44 ++++++++++++------------ docs/PHASE-PROTOCOL.md | 16 ++++----- formulas/run-supervisor.toml | 2 +- lib/AGENTS.md | 2 +- lib/agent-session.sh | 4 +-- lib/formula-session.sh | 2 +- lib/hooks/on-stop-failure.sh | 2 +- lib/matrix_listener.sh | 64 +++++++++++++++++++++++++---------- planner/prerequisite-tree.md | 4 +-- review/review-poll.sh | 2 +- review/review-pr.sh | 2 +- site/docs/architecture.html | 6 ++-- supervisor/supervisor-poll.sh | 14 ++++---- vault/vault-agent.sh | 9 +++-- 18 files changed, 138 insertions(+), 95 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index f64f775..fb5062e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -144,7 +144,7 @@ at each phase boundary by writing to a phase file (e.g. `/tmp/dev-session-{project}-{issue}.phase`). Key phases: `PHASE:awaiting_ci` → `PHASE:awaiting_review` → `PHASE:done`. -Also: `PHASE:needs_human`, `PHASE:failed`. +Also: `PHASE:escalate` (needs human input), `PHASE:failed`. See [docs/PHASE-PROTOCOL.md](docs/PHASE-PROTOCOL.md) for the complete spec including the orchestrator reaction matrix, sequence diagram, and crash recovery. diff --git a/action/action-agent.sh b/action/action-agent.sh index 68ee8a9..30bc6c9 100755 --- a/action/action-agent.sh +++ b/action/action-agent.sh @@ -351,7 +351,7 @@ case "${_MONITOR_LOOP_EXIT:-}" in notify_ctx \ "session idle for $((IDLE_TIMEOUT / 3600))h — killed" \ "session idle for $((IDLE_TIMEOUT / 3600))h — killed" - # Post diagnostic comment + label blocked (replaces escalation JSONL) + # Post diagnostic comment + label blocked post_blocked_diagnostic "idle_timeout" rm -f "$PHASE_FILE" "${PHASE_FILE%.phase}.context" "$IMPL_SUMMARY_FILE" "$THREAD_FILE" "$SCRATCH_FILE" ;; diff --git a/dev/dev-agent.sh b/dev/dev-agent.sh index 3bfa07b..5a46499 100755 --- a/dev/dev-agent.sh +++ b/dev/dev-agent.sh @@ -599,7 +599,13 @@ curl -sf -X PATCH \\ echo \"PHASE:done\" > \"${PHASE_FILE}\" \`\`\` If merge fails due to conflicts, rebase first then retry the merge. -If merge repeatedly fails, write PHASE:needs_human. +If merge repeatedly fails, write PHASE:escalate with a reason. + +**When you need human help (CI exhausted, merge blocked, stuck on a decision):** +\`\`\`bash +printf 'PHASE:escalate\nReason: %s\n' \"describe what you need\" > \"${PHASE_FILE}\" +\`\`\` +Then STOP and wait. A human will reply via Matrix and the response will be injected. **If refusing (too large, unmet dep, already done):** \`\`\`bash @@ -779,7 +785,7 @@ case "${_MONITOR_LOOP_EXIT:-}" in "session idle for 2h — killed. Marking blocked." \ "session idle for 2h — killed. Marking blocked.${PR_NUMBER:+ PR #${PR_NUMBER}}" fi - # Post diagnostic comment + label issue blocked (replaces escalation JSONL) + # Post diagnostic comment + label issue blocked post_blocked_diagnostic "${_MONITOR_LOOP_EXIT:-idle_timeout}" if [ -n "${PR_NUMBER:-}" ]; then log "keeping worktree (PR #${PR_NUMBER} still open)" diff --git a/dev/phase-handler.sh b/dev/phase-handler.sh index aca12a4..3923d1a 100644 --- a/dev/phase-handler.sh +++ b/dev/phase-handler.sh @@ -43,7 +43,6 @@ source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh" : "${PHASE_POLL_INTERVAL:=30}" # --- Post diagnostic comment + label issue as blocked --- -# Replaces the old escalation JSONL write path. # Captures tmux pane output, posts a structured comment on the issue, removes # in-progress label, and adds the "blocked" label. # @@ -160,6 +159,12 @@ echo "PHASE:awaiting_ci" > "${_pf}" \`\`\` (CI runs again after each push — always write awaiting_ci, not awaiting_review) +**When you need human help (CI exhausted, merge blocked, stuck on a decision):** +\`\`\`bash +printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}" +\`\`\` +Then STOP and wait. A human will reply via Matrix and the response will be injected. + **On unrecoverable failure:** \`\`\`bash printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}" @@ -173,7 +178,7 @@ _PHASE_PROTOCOL_EOF_ # Returns: # 0 = merged successfully # 1 = other failure (conflict, network error, etc.) -# 2 = not enough approvals (HTTP 405) — PHASE:needs_human already written +# 2 = not enough approvals (HTTP 405) — PHASE:escalate already written do_merge() { local pr_num="$1" local merge_response merge_http_code merge_body @@ -193,7 +198,7 @@ do_merge() { # HTTP 405 — merge requirements not met (approvals, branch protection); structural, not transient if [ "$merge_http_code" = "405" ]; then log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" - printf 'PHASE:needs_human\nReason: %s\n' \ + printf 'PHASE:escalate\nReason: %s\n' \ "PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \ > "$PHASE_FILE" return 2 @@ -345,7 +350,7 @@ Write PHASE:awaiting_review to the phase file, then stop and wait for review fee if ! $CI_DONE; then log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s" notify "CI timeout on PR #${PR_NUMBER}" - agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:needs_human if you cannot proceed." + agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed." return 0 fi @@ -395,13 +400,12 @@ Write PHASE:awaiting_review to the phase file, then stop and wait for review fee CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 )) _ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}" if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then - log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — marking blocked" - post_blocked_diagnostic "ci_exhausted after ${CI_FIX_COUNT} attempts (step: ${FAILED_STEP:-unknown})" + log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating" notify_ctx \ - "CI exhausted after ${CI_FIX_COUNT} attempts — issue marked blocked" \ - "CI exhausted after ${CI_FIX_COUNT} attempts on PR #${PR_NUMBER} | Pipeline
Step: ${FAILED_STEP:-unknown} — issue marked blocked" - printf 'PHASE:failed\nReason: ci_exhausted after %d attempts\n' "$CI_FIX_COUNT" > "$PHASE_FILE" - # Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:failed + "CI exhausted after ${CI_FIX_COUNT} attempts — escalating for human help" \ + "CI exhausted after ${CI_FIX_COUNT} attempts on PR #${PR_NUMBER} | Pipeline
Step: ${FAILED_STEP:-unknown} — escalating for human help" + printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE" + # Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate return 0 fi @@ -551,9 +555,9 @@ Rebase onto ${PRIMARY_BRANCH} and push: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" Do NOT merge or close the issue — the orchestrator handles that after CI passes. -If rebase repeatedly fails, write PHASE:needs_human with a reason." +If rebase repeatedly fails, write PHASE:escalate with a reason." fi - # _merge_rc=2: PHASE:needs_human already written by do_merge() + # _merge_rc=2: PHASE:escalate already written by do_merge() break elif [ "$VERDICT" = "REQUEST_CHANGES" ] || [ "$VERDICT" = "DISCUSS" ]; then @@ -618,21 +622,21 @@ Instructions: if ! $REVIEW_FOUND && [ "$REVIEW_POLL_ELAPSED" -ge "$REVIEW_POLL_TIMEOUT" ]; then log "TIMEOUT: no review after 3h" notify "no review received for PR #${PR_NUMBER} after 3h" - agent_inject_into_session "$SESSION_NAME" "TIMEOUT: No review received after 3 hours for PR #${PR_NUMBER}. Write PHASE:needs_human to escalate to a human reviewer." + agent_inject_into_session "$SESSION_NAME" "TIMEOUT: No review received after 3 hours for PR #${PR_NUMBER}. Write PHASE:escalate to escalate to a human reviewer." fi - # ── PHASE: needs_human ────────────────────────────────────────────────────── - elif [ "$phase" = "PHASE:needs_human" ]; then - status "needs human input on issue #${ISSUE}" - HUMAN_REASON=$(sed -n '2p' "$PHASE_FILE" 2>/dev/null | sed 's/^Reason: //' || echo "") + # ── PHASE: escalate ────────────────────────────────────────────────────── + elif [ "$phase" = "PHASE:escalate" ]; then + status "escalated — waiting for human input on issue #${ISSUE}" + ESCALATE_REASON=$(sed -n '2p' "$PHASE_FILE" 2>/dev/null | sed 's/^Reason: //' || echo "") _issue_url="${CODEBERG_WEB}/issues/${ISSUE}" _pr_link="" [ -n "${PR_NUMBER:-}" ] && _pr_link=" | PR #${PR_NUMBER}" notify_ctx \ - "⚠️ Issue #${ISSUE} (PR #${PR_NUMBER:-none}) needs human input.${HUMAN_REASON:+ Reason: ${HUMAN_REASON}}" \ - "⚠️ Issue #${ISSUE}${_pr_link} needs human input.${HUMAN_REASON:+ Reason: ${HUMAN_REASON}}
Reply in this thread to send guidance to the dev agent." - log "phase: needs_human — notified via Matrix, waiting for external injection" - # Don't inject anything — supervisor-run.sh (#81) injects human replies + "⚠️ Issue #${ISSUE} (PR #${PR_NUMBER:-none}) escalated — needs human input.${ESCALATE_REASON:+ Reason: ${ESCALATE_REASON}}" \ + "⚠️ Issue #${ISSUE}${_pr_link} escalated — needs human input.${ESCALATE_REASON:+ Reason: ${ESCALATE_REASON}}
Reply in this thread to send guidance to the agent." + log "phase: escalate — notified via Matrix, session stays alive waiting for reply" + # Session stays alive — matrix_listener injects human reply directly # ── PHASE: done ───────────────────────────────────────────────────────────── # PR merged and issue closed (by orchestrator or Claude). Just clean up local state. diff --git a/dev/phase-test.sh b/dev/phase-test.sh index 2ce3309..7a684c3 100755 --- a/dev/phase-test.sh +++ b/dev/phase-test.sh @@ -51,7 +51,7 @@ check_phase() { check_phase "PHASE:awaiting_ci" check_phase "PHASE:awaiting_review" -check_phase "PHASE:needs_human" +check_phase "PHASE:escalate" check_phase "PHASE:done" check_phase "PHASE:failed" @@ -109,14 +109,14 @@ fi is_valid_phase() { local p="$1" case "$p" in - PHASE:awaiting_ci|PHASE:awaiting_review|PHASE:needs_human|PHASE:done|PHASE:failed) + PHASE:awaiting_ci|PHASE:awaiting_review|PHASE:escalate|PHASE:done|PHASE:failed) return 0 ;; *) return 1 ;; esac } -for p in "PHASE:awaiting_ci" "PHASE:awaiting_review" "PHASE:needs_human" \ +for p in "PHASE:awaiting_ci" "PHASE:awaiting_review" "PHASE:escalate" \ "PHASE:done" "PHASE:failed"; do if is_valid_phase "$p"; then ok "is_valid_phase: $p" @@ -131,14 +131,14 @@ else fail "is_valid_phase should reject PHASE:unknown" fi -# ── Test 8: needs_human mtime guard — no duplicate notify on second poll ───── +# ── Test 8: escalate mtime guard — no duplicate notify on second poll ───── # Simulates the LAST_PHASE_MTIME guard from dev-agent.sh: after the orchestrator -# handles PHASE:needs_human once, subsequent poll cycles must not re-trigger +# handles PHASE:escalate once, subsequent poll cycles must not re-trigger # notify() if the phase file was not rewritten. NOTIFY_COUNT=0 mock_notify() { NOTIFY_COUNT=$((NOTIFY_COUNT + 1)); } -echo "PHASE:needs_human" > "$PHASE_FILE" +echo "PHASE:escalate" > "$PHASE_FILE" LAST_PHASE_MTIME=0 # --- First poll cycle: phase file is newer than LAST_PHASE_MTIME --- @@ -162,9 +162,9 @@ if [ -n "$CURRENT_PHASE" ] && [ "$PHASE_MTIME" -gt "$LAST_PHASE_MTIME" ]; then fi if [ "$NOTIFY_COUNT" -eq 1 ]; then - ok "needs_human mtime guard: notify called once, blocked on second poll" + ok "escalate mtime guard: notify called once, blocked on second poll" else - fail "needs_human mtime guard: expected 1 notify call, got $NOTIFY_COUNT" + fail "escalate mtime guard: expected 1 notify call, got $NOTIFY_COUNT" fi # ── Test 9: PostToolUse hook detects writes, ignores reads ──────────────── @@ -317,15 +317,15 @@ if [ -x "$STOP_FAILURE_HOOK" ]; then fi rm -f "$SF_MARKER" "$PHASE_FILE" - # 10i: terminal phase guard — does not overwrite PHASE:needs_human - echo "PHASE:needs_human" > "$PHASE_FILE" + # 10i: terminal phase guard — does not overwrite PHASE:escalate + echo "PHASE:escalate" > "$PHASE_FILE" printf '{"stop_reason":"rate_limit"}' \ | "$STOP_FAILURE_HOOK" "$PHASE_FILE" "$SF_MARKER" sf_first=$(head -1 "$PHASE_FILE" 2>/dev/null) - if [ "$sf_first" = "PHASE:needs_human" ] && [ ! -f "$SF_MARKER" ]; then - ok "StopFailure hook does not overwrite terminal PHASE:needs_human" + if [ "$sf_first" = "PHASE:escalate" ] && [ ! -f "$SF_MARKER" ]; then + ok "StopFailure hook does not overwrite terminal PHASE:escalate" else - fail "StopFailure hook overwrote PHASE:needs_human: first='$sf_first'" + fail "StopFailure hook overwrote PHASE:escalate: first='$sf_first'" fi rm -f "$SF_MARKER" "$PHASE_FILE" else @@ -360,31 +360,31 @@ else fail "phase-changed marker did not reset mtime guard" fi -# ── Test 12: crash handler treats PHASE:needs_human as terminal ─────────── +# ── Test 12: crash handler treats PHASE:escalate as terminal ─────────── # Simulates the monitor_phase_loop crash handler: when a session exits while -# the phase file holds PHASE:needs_human, it must be treated as terminal +# the phase file holds PHASE:escalate, it must be treated as terminal # (fall through to the phase handler) rather than invoking callback with # PHASE:crashed, which would lose the escalation intent. CRASH_CALLBACK_PHASE="" mock_crash_callback() { CRASH_CALLBACK_PHASE="$1"; } -echo "PHASE:needs_human" > "$PHASE_FILE" +echo "PHASE:escalate" > "$PHASE_FILE" current_phase=$(head -1 "$PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true) case "$current_phase" in - PHASE:done|PHASE:failed|PHASE:merged|PHASE:needs_human) + PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate) # terminal — fall through to phase handler (correct behavior) mock_crash_callback "$current_phase" ;; *) - # would invoke callback with PHASE:crashed (incorrect for needs_human) + # would invoke callback with PHASE:crashed (incorrect for escalate) mock_crash_callback "PHASE:crashed" ;; esac -if [ "$CRASH_CALLBACK_PHASE" = "PHASE:needs_human" ]; then - ok "crash handler preserves PHASE:needs_human (not replaced by PHASE:crashed)" +if [ "$CRASH_CALLBACK_PHASE" = "PHASE:escalate" ]; then + ok "crash handler preserves PHASE:escalate (not replaced by PHASE:crashed)" else - fail "crash handler lost escalation intent: expected PHASE:needs_human, got $CRASH_CALLBACK_PHASE" + fail "crash handler lost escalation intent: expected PHASE:escalate, got $CRASH_CALLBACK_PHASE" fi # Also verify the other terminal phases still work in crash handler @@ -392,7 +392,7 @@ for tp in "PHASE:done" "PHASE:failed" "PHASE:merged"; do echo "$tp" > "$PHASE_FILE" current_phase=$(head -1 "$PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true) case "$current_phase" in - PHASE:done|PHASE:failed|PHASE:merged|PHASE:needs_human) + PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate) ok "crash handler treats $tp as terminal" ;; *) diff --git a/docs/PHASE-PROTOCOL.md b/docs/PHASE-PROTOCOL.md index a9a0894..45ac680 100644 --- a/docs/PHASE-PROTOCOL.md +++ b/docs/PHASE-PROTOCOL.md @@ -30,7 +30,7 @@ Claude writes exactly one of these lines to the phase file when a phase ends: |----------|---------|---------------------| | `PHASE:awaiting_ci` | PR pushed, waiting for CI to run | Poll CI; inject result when done | | `PHASE:awaiting_review` | CI passed, PR open, waiting for review | Wait for `review-poll` to inject feedback | -| `PHASE:needs_human` | Blocked on human decision | Send Matrix notification; wait for reply | +| `PHASE:escalate` | Needs human input (any reason) | Send Matrix notification; session stays alive; 24h timeout → blocked | | `PHASE:done` | Work complete, PR merged | Verify merge, kill tmux session, clean up | | `PHASE:failed` | Unrecoverable failure | Escalate to gardener/supervisor | @@ -46,7 +46,7 @@ echo "PHASE:awaiting_ci" > "$PHASE_FILE" echo "PHASE:awaiting_review" > "$PHASE_FILE" # Signal needs human -echo "PHASE:needs_human" > "$PHASE_FILE" +echo "PHASE:escalate" > "$PHASE_FILE" # Signal done echo "PHASE:done" > "$PHASE_FILE" @@ -77,17 +77,16 @@ PHASE:awaiting_review → wait for review-poll.sh to post review comment on APPROVE → inject "approved" into session on timeout (3h) → inject "no review, escalating" -PHASE:needs_human → send Matrix notification with issue/PR link - on reply → supervisor-poll.sh injects reply into tmux session - (gardener-poll.sh as backup if supervisor missed it) - reply file: /tmp/dev-escalation-reply (written by matrix_listener.sh) - on timeout → re-notify at 6h, escalate at 24h (supervisor-poll.sh) +PHASE:escalate → send Matrix notification with context (issue/PR link, reason) + session stays alive waiting for human reply + on reply → matrix_listener.sh injects reply into tmux session + on timeout → 24h: label issue blocked, kill session PHASE:done → verify PR merged on Codeberg if merged → kill tmux session, clean labels, close issue if not → inject "PR not merged yet" into session -PHASE:failed → write escalation to supervisor/escalations-{project}.jsonl +PHASE:failed → label issue blocked, post diagnostic comment kill tmux session restore backlog label on issue ``` @@ -171,7 +170,6 @@ file and git history. | `/tmp/dev-session-{proj}-{issue}.phase` | Claude (in session) | Current phase | | `/tmp/ci-result-{proj}-{issue}.txt` | Orchestrator | Last CI output for injection | | `/tmp/dev-{proj}-{issue}.log` | Orchestrator | Session transcript (aspirational — path TBD when tmux session manager is implemented in #80) | -| `/tmp/dev-escalation-reply` | matrix_listener.sh | Human reply to `needs_human` escalation (consumed by supervisor-poll.sh) | | `/tmp/dev-renotify-{proj}-{issue}` | supervisor-poll.sh | Marker to prevent duplicate 6h re-notifications | | `WORKTREE` (git worktree) | dev-agent.sh | Code checkpoint | diff --git a/formulas/run-supervisor.toml b/formulas/run-supervisor.toml index e69bf23..0a685d0 100644 --- a/formulas/run-supervisor.toml +++ b/formulas/run-supervisor.toml @@ -67,7 +67,7 @@ Categorize every finding from the metrics into priority levels. - Git repo on wrong branch or in broken rebase state - Pipeline stalled: backlog issues exist but no agent ran for > 20min - Dev-agent blocked: last N polls all report "no ready issues" -- Dev sessions in PHASE:needs_human for > 24h +- Dev/action sessions in PHASE:escalate for > 24h (escalation timeout) ### P3 — Factory degraded - PRs stale: CI finished >20min ago AND no git push to the PR branch since CI completed diff --git a/lib/AGENTS.md b/lib/AGENTS.md index 20827be..aefcfc7 100644 --- a/lib/AGENTS.md +++ b/lib/AGENTS.md @@ -11,7 +11,7 @@ sourced as needed. | `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) | | `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `CODEBERG_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, Matrix config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) | | `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` patterns. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll | -| `lib/matrix_listener.sh` | Long-poll Matrix sync daemon. Dispatches thread replies to the correct agent via well-known files (`/tmp/{agent}-escalation-reply`). Handles supervisor, gardener, dev, review, vault, and action reply routing. Run as systemd service. | Standalone daemon | +| `lib/matrix_listener.sh` | Long-poll Matrix sync daemon. Dispatches thread replies to the correct agent via tmux session injection (dev, action, vault, review) or well-known files (`/tmp/{agent}-escalation-reply` for supervisor/gardener). Handles all agent reply routing. Run as systemd service. | Standalone daemon | | `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `run_formula_and_monitor()` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh, action-agent.sh | | `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh | | `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | (available for future use) | diff --git a/lib/agent-session.sh b/lib/agent-session.sh index 79cc66a..32c4f76 100644 --- a/lib/agent-session.sh +++ b/lib/agent-session.sh @@ -330,7 +330,7 @@ monitor_phase_loop() { local current_phase current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true) case "$current_phase" in - PHASE:done|PHASE:failed|PHASE:merged|PHASE:needs_human) + PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate) ;; # terminal — fall through to phase handler *) # Call callback with "crashed" — let agent-specific code handle recovery @@ -410,7 +410,7 @@ monitor_phase_loop() { fi return 0 ;; - PHASE:failed|PHASE:needs_human) + PHASE:failed|PHASE:escalate) _MONITOR_LOOP_EXIT="$current_phase" if type "${callback}" &>/dev/null; then "$callback" "$current_phase" diff --git a/lib/formula-session.sh b/lib/formula-session.sh index 5940482..3ec8b8f 100644 --- a/lib/formula-session.sh +++ b/lib/formula-session.sh @@ -168,7 +168,7 @@ formula_phase_callback() { log "ERROR: could not restart session after crash" fi ;; - PHASE:done|PHASE:failed|PHASE:needs_human|PHASE:merged) + PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged) agent_kill_session "${_MONITOR_SESSION:-$SESSION_NAME}" ;; esac diff --git a/lib/hooks/on-stop-failure.sh b/lib/hooks/on-stop-failure.sh index 99b78ee..6c78ade 100755 --- a/lib/hooks/on-stop-failure.sh +++ b/lib/hooks/on-stop-failure.sh @@ -30,7 +30,7 @@ reason=$(printf '%s' "$input" | jq -r ' # the PostToolUse hook already recorded the correct terminal phase. existing=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]') case "$existing" in - PHASE:done|PHASE:merged|PHASE:needs_human) exit 0 ;; + PHASE:done|PHASE:merged|PHASE:escalate) exit 0 ;; esac # Write phase file immediately — orchestrator reads first line as phase sentinel diff --git a/lib/matrix_listener.sh b/lib/matrix_listener.sh index ef6d0b5..ebee8f1 100755 --- a/lib/matrix_listener.sh +++ b/lib/matrix_listener.sh @@ -164,7 +164,7 @@ while true; do DEV_PHASE_FILE="/tmp/dev-session-${DEV_PROJECT}-${DEV_ISSUE}.phase" if tmux has-session -t "$DEV_SESSION" 2>/dev/null; then DEV_CUR_PHASE=$(head -1 "$DEV_PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true) - if [ "$DEV_CUR_PHASE" = "PHASE:needs_human" ] || [ "$DEV_CUR_PHASE" = "PHASE:awaiting_review" ]; then + if [ "$DEV_CUR_PHASE" = "PHASE:escalate" ] || [ "$DEV_CUR_PHASE" = "PHASE:awaiting_review" ]; then DEV_INJECT_MSG="Human guidance from ${SENDER} in Matrix: ${BODY} @@ -283,26 +283,56 @@ Continue with the action formula based on this response." fi ;; vault) - # Parse APPROVE or REJECT from reply - VAULT_CMD=$(echo "$BODY" | tr '[:lower:]' '[:upper:]' | grep -oP '^\s*(APPROVE|REJECT)\s+\S+' | head -1 || true) - if [ -n "$VAULT_CMD" ]; then - VAULT_ACTION=$(echo "$VAULT_CMD" | awk '{print $1}') - VAULT_ID=$(echo "$BODY" | awk '{print $2}') # preserve original case for ID - log "vault dispatch: $VAULT_ACTION $VAULT_ID" - VAULT_DIR="${FACTORY_ROOT}/vault" - if [ "$VAULT_ACTION" = "APPROVE" ]; then - if bash "${VAULT_DIR}/vault-fire.sh" "$VAULT_ID" >> "${VAULT_DIR}/vault.log" 2>&1; then - matrix_send "vault" "✓ approved and fired: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true + # Route reply to vault tmux session if one exists (unified escalation path) + VAULT_ISSUE=$(awk -F'\t' -v id="$THREAD_ROOT" '$1 == id {print $4}' "$THREAD_MAP" 2>/dev/null || true) + VAULT_PROJECT=$(awk -F'\t' -v id="$THREAD_ROOT" '$1 == id {print $5}' "$THREAD_MAP" 2>/dev/null || true) + VAULT_INJECTED=false + if [ -n "$VAULT_ISSUE" ]; then + VAULT_SESSION="vault-${VAULT_PROJECT:-default}-${VAULT_ISSUE}" + if tmux has-session -t "$VAULT_SESSION" 2>/dev/null; then + VAULT_INJECT_MSG="Human reply from ${SENDER} in Matrix: + +${BODY} + +Interpret this response and decide how to proceed." + VAULT_INJECT_TMP=$(mktemp /tmp/vault-q-inject-XXXXXX) + printf '%s' "$VAULT_INJECT_MSG" > "$VAULT_INJECT_TMP" + tmux load-buffer -b "vault-q-${VAULT_ISSUE}" "$VAULT_INJECT_TMP" || true + tmux paste-buffer -t "$VAULT_SESSION" -b "vault-q-${VAULT_ISSUE}" || true + sleep 0.5 + tmux send-keys -t "$VAULT_SESSION" "" Enter || true + tmux delete-buffer -b "vault-q-${VAULT_ISSUE}" 2>/dev/null || true + rm -f "$VAULT_INJECT_TMP" + VAULT_INJECTED=true + log "human reply from ${SENDER} injected into ${VAULT_SESSION}" + if ! grep -qF "$THREAD_ROOT" "$ACKED_FILE" 2>/dev/null; then + matrix_send "vault" "✓ Reply forwarded to vault session" "$THREAD_ROOT" >/dev/null 2>&1 || true + printf '%s\n' "$THREAD_ROOT" >> "$ACKED_FILE" + fi + fi + fi + # Fallback: parse APPROVE/REJECT for non-session vault actions + if [ "$VAULT_INJECTED" = false ]; then + VAULT_CMD=$(echo "$BODY" | tr '[:lower:]' '[:upper:]' | grep -oP '^\s*(APPROVE|REJECT)\s+\S+' | head -1 || true) + if [ -n "$VAULT_CMD" ]; then + VAULT_ACTION=$(echo "$VAULT_CMD" | awk '{print $1}') + VAULT_ID=$(echo "$BODY" | awk '{print $2}') # preserve original case for ID + log "vault dispatch: $VAULT_ACTION $VAULT_ID" + VAULT_DIR="${FACTORY_ROOT}/vault" + if [ "$VAULT_ACTION" = "APPROVE" ]; then + if bash "${VAULT_DIR}/vault-fire.sh" "$VAULT_ID" >> "${VAULT_DIR}/vault.log" 2>&1; then + matrix_send "vault" "✓ approved and fired: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true + else + matrix_send "vault" "✓ approved but fire failed — will retry: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true + fi else - matrix_send "vault" "✓ approved but fire failed — will retry: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true + bash "${VAULT_DIR}/vault-reject.sh" "$VAULT_ID" "rejected by ${SENDER}" >> "${VAULT_DIR}/vault.log" 2>&1 || true + matrix_send "vault" "✓ rejected: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true fi else - bash "${VAULT_DIR}/vault-reject.sh" "$VAULT_ID" "rejected by ${SENDER}" >> "${VAULT_DIR}/vault.log" 2>&1 || true - matrix_send "vault" "✓ rejected: ${VAULT_ID}" "$THREAD_ROOT" >/dev/null 2>&1 || true + log "vault: free-text reply (no session, no APPROVE/REJECT): ${BODY:0:100}" + matrix_send "vault" "⚠️ No active vault session. Reply with APPROVE or REJECT , or wait for a vault session to start." "$THREAD_ROOT" >/dev/null 2>&1 || true fi - else - log "vault: unrecognized reply format: ${BODY:0:100}" - matrix_send "vault" "⚠️ Reply with APPROVE or REJECT " "$THREAD_ROOT" >/dev/null 2>&1 || true fi ;; *) diff --git a/planner/prerequisite-tree.md b/planner/prerequisite-tree.md index 9fc2be4..d959455 100644 --- a/planner/prerequisite-tree.md +++ b/planner/prerequisite-tree.md @@ -28,8 +28,8 @@ Status: BLOCKED — prerequisite of prerequisite unresolved Status: BLOCKED — prerequisite chain unresolved ## Objective: Unified escalation path (#510) -- [ ] Supervisor escalates prolonged needs_human (#465) -Status: BLOCKED — 1 prerequisite unresolved +- [x] PHASE:escalate replaces PHASE:needs_human (supersedes #465) +Status: IN PROGRESS ## Objective: Vault as procurement gate + RESOURCES.md inventory (#504) - [x] RESOURCES.md exists diff --git a/review/review-poll.sh b/review/review-poll.sh index c933aeb..72114ba 100755 --- a/review/review-poll.sh +++ b/review/review-poll.sh @@ -156,7 +156,7 @@ curl -sf -X PATCH \\ echo \"PHASE:done\" > \"${phase_file}\" If merge fails due to conflicts, rebase first then retry. -If merge repeatedly fails, write PHASE:needs_human." +If merge repeatedly fails, write PHASE:escalate with a reason." elif [ "${verdict}" = "REQUEST_CHANGES" ] || [ "${verdict}" = "DISCUSS" ]; then inject_msg="Review: ${verdict} on PR #${pr_num}: diff --git a/review/review-pr.sh b/review/review-pr.sh index b5b90a7..18d144a 100755 --- a/review/review-pr.sh +++ b/review/review-pr.sh @@ -124,7 +124,7 @@ review_cb() { [ "$_REVIEW_CRASH" -gt 0 ] && return 0; _REVIEW_CRASH=$((_REVIEW_CRASH + 1)) create_agent_session "${_MONITOR_SESSION}" "$WORKTREE" "$PHASE_FILE" 2>/dev/null && \ agent_inject_into_session "${_MONITOR_SESSION}" "$PROMPT" ;; - PHASE:done|PHASE:failed|PHASE:needs_human) agent_kill_session "${_MONITOR_SESSION}" ;; + PHASE:done|PHASE:failed|PHASE:escalate) agent_kill_session "${_MONITOR_SESSION}" ;; esac } monitor_phase_loop "$PHASE_FILE" 600 "review_cb" "$SESSION" diff --git a/site/docs/architecture.html b/site/docs/architecture.html index 147c43b..f4b35d7 100644 --- a/site/docs/architecture.html +++ b/site/docs/architecture.html @@ -431,9 +431,9 @@ Review-agent injects feedback - needs_human - Blocked on a human decision - Matrix notification sent + escalate + Needs human input (any reason) + Matrix notification sent; 24h timeout → blocked done diff --git a/supervisor/supervisor-poll.sh b/supervisor/supervisor-poll.sh index b946a10..bdc8529 100755 --- a/supervisor/supervisor-poll.sh +++ b/supervisor/supervisor-poll.sh @@ -565,16 +565,16 @@ check_project() { '{ts:$ts,type:"dev",project:$proj,issues_in_backlog:$backlog,issues_blocked:$blocked,pr_open:$prs}' 2>/dev/null)" 2>/dev/null || true # =========================================================================== - # P2d: NEEDS_HUMAN — inject human replies into blocked dev sessions + # P2d: ESCALATE — inject human replies into escalated dev sessions # =========================================================================== - status "P2: ${proj_name}: checking needs_human sessions" + status "P2: ${proj_name}: checking escalate sessions" HUMAN_REPLY_FILE="/tmp/dev-escalation-reply" for _nh_phase_file in /tmp/dev-session-"${proj_name}"-*.phase; do [ -f "$_nh_phase_file" ] || continue _nh_phase=$(head -1 "$_nh_phase_file" 2>/dev/null | tr -d '[:space:]' || true) - [ "$_nh_phase" = "PHASE:needs_human" ] || continue + [ "$_nh_phase" = "PHASE:escalate" ] || continue _nh_issue=$(basename "$_nh_phase_file" .phase) _nh_issue="${_nh_issue#dev-session-${proj_name}-}" @@ -583,7 +583,7 @@ check_project() { # Check tmux session is alive if ! tmux has-session -t "$_nh_session" 2>/dev/null; then - flog "${proj_name}: #${_nh_issue} phase=needs_human but tmux session gone" + flog "${proj_name}: #${_nh_issue} phase=escalate but tmux session gone" continue fi @@ -621,14 +621,14 @@ Instructions: _nh_age=$(( _nh_now - _nh_mtime )) if [ "$_nh_age" -gt 86400 ]; then - p2 "${proj_name}: Dev session #${_nh_issue} stuck in needs_human for >24h" + p2 "${proj_name}: Dev session #${_nh_issue} stuck in escalate for >24h" elif [ "$_nh_age" -gt 21600 ]; then _nh_renotify="/tmp/dev-renotify-${proj_name}-${_nh_issue}" if [ ! -f "$_nh_renotify" ]; then _nh_age_h=$(( _nh_age / 3600 )) matrix_send "dev" "⏰ Reminder: Issue #${_nh_issue} still needs human input (waiting ${_nh_age_h}h)" 2>/dev/null || true touch "$_nh_renotify" - flog "${proj_name}: #${_nh_issue} re-notified (needs_human for ${_nh_age_h}h)" + flog "${proj_name}: #${_nh_issue} re-notified (escalate for ${_nh_age_h}h)" fi fi fi @@ -661,7 +661,7 @@ Instructions: _phase_file="/tmp/dev-session-${proj_name}-${_sess_issue}.phase" _curr_phase=$(head -1 "$_phase_file" 2>/dev/null | tr -d '[:space:]' || true) case "${_curr_phase:-}" in - PHASE:needs_human|PHASE:awaiting_ci|PHASE:awaiting_review) + PHASE:escalate|PHASE:awaiting_ci|PHASE:awaiting_review) continue # session has legitimate pending work ;; esac diff --git a/vault/vault-agent.sh b/vault/vault-agent.sh index 8e2b813..7c117ad 100755 --- a/vault/vault-agent.sh +++ b/vault/vault-agent.sh @@ -71,8 +71,13 @@ ${ACTIONS_BATCH} - vault-reject.sh: bash ${VAULT_DIR}/vault-reject.sh \"\" - matrix_send is available after: source ${FACTORY_ROOT}/lib/env.sh -Process each action now. For auto-approve, fire immediately. For escalate, -send Matrix message and mark as escalated. For reject, call vault-reject.sh." +Process each action now. For auto-approve, fire immediately. For reject, call vault-reject.sh. + +For actions that need human approval (escalate), write a PHASE:escalate file +to signal the unified escalation path: + printf 'PHASE:escalate\nReason: vault procurement — %s\n' '' \\ + > /tmp/vault-escalate-.phase +Then send a Matrix message with context about what needs approval." CLAUDE_OUTPUT=$(timeout "$CLAUDE_TIMEOUT" claude -p "$PROMPT" \ --model sonnet \