#!/usr/bin/env bash
# phase-test.sh — Integration test for the phase-signaling protocol
#
# Simulates a Claude session writing phases and an orchestrator reading them.
# Tests all phase values and verifies the read/write contract.
#
# Usage: bash dev/phase-test.sh

set -euo pipefail

# Source canonical read_phase() from shared library
source "$(dirname "$0")/../lib/agent-session.sh"

PROJECT="testproject"
ISSUE="999"
PHASE_FILE="/tmp/dev-session-${PROJECT}-${ISSUE}.phase"

# Running totals, reported by the summary at the end of the script.
PASS=0
FAIL=0

# ok MESSAGE — count one passing assertion and print it as "[PASS] MESSAGE".
ok() {
  local msg="$1"
  PASS=$((PASS + 1))
  printf '[PASS] %s\n' "$msg"
}

# fail MESSAGE — count one failing assertion and print it as "[FAIL] MESSAGE".
# Does not abort; the script keeps running so every test gets reported.
fail() {
  local msg="$1"
  FAIL=$((FAIL + 1))
  printf '[FAIL] %s\n' "$msg"
}

# Start from a clean slate: drop any phase file left by a previous run.
rm -f "$PHASE_FILE"

# ── Test 1: phase file path convention ────────────────────────────────────────
# NOTE(review): expected_path is built from the same variables and template as
# PHASE_FILE, so this only fails if one of the two templates is edited.
expected_path="/tmp/dev-session-${PROJECT}-${ISSUE}.phase"
if [[ "$PHASE_FILE" != "$expected_path" ]]; then
  fail "phase file path mismatch: got $PHASE_FILE, expected $expected_path"
else
  ok "phase file path follows /tmp/dev-session-{project}-{issue}.phase convention"
fi

# ── Test 2: write and read each phase sentinel ─────────────────────────────────
# check_phase SENTINEL — write SENTINEL to the phase file, read it back with
# all whitespace stripped, and record whether the round trip preserved it.
check_phase() {
  local sentinel="$1"
  local readback

  printf '%s\n' "$sentinel" > "$PHASE_FILE"
  readback=$(tr -d '[:space:]' < "$PHASE_FILE")

  if [[ "$readback" == "$sentinel" ]]; then
    ok "write/read: $sentinel"
    return
  fi
  fail "write/read: expected '$sentinel', got '$readback'"
}

for sentinel_value in \
  "PHASE:awaiting_ci" \
  "PHASE:awaiting_review" \
  "PHASE:escalate" \
  "PHASE:done" \
  "PHASE:failed"; do
  check_phase "$sentinel_value"
done

# ── Test 3: write overwrites (not appends) ─────────────────────────────────────
# Two consecutive truncating writes must leave exactly one line: the second.
printf '%s\n' "PHASE:awaiting_ci" > "$PHASE_FILE"
printf '%s\n' "PHASE:awaiting_review" > "$PHASE_FILE"
line_count=$(wc -l < "$PHASE_FILE")
file_content=$(< "$PHASE_FILE")

if [[ "$line_count" -ne 1 ]]; then
  fail "phase file should have 1 line, got $line_count"
else
  ok "phase file overwrite (single line after two writes)"
fi

if [[ "$file_content" != "PHASE:awaiting_review" ]]; then
  fail "phase file content should be 'PHASE:awaiting_review', got '$file_content'"
else
  ok "phase file overwrite (content is second write, not first)"
fi

# ── Test 4: failed phase with reason ──────────────────────────────────────────
# A failed phase carries the sentinel on line 1 and "Reason: ..." on line 2.
printf 'PHASE:failed\nReason: %s\n' "shellcheck failed on ci.sh" > "$PHASE_FILE"
first_line=$(head -1 "$PHASE_FILE")
second_line=$(sed -n '2p' "$PHASE_FILE")

if [[ "$first_line" == "PHASE:failed" && "$second_line" == Reason:* ]]; then
  ok "PHASE:failed with reason line"
else
  fail "PHASE:failed format: first='$first_line' second='$second_line'"
fi

# ── Test 5: orchestrator read function (canonical read_phase from lib/agent-session.sh)
printf '%s\n' "PHASE:awaiting_ci" > "$PHASE_FILE"
phase=$(read_phase "$PHASE_FILE")

if [[ "$phase" != "PHASE:awaiting_ci" ]]; then
  fail "orchestrator read_phase() got: '$phase'"
else
  ok "orchestrator read_phase() extracts first line"
fi

# ── Test 6: missing file returns empty ────────────────────────────────────────
rm -f "$PHASE_FILE"
phase=$(read_phase "$PHASE_FILE")

if [[ -n "$phase" ]]; then
  fail "missing phase file should return empty, got: '$phase'"
else
  ok "missing phase file returns empty string"
fi

# ── Test 7: all valid phase values are recognized ─────────────────────────────
# is_valid_phase PHASE — return 0 when PHASE is one of the five sentinels
# listed in the case arm below, 1 for anything else.
is_valid_phase() {
  local candidate="$1"
  case "$candidate" in
    PHASE:awaiting_ci | PHASE:awaiting_review | PHASE:escalate | PHASE:done | PHASE:failed)
      return 0
      ;;
  esac
  return 1
}

valid_phases=(
  "PHASE:awaiting_ci"
  "PHASE:awaiting_review"
  "PHASE:escalate"
  "PHASE:done"
  "PHASE:failed"
)
for p in "${valid_phases[@]}"; do
  if is_valid_phase "$p"; then
    ok "is_valid_phase: $p"
  else
    fail "is_valid_phase rejected valid phase: $p"
  fi
done

if is_valid_phase "PHASE:unknown"; then
  fail "is_valid_phase should reject PHASE:unknown"
else
  ok "is_valid_phase rejects unknown phase"
fi

# ── Test 8: escalate mtime guard — no duplicate notify on second poll ─────
# Simulates the LAST_PHASE_MTIME guard from dev-agent.sh: after the orchestrator
# handles PHASE:escalate once, subsequent poll cycles must not re-trigger
# notify() if the phase file was not rewritten.
# ── Helpers for the mtime and hook tests below ────────────────────────────────

# pt_file_mtime FILE
# Print FILE's modification time in epoch seconds, or 0 when it cannot be
# determined. Tries GNU `stat -c %Y` first and falls back to BSD/macOS
# `stat -f %m`; without the fallback both sides of the mtime guard would be 0
# on non-GNU systems and the guard tests would fail spuriously.
pt_file_mtime() {
  local mtime
  mtime=$(stat -c %Y "$1" 2>/dev/null) \
    || mtime=$(stat -f %m "$1" 2>/dev/null) \
    || mtime=0
  # Anything that is not a plain integer would break the -gt/-le comparisons.
  case "$mtime" in
    '' | *[!0-9]*) mtime=0 ;;
  esac
  printf '%s\n' "$mtime"
}

# pt_run_hook PAYLOAD HOOK [ARG...]
# Feed PAYLOAD (verbatim, no trailing newline) to HOOK ARG... on stdin.
# A non-zero exit is recorded through fail() instead of letting
# `set -e`/`pipefail` abort the whole run before the summary is printed.
pt_run_hook() {
  local payload="$1" rc=0
  shift
  printf '%s' "$payload" | "$@" || rc=$?
  if [ "$rc" -ne 0 ]; then
    fail "hook exited with status $rc: $*"
  fi
}

# pt_phase_line N
# Print line N of the phase file; print nothing (and still return 0) when the
# file is missing, so the caller's assertion reports the problem rather than
# `set -e` killing the script on the failed command substitution.
pt_phase_line() {
  sed -n "${1}p" "$PHASE_FILE" 2>/dev/null || true
}

# ── Test 8 body: escalate mtime guard ─────────────────────────────────────────
NOTIFY_COUNT=0
# Stand-in for the orchestrator's notify(): only counts invocations.
mock_notify() { NOTIFY_COUNT=$((NOTIFY_COUNT + 1)); }

echo "PHASE:escalate" > "$PHASE_FILE"
LAST_PHASE_MTIME=0

# --- First poll cycle: phase file is newer than LAST_PHASE_MTIME ---
PHASE_MTIME=$(pt_file_mtime "$PHASE_FILE")
CURRENT_PHASE=$(tr -d '[:space:]' < "$PHASE_FILE")
if [ -n "$CURRENT_PHASE" ] && [ "$PHASE_MTIME" -gt "$LAST_PHASE_MTIME" ]; then
  # Orchestrator would handle the phase and call notify()
  mock_notify
  LAST_PHASE_MTIME="$PHASE_MTIME"
fi

# --- Second poll cycle: file not touched, mtime unchanged ---
# The guard compares the file's mtime, not the current time, so letting the
# wall clock advance must not make the phase look new again.
sleep 1
PHASE_MTIME=$(pt_file_mtime "$PHASE_FILE")
CURRENT_PHASE=$(tr -d '[:space:]' < "$PHASE_FILE")
if [ -n "$CURRENT_PHASE" ] && [ "$PHASE_MTIME" -gt "$LAST_PHASE_MTIME" ]; then
  # This branch must NOT execute — mtime guard should block it
  mock_notify
fi

if [ "$NOTIFY_COUNT" -eq 1 ]; then
  ok "escalate mtime guard: notify called once, blocked on second poll"
else
  fail "escalate mtime guard: expected 1 notify call, got $NOTIFY_COUNT"
fi

# ── Test 9: PostToolUse hook detects writes, ignores reads ────────────────
# The hook is invoked as: HOOK <phase_file> <marker_file>, with the tool-call
# JSON on stdin. A marker file appearing afterwards means "phase changed".
HOOK_SCRIPT="$(dirname "$0")/../lib/hooks/on-phase-change.sh"
MARKER_FILE="/tmp/phase-changed-test-session.marker"
rm -f "$MARKER_FILE"

if [ -x "$HOOK_SCRIPT" ]; then
  # 9a: Bash redirect to phase file → marker written
  printf -v hook_payload \
    '{"tool_name":"Bash","tool_input":{"command":"echo PHASE:awaiting_ci > %s"}}' \
    "$PHASE_FILE"
  pt_run_hook "$hook_payload" "$HOOK_SCRIPT" "$PHASE_FILE" "$MARKER_FILE"
  if [ -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook writes marker on Bash redirect to phase file"
  else
    fail "PostToolUse hook did not write marker on Bash redirect"
  fi
  rm -f "$MARKER_FILE"

  # 9b: Write tool targeting phase file → marker written
  printf -v hook_payload \
    '{"tool_name":"Write","tool_input":{"file_path":"%s","content":"PHASE:done"}}' \
    "$PHASE_FILE"
  pt_run_hook "$hook_payload" "$HOOK_SCRIPT" "$PHASE_FILE" "$MARKER_FILE"
  if [ -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook writes marker on Write tool to phase file"
  else
    fail "PostToolUse hook did not write marker on Write tool"
  fi
  rm -f "$MARKER_FILE"

  # 9c: Bash read of phase file (cat) → NO marker (not a write)
  printf -v hook_payload \
    '{"tool_name":"Bash","tool_input":{"command":"cat %s"}}' \
    "$PHASE_FILE"
  pt_run_hook "$hook_payload" "$HOOK_SCRIPT" "$PHASE_FILE" "$MARKER_FILE"
  if [ ! -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook ignores Bash read of phase file (no false positive)"
  else
    fail "PostToolUse hook wrote marker for Bash read (false positive)"
  fi
  rm -f "$MARKER_FILE"

  # 9d: Unrelated Bash command → NO marker
  pt_run_hook \
    '{"tool_name":"Bash","tool_input":{"command":"echo hello > /tmp/other-file"}}' \
    "$HOOK_SCRIPT" "$PHASE_FILE" "$MARKER_FILE"
  if [ ! -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook skips marker for unrelated operations"
  else
    fail "PostToolUse hook wrote marker for unrelated operation (false positive)"
  fi
  rm -f "$MARKER_FILE"

  # 9e: Write tool targeting different file → NO marker
  pt_run_hook \
    '{"tool_name":"Write","tool_input":{"file_path":"/tmp/other-file","content":"hello"}}' \
    "$HOOK_SCRIPT" "$PHASE_FILE" "$MARKER_FILE"
  if [ ! -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook skips marker for Write to different file"
  else
    fail "PostToolUse hook wrote marker for Write to different file (false positive)"
  fi
  rm -f "$MARKER_FILE"
else
  fail "PostToolUse hook script not found or not executable: $HOOK_SCRIPT"
fi

# ── Test 10: StopFailure hook writes phase file and marker on API error ───
# Invoked as: HOOK <phase_file> <marker_file>, with {"stop_reason": ...} on
# stdin. Phase-file reads go through pt_phase_line so that a hook which wrote
# nothing produces a [FAIL] line instead of a silent `set -e` exit.
STOP_FAILURE_HOOK="$(dirname "$0")/../lib/hooks/on-stop-failure.sh"
SF_MARKER="/tmp/phase-changed-test-sf.marker"
rm -f "$SF_MARKER" "$PHASE_FILE"

if [ -x "$STOP_FAILURE_HOOK" ]; then
  # 10a: rate_limit stop reason → PHASE:failed with api_error reason
  pt_run_hook '{"stop_reason":"rate_limit"}' \
    "$STOP_FAILURE_HOOK" "$PHASE_FILE" "$SF_MARKER"
  sf_first=$(pt_phase_line 1)
  sf_second=$(pt_phase_line 2)
  if [ "$sf_first" = "PHASE:failed" ] && echo "$sf_second" | grep -q "api_error: rate_limit"; then
    ok "StopFailure hook writes PHASE:failed with api_error: rate_limit"
  else
    fail "StopFailure hook phase file: first='$sf_first' second='$sf_second'"
  fi
  if [ -f "$SF_MARKER" ]; then
    ok "StopFailure hook writes phase-changed marker"
  else
    fail "StopFailure hook did not write phase-changed marker"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10b: server_error stop reason
  pt_run_hook '{"stop_reason":"server_error"}' \
    "$STOP_FAILURE_HOOK" "$PHASE_FILE" "$SF_MARKER"
  sf_second=$(pt_phase_line 2)
  if echo "$sf_second" | grep -q "api_error: server_error"; then
    ok "StopFailure hook writes api_error: server_error"
  else
    fail "StopFailure hook server_error: got '$sf_second'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10c: authentication_failed stop reason
  pt_run_hook '{"stop_reason":"authentication_failed"}' \
    "$STOP_FAILURE_HOOK" "$PHASE_FILE" "$SF_MARKER"
  sf_second=$(pt_phase_line 2)
  if echo "$sf_second" | grep -q "api_error: authentication_failed"; then
    ok "StopFailure hook writes api_error: authentication_failed"
  else
    fail "StopFailure hook authentication_failed: got '$sf_second'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10e: missing phase_file arg → no-op (exit 0, no crash)
  # A non-zero exit here is reported by pt_run_hook as its own failure.
  pt_run_hook '{"stop_reason":"rate_limit"}' \
    "$STOP_FAILURE_HOOK" "" "$SF_MARKER"
  if [ ! -f "$PHASE_FILE" ]; then
    ok "StopFailure hook no-ops when phase_file is empty"
  else
    fail "StopFailure hook should not write when phase_file is empty"
  fi
  rm -f "$SF_MARKER"

  # 10f: missing marker arg → phase file still written, no marker
  pt_run_hook '{"stop_reason":"billing_error"}' \
    "$STOP_FAILURE_HOOK" "$PHASE_FILE" ""
  sf_first=$(pt_phase_line 1)
  sf_marker_exists="no"
  if [ -f "$SF_MARKER" ]; then
    sf_marker_exists="yes"
  fi
  if [ "$sf_first" = "PHASE:failed" ] && [ "$sf_marker_exists" = "no" ]; then
    ok "StopFailure hook writes phase without marker when marker arg is empty"
  else
    fail "StopFailure hook: first='$sf_first' marker_exists=$sf_marker_exists"
  fi
  rm -f "$PHASE_FILE"

  # 10g: terminal phase guard — does not overwrite PHASE:done
  echo "PHASE:done" > "$PHASE_FILE"
  pt_run_hook '{"stop_reason":"rate_limit"}' \
    "$STOP_FAILURE_HOOK" "$PHASE_FILE" "$SF_MARKER"
  sf_first=$(pt_phase_line 1)
  if [ "$sf_first" = "PHASE:done" ] && [ ! -f "$SF_MARKER" ]; then
    ok "StopFailure hook does not overwrite terminal PHASE:done"
  else
    fail "StopFailure hook overwrote PHASE:done: first='$sf_first'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10h: terminal phase guard — does not overwrite PHASE:merged
  echo "PHASE:merged" > "$PHASE_FILE"
  pt_run_hook '{"stop_reason":"server_error"}' \
    "$STOP_FAILURE_HOOK" "$PHASE_FILE" "$SF_MARKER"
  sf_first=$(pt_phase_line 1)
  if [ "$sf_first" = "PHASE:merged" ] && [ ! -f "$SF_MARKER" ]; then
    ok "StopFailure hook does not overwrite terminal PHASE:merged"
  else
    fail "StopFailure hook overwrote PHASE:merged: first='$sf_first'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10i: terminal phase guard — does not overwrite PHASE:escalate
  echo "PHASE:escalate" > "$PHASE_FILE"
  pt_run_hook '{"stop_reason":"rate_limit"}' \
    "$STOP_FAILURE_HOOK" "$PHASE_FILE" "$SF_MARKER"
  sf_first=$(pt_phase_line 1)
  if [ "$sf_first" = "PHASE:escalate" ] && [ ! -f "$SF_MARKER" ]; then
    ok "StopFailure hook does not overwrite terminal PHASE:escalate"
  else
    fail "StopFailure hook overwrote PHASE:escalate: first='$sf_first'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"
else
  fail "StopFailure hook script not found or not executable: $STOP_FAILURE_HOOK"
fi

# ── Test 11: phase-changed marker resets mtime guard ─────────────────────
# Simulates monitor_phase_loop behavior: when marker exists, last_mtime
# is reset to 0 so the phase is processed even if mtime hasn't changed.
echo "PHASE:awaiting_ci" > "$PHASE_FILE"
LAST_MTIME=$(pt_file_mtime "$PHASE_FILE")
PHASE_MTIME="$LAST_MTIME"

# Without marker, mtime guard blocks processing (same mtime)
if [ "$PHASE_MTIME" -le "$LAST_MTIME" ]; then
  ok "mtime guard blocks when no marker present (baseline)"
else
  fail "mtime guard should block when phase_mtime <= last_mtime"
fi

# Now simulate marker present — reset last_mtime to 0
MARKER_FILE="/tmp/phase-changed-test-mtime.marker"
date +%s > "$MARKER_FILE"
if [ -f "$MARKER_FILE" ]; then
  # Consume the marker and forget the last-seen mtime.
  rm -f "$MARKER_FILE"
  LAST_MTIME=0
fi

if [ "$PHASE_MTIME" -gt "$LAST_MTIME" ]; then
  ok "phase-changed marker resets mtime guard (phase now processable)"
else
  fail "phase-changed marker did not reset mtime guard"
fi

# ── Test 12: crash handler treats PHASE:escalate as terminal ───────────
# Simulates the monitor_phase_loop crash handler: when a session exits while
# the phase file holds PHASE:escalate, it must be treated as terminal
# (fall through to the phase handler) rather than invoking callback with
# PHASE:crashed, which would lose the escalation intent.
CRASH_CALLBACK_PHASE=""
# Stand-in for the crash callback: remembers which phase it was given.
mock_crash_callback() { CRASH_CALLBACK_PHASE="$1"; }

echo "PHASE:escalate" > "$PHASE_FILE"
current_phase=$(head -1 "$PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true)
case "$current_phase" in
  PHASE:done | PHASE:failed | PHASE:merged | PHASE:escalate)
    # terminal — fall through to phase handler (correct behavior)
    mock_crash_callback "$current_phase"
    ;;
  *)
    # would invoke callback with PHASE:crashed (incorrect for escalate)
    mock_crash_callback "PHASE:crashed"
    ;;
esac

if [ "$CRASH_CALLBACK_PHASE" = "PHASE:escalate" ]; then
  ok "crash handler preserves PHASE:escalate (not replaced by PHASE:crashed)"
else
  fail "crash handler lost escalation intent: expected PHASE:escalate, got $CRASH_CALLBACK_PHASE"
fi

# Also verify the other terminal phases still work in crash handler
for tp in "PHASE:done" "PHASE:failed" "PHASE:merged"; do
  echo "$tp" > "$PHASE_FILE"
  current_phase=$(head -1 "$PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true)
  case "$current_phase" in
    PHASE:done | PHASE:failed | PHASE:merged | PHASE:escalate)
      ok "crash handler treats $tp as terminal"
      ;;
    *)
      fail "crash handler does not treat $tp as terminal"
      ;;
  esac
done

# ── Cleanup ───────────────────────────────────────────────────────────────────
rm -f "$PHASE_FILE"

# ── Summary ───────────────────────────────────────────────────────────────────
# Exit status mirrors the result: 0 when nothing failed, 1 otherwise.
echo ""
printf 'Results: %d passed, %d failed\n' "$PASS" "$FAIL"

if [ "$FAIL" -eq 0 ]; then
  echo "All tests passed."
  exit 0
else
  echo "Some tests failed."
  exit 1
fi