disinto/dev/phase-test.sh

#!/usr/bin/env bash
# phase-test.sh — Integration test for the phase-signaling protocol
#
# Simulates a Claude session writing phases and an orchestrator reading them.
# Tests all phase values and verifies the read/write contract.
#
# Usage: bash dev/phase-test.sh

# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Pull in the shared library; the tests below call its read_phase() function.
. "$(dirname "$0")/../lib/agent-session.sh"

# Fixture identity and the phase file path derived from it.
PROJECT="testproject"
ISSUE="999"
PHASE_FILE="/tmp/dev-session-${PROJECT}-${ISSUE}.phase"

# Running tallies, incremented by ok() and fail().
PASS=0 FAIL=0

# ok MESSAGE
# Report a passing check: prints "[PASS] MESSAGE" on stdout and adds one to the
# global PASS counter.
ok() {
  local label="$1"
  printf '%s %s\n' '[PASS]' "$label"
  : "$((PASS += 1))"
}

# fail MESSAGE
# Report a failing check: prints "[FAIL] MESSAGE" on stdout and adds one to the
# global FAIL counter.
fail() {
  local label="$1"
  printf '%s %s\n' '[FAIL]' "$label"
  : "$((FAIL += 1))"
}

# Start from a clean slate: drop any phase file left behind by an earlier run.
rm -f "$PHASE_FILE"

# ── Test 1: phase file path convention ────────────────────────────────────────
# Rebuild the path from PROJECT and ISSUE and compare it with PHASE_FILE.
expected_path="/tmp/dev-session-${PROJECT}-${ISSUE}.phase"
if [ "$PHASE_FILE" != "$expected_path" ]; then
  fail "phase file path mismatch: got $PHASE_FILE, expected $expected_path"
else
  ok "phase file path follows /tmp/dev-session-{project}-{issue}.phase convention"
fi

# ── Test 2: write and read each phase sentinel ─────────────────────────────────
# check_phase SENTINEL
# Round-trip one sentinel through the phase file: write it to $PHASE_FILE
# (truncating), read the file back with all whitespace removed, and record a
# pass when the two strings match or a failure when they differ.
check_phase() {
  local expected="$1" actual
  echo "$expected" > "$PHASE_FILE"
  actual=$(tr -d '[:space:]' < "$PHASE_FILE")
  if [ "$actual" != "$expected" ]; then
    fail "write/read: expected '$expected', got '$actual'"
    return
  fi
  ok "write/read: $expected"
}

# Round-trip every sentinel, in the same order as before.
for phase_sentinel in \
  "PHASE:awaiting_ci" \
  "PHASE:awaiting_review" \
  "PHASE:escalate" \
  "PHASE:done" \
  "PHASE:failed"; do
  check_phase "$phase_sentinel"
done

# ── Test 3: write overwrites (not appends) ─────────────────────────────────────
# Two consecutive truncating writes must leave exactly one line in the file,
# and that line must be the second value written.
printf '%s\n' "PHASE:awaiting_ci" > "$PHASE_FILE"
printf '%s\n' "PHASE:awaiting_review" > "$PHASE_FILE"
line_count=$(wc -l < "$PHASE_FILE")
file_content=$(cat "$PHASE_FILE")

if [ "$line_count" -ne 1 ]; then
  fail "phase file should have 1 line, got $line_count"
else
  ok "phase file overwrite (single line after two writes)"
fi

if [ "$file_content" != "PHASE:awaiting_review" ]; then
  fail "phase file content should be 'PHASE:awaiting_review', got '$file_content'"
else
  ok "phase file overwrite (content is second write, not first)"
fi

# ── Test 4: failed phase with reason ──────────────────────────────────────────
# A failed phase is written as two lines: the sentinel, then a "Reason:" line.
{
  echo "PHASE:failed"
  echo "Reason: shellcheck failed on ci.sh"
} > "$PHASE_FILE"
first_line=$(head -n 1 "$PHASE_FILE")
second_line=$(sed -n '2p' "$PHASE_FILE")
# The second line only has to start with "Reason:"; the text after it is free.
if [[ "$first_line" == "PHASE:failed" && "$second_line" == Reason:* ]]; then
  ok "PHASE:failed with reason line"
else
  fail "PHASE:failed format: first='$first_line' second='$second_line'"
fi

# ── Test 5: orchestrator read function (canonical read_phase from lib/agent-session.sh)
# Write a single sentinel and expect read_phase() to hand back exactly that.
printf '%s\n' "PHASE:awaiting_ci" > "$PHASE_FILE"
phase=$(read_phase "$PHASE_FILE")
case "$phase" in
  "PHASE:awaiting_ci")
    ok "orchestrator read_phase() extracts first line"
    ;;
  *)
    fail "orchestrator read_phase() got: '$phase'"
    ;;
esac

# ── Test 6: missing file returns empty ────────────────────────────────────────
# With the phase file removed, read_phase() is expected to print nothing.
rm -f "$PHASE_FILE"
phase=$(read_phase "$PHASE_FILE")
if [ -n "$phase" ]; then
  fail "missing phase file should return empty, got: '$phase'"
else
  ok "missing phase file returns empty string"
fi

# ── Test 7: all valid phase values are recognized ─────────────────────────────
# is_valid_phase PHASE
# Returns 0 when PHASE is exactly one of the five sentinels listed below,
# 1 for anything else (including the empty string).
is_valid_phase() {
  local candidate="$1" known
  for known in \
    "PHASE:awaiting_ci" \
    "PHASE:awaiting_review" \
    "PHASE:escalate" \
    "PHASE:done" \
    "PHASE:failed"; do
    if [ "$candidate" = "$known" ]; then
      return 0
    fi
  done
  return 1
}

# Every sentinel in this list must be accepted by is_valid_phase().
valid_phases=(
  "PHASE:awaiting_ci"
  "PHASE:awaiting_review"
  "PHASE:escalate"
  "PHASE:done"
  "PHASE:failed"
)
for p in "${valid_phases[@]}"; do
  if ! is_valid_phase "$p"; then
    fail "is_valid_phase rejected valid phase: $p"
  else
    ok "is_valid_phase: $p"
  fi
done

# A value outside the list must be rejected.
if is_valid_phase "PHASE:unknown"; then
  fail "is_valid_phase should reject PHASE:unknown"
else
  ok "is_valid_phase rejects unknown phase"
fi

# ── Test 8: escalate mtime guard — no duplicate notify on second poll ─────
# Simulates the LAST_PHASE_MTIME guard from dev-agent.sh: once the orchestrator
# has handled PHASE:escalate, later poll cycles must not call notify() again
# unless the phase file has been rewritten.
NOTIFY_COUNT=0
# Stand-in for notify(): only counts how many times it was invoked.
mock_notify() {
  NOTIFY_COUNT=$((NOTIFY_COUNT + 1))
}

printf '%s\n' "PHASE:escalate" > "$PHASE_FILE"
LAST_PHASE_MTIME=0

# --- Poll 1: the file's mtime is compared against the initial value of 0 ---
PHASE_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0)
CURRENT_PHASE=$(tr -d '[:space:]' < "$PHASE_FILE")

if [ -n "$CURRENT_PHASE" ]; then
  if [ "$PHASE_MTIME" -gt "$LAST_PHASE_MTIME" ]; then
    # The orchestrator would handle the phase here and call notify()
    mock_notify
    LAST_PHASE_MTIME="$PHASE_MTIME"
  fi
fi

# --- Poll 2: a second passes, but the file itself is not touched ---
sleep 1
PHASE_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0)
CURRENT_PHASE=$(tr -d '[:space:]' < "$PHASE_FILE")

if [ -n "$CURRENT_PHASE" ]; then
  if [ "$PHASE_MTIME" -gt "$LAST_PHASE_MTIME" ]; then
    # Reaching this line means the guard failed to block the repeat
    mock_notify
  fi
fi

case "$NOTIFY_COUNT" in
  1)
    ok "escalate mtime guard: notify called once, blocked on second poll"
    ;;
  *)
    fail "escalate mtime guard: expected 1 notify call, got $NOTIFY_COUNT"
    ;;
esac

# ── Test 9: PostToolUse hook detects writes, ignores reads ────────────────
# Each sub-test feeds one tool-call JSON payload to the hook on stdin, passing
# the phase file and marker paths as arguments, then checks whether the marker
# file appeared.
HOOK_SCRIPT="$(dirname "$0")/../lib/hooks/on-phase-change.sh"
MARKER_FILE="/tmp/phase-changed-test-session.marker"
rm -f "$MARKER_FILE"

# t9_run_hook PAYLOAD
# Runs the hook with PAYLOAD on stdin and the phase/marker paths as arguments.
# This script runs under `set -euo pipefail`, so invoking the hook as a bare
# pipeline would abort the whole run (no [FAIL] line, no summary) as soon as
# the hook exited non-zero. Here the exit status is captured and reported
# through fail() instead. Process substitution supplies stdin so that only the
# hook's own status is observed, not that of the payload writer.
t9_run_hook() {
  local payload="$1" rc=0
  "$HOOK_SCRIPT" "$PHASE_FILE" "$MARKER_FILE" \
    < <(printf '%s' "$payload") || rc=$?
  if [ "$rc" -ne 0 ]; then
    fail "PostToolUse hook exited with status $rc"
  fi
}

if [ -x "$HOOK_SCRIPT" ]; then
  # 9a: Bash redirect to phase file → marker written
  t9_payload=$(printf '{"tool_name":"Bash","tool_input":{"command":"echo PHASE:awaiting_ci > %s"}}' \
    "$PHASE_FILE")
  t9_run_hook "$t9_payload"
  if [ -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook writes marker on Bash redirect to phase file"
  else
    fail "PostToolUse hook did not write marker on Bash redirect"
  fi
  rm -f "$MARKER_FILE"

  # 9b: Write tool targeting phase file → marker written
  t9_payload=$(printf '{"tool_name":"Write","tool_input":{"file_path":"%s","content":"PHASE:done"}}' \
    "$PHASE_FILE")
  t9_run_hook "$t9_payload"
  if [ -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook writes marker on Write tool to phase file"
  else
    fail "PostToolUse hook did not write marker on Write tool"
  fi
  rm -f "$MARKER_FILE"

  # 9c: Bash read of phase file (cat) → NO marker (not a write)
  t9_payload=$(printf '{"tool_name":"Bash","tool_input":{"command":"cat %s"}}' \
    "$PHASE_FILE")
  t9_run_hook "$t9_payload"
  if [ ! -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook ignores Bash read of phase file (no false positive)"
  else
    fail "PostToolUse hook wrote marker for Bash read (false positive)"
  fi
  rm -f "$MARKER_FILE"

  # 9d: Unrelated Bash command → NO marker
  t9_run_hook '{"tool_name":"Bash","tool_input":{"command":"echo hello > /tmp/other-file"}}'
  if [ ! -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook skips marker for unrelated operations"
  else
    fail "PostToolUse hook wrote marker for unrelated operation (false positive)"
  fi
  rm -f "$MARKER_FILE"

  # 9e: Write tool targeting different file → NO marker
  t9_run_hook '{"tool_name":"Write","tool_input":{"file_path":"/tmp/other-file","content":"hello"}}'
  if [ ! -f "$MARKER_FILE" ]; then
    ok "PostToolUse hook skips marker for Write to different file"
  else
    fail "PostToolUse hook wrote marker for Write to different file (false positive)"
  fi
  rm -f "$MARKER_FILE"
else
  fail "PostToolUse hook script not found or not executable: $HOOK_SCRIPT"
fi

# ── Test 10: StopFailure hook writes phase file and marker on API error ───
# Each sub-test feeds a {"stop_reason": ...} JSON payload to the hook on stdin,
# passing the phase file and marker paths as arguments, then inspects the files
# the hook left behind.
#
# This script runs under `set -euo pipefail`. Two things would therefore abort
# the whole run silently (no [FAIL] line, no summary) instead of being reported:
#   - the hook exiting non-zero, and
#   - reading a phase file the hook never created (a failing command
#     substitution in a plain assignment trips `set -e`).
# The helpers below turn both into ordinary, reportable outcomes.
STOP_FAILURE_HOOK="$(dirname "$0")/../lib/hooks/on-stop-failure.sh"
SF_MARKER="/tmp/phase-changed-test-sf.marker"
rm -f "$SF_MARKER" "$PHASE_FILE"

# sf_run_hook REASON PHASE_ARG MARKER_ARG
# Runs the hook with a {"stop_reason":"REASON"} payload on stdin and the two
# path arguments as given (either may be empty). A non-zero exit is recorded
# via fail() rather than terminating the script. Process substitution supplies
# stdin so that only the hook's own status is observed.
sf_run_hook() {
  local reason="$1" phase_arg="$2" marker_arg="$3" rc=0
  "$STOP_FAILURE_HOOK" "$phase_arg" "$marker_arg" \
    < <(printf '{"stop_reason":"%s"}' "$reason") || rc=$?
  if [ "$rc" -ne 0 ]; then
    fail "StopFailure hook exited with status $rc (stop_reason=$reason)"
  fi
}

# sf_phase_line N
# Prints line N of $PHASE_FILE, or nothing when the file is missing or
# unreadable. Always succeeds, so it is safe inside $(...) under `set -e`.
sf_phase_line() {
  sed -n "${1}p" "$PHASE_FILE" 2>/dev/null || true
}

if [ -x "$STOP_FAILURE_HOOK" ]; then
  # 10a: rate_limit stop reason → PHASE:failed with api_error reason
  sf_run_hook "rate_limit" "$PHASE_FILE" "$SF_MARKER"
  sf_first=$(sf_phase_line 1)
  sf_second=$(sf_phase_line 2)
  if [ "$sf_first" = "PHASE:failed" ] && echo "$sf_second" | grep -q "api_error: rate_limit"; then
    ok "StopFailure hook writes PHASE:failed with api_error: rate_limit"
  else
    fail "StopFailure hook phase file: first='$sf_first' second='$sf_second'"
  fi
  if [ -f "$SF_MARKER" ]; then
    ok "StopFailure hook writes phase-changed marker"
  else
    fail "StopFailure hook did not write phase-changed marker"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10b: server_error stop reason
  sf_run_hook "server_error" "$PHASE_FILE" "$SF_MARKER"
  sf_second=$(sf_phase_line 2)
  if echo "$sf_second" | grep -q "api_error: server_error"; then
    ok "StopFailure hook writes api_error: server_error"
  else
    fail "StopFailure hook server_error: got '$sf_second'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10c: authentication_failed stop reason
  sf_run_hook "authentication_failed" "$PHASE_FILE" "$SF_MARKER"
  sf_second=$(sf_phase_line 2)
  if echo "$sf_second" | grep -q "api_error: authentication_failed"; then
    ok "StopFailure hook writes api_error: authentication_failed"
  else
    fail "StopFailure hook authentication_failed: got '$sf_second'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10e: missing phase_file arg → no-op (exit 0, no crash)
  # sf_run_hook records a failure if the exit status is non-zero; the check
  # below covers the "nothing written" half of the expectation.
  sf_run_hook "rate_limit" "" "$SF_MARKER"
  if [ ! -f "$PHASE_FILE" ]; then
    ok "StopFailure hook no-ops when phase_file is empty"
  else
    fail "StopFailure hook should not write when phase_file is empty"
  fi
  rm -f "$SF_MARKER"

  # 10f: missing marker arg → phase file still written, no marker
  sf_run_hook "billing_error" "$PHASE_FILE" ""
  sf_first=$(sf_phase_line 1)
  sf_marker_exists="no"
  if [ -f "$SF_MARKER" ]; then
    sf_marker_exists="yes"
  fi
  if [ "$sf_first" = "PHASE:failed" ] && [ "$sf_marker_exists" = "no" ]; then
    ok "StopFailure hook writes phase without marker when marker arg is empty"
  else
    fail "StopFailure hook: first='$sf_first' marker_exists=$sf_marker_exists"
  fi
  rm -f "$PHASE_FILE"

  # 10g: terminal phase guard — does not overwrite PHASE:done
  echo "PHASE:done" > "$PHASE_FILE"
  sf_run_hook "rate_limit" "$PHASE_FILE" "$SF_MARKER"
  sf_first=$(sf_phase_line 1)
  if [ "$sf_first" = "PHASE:done" ] && [ ! -f "$SF_MARKER" ]; then
    ok "StopFailure hook does not overwrite terminal PHASE:done"
  else
    fail "StopFailure hook overwrote PHASE:done: first='$sf_first'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10h: terminal phase guard — does not overwrite PHASE:merged
  echo "PHASE:merged" > "$PHASE_FILE"
  sf_run_hook "server_error" "$PHASE_FILE" "$SF_MARKER"
  sf_first=$(sf_phase_line 1)
  if [ "$sf_first" = "PHASE:merged" ] && [ ! -f "$SF_MARKER" ]; then
    ok "StopFailure hook does not overwrite terminal PHASE:merged"
  else
    fail "StopFailure hook overwrote PHASE:merged: first='$sf_first'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"

  # 10i: terminal phase guard — does not overwrite PHASE:escalate
  echo "PHASE:escalate" > "$PHASE_FILE"
  sf_run_hook "rate_limit" "$PHASE_FILE" "$SF_MARKER"
  sf_first=$(sf_phase_line 1)
  if [ "$sf_first" = "PHASE:escalate" ] && [ ! -f "$SF_MARKER" ]; then
    ok "StopFailure hook does not overwrite terminal PHASE:escalate"
  else
    fail "StopFailure hook overwrote PHASE:escalate: first='$sf_first'"
  fi
  rm -f "$SF_MARKER" "$PHASE_FILE"
else
  fail "StopFailure hook script not found or not executable: $STOP_FAILURE_HOOK"
fi

# ── Test 11: phase-changed marker resets mtime guard ─────────────────────
# Simulates monitor_phase_loop behavior: when the marker exists, last_mtime is
# reset to 0 so the phase is processed even though the file's mtime is the same.
printf '%s\n' "PHASE:awaiting_ci" > "$PHASE_FILE"
LAST_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0)
PHASE_MTIME="$LAST_MTIME"

# Baseline: with equal mtimes and no marker, the guard must block.
if [ "$PHASE_MTIME" -gt "$LAST_MTIME" ]; then
  fail "mtime guard should block when phase_mtime <= last_mtime"
else
  ok "mtime guard blocks when no marker present (baseline)"
fi

# Drop a marker, then consume it the way the loop is described to: remove the
# marker file and reset the remembered mtime to 0.
MARKER_FILE="/tmp/phase-changed-test-mtime.marker"
date +%s > "$MARKER_FILE"
if [ -f "$MARKER_FILE" ]; then
  LAST_MTIME=0
  rm -f "$MARKER_FILE"
fi

# After the reset, the unchanged mtime must compare as newer.
if [ "$PHASE_MTIME" -le "$LAST_MTIME" ]; then
  fail "phase-changed marker did not reset mtime guard"
else
  ok "phase-changed marker resets mtime guard (phase now processable)"
fi

# ── Test 12: crash handler treats PHASE:escalate as terminal ───────────
# Simulates the monitor_phase_loop crash handler: if a session exits while the
# phase file holds PHASE:escalate, that phase must be treated as terminal and
# passed through to the phase handler. Invoking the callback with PHASE:crashed
# instead would lose the escalation intent.
CRASH_CALLBACK_PHASE=""
# Stand-in for the callback: remembers the phase it was called with.
mock_crash_callback() {
  CRASH_CALLBACK_PHASE="$1"
}

printf '%s\n' "PHASE:escalate" > "$PHASE_FILE"
current_phase=$(head -n 1 "$PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true)
case "$current_phase" in
  PHASE:done | PHASE:failed | PHASE:merged | PHASE:escalate)
    # Terminal phase: hand it to the phase handler unchanged (correct path)
    mock_crash_callback "$current_phase"
    ;;
  *)
    # Anything else is reported as a crash (wrong path for escalate)
    mock_crash_callback "PHASE:crashed"
    ;;
esac

if [ "$CRASH_CALLBACK_PHASE" != "PHASE:escalate" ]; then
  fail "crash handler lost escalation intent: expected PHASE:escalate, got $CRASH_CALLBACK_PHASE"
else
  ok "crash handler preserves PHASE:escalate (not replaced by PHASE:crashed)"
fi

# The remaining terminal phases must be classified as terminal by the same
# case pattern.
terminal_phases=("PHASE:done" "PHASE:failed" "PHASE:merged")
for tp in "${terminal_phases[@]}"; do
  printf '%s\n' "$tp" > "$PHASE_FILE"
  current_phase=$(head -n 1 "$PHASE_FILE" 2>/dev/null | tr -d '[:space:]' || true)
  case "$current_phase" in
    PHASE:done | PHASE:failed | PHASE:merged | PHASE:escalate)
      ok "crash handler treats $tp as terminal"
      ;;
    *)
      fail "crash handler does not treat $tp as terminal"
      ;;
  esac
done

# ── Cleanup ───────────────────────────────────────────────────────────────────
# Remove the phase file used by the tests above.
rm -f "$PHASE_FILE"

# ── Summary ───────────────────────────────────────────────────────────────────
# Print the tallies after a blank separator line; the exit status is 0 only
# when no failure was recorded.
printf '\n'
printf 'Results: %d passed, %d failed\n' "$PASS" "$FAIL"
if [ "$FAIL" -ne 0 ]; then
  echo "Some tests failed."
  exit 1
fi
echo "All tests passed."
exit 0