#!/usr/bin/env bash
# dev-poll.sh — Pull-based scheduler: find the next ready issue and start dev-agent
#
# Pull system: issues labeled "backlog" are candidates. An issue is READY when
# ALL its dependency issues are closed (and their PRs merged).
# No "todo" label needed — readiness is derived from reality.
#
# Priority:
#   1. Orphaned "in-progress" issues (agent died or PR needs attention)
#   2. Ready "priority" + "backlog" issues (FIFO within tier)
#   3. Ready "backlog" issues without "priority" (FIFO within tier)
#
# Usage:
#   cron every 10min
#   dev-poll.sh [projects/harb.toml]   # optional project config

set -euo pipefail

# Load shared environment (with optional project TOML override)
export PROJECT_TOML="${1:-}"
source "$(dirname "$0")/../lib/env.sh"
source "$(dirname "$0")/../lib/ci-helpers.sh"
# shellcheck source=../lib/mirrors.sh
source "$(dirname "$0")/../lib/mirrors.sh"
# shellcheck source=../lib/guard.sh
source "$(dirname "$0")/../lib/guard.sh"
check_active dev

# Gitea labels API requires []int64 — look up the "underspecified" label ID once
UNDERSPECIFIED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
  | jq -r '.[] | select(.name == "underspecified") | .id' 2>/dev/null || true)
# Fallback ID used when the lookup yields nothing.
UNDERSPECIFIED_LABEL_ID="${UNDERSPECIFIED_LABEL_ID:-1300816}"

# Track CI fix attempts per PR to avoid infinite respawn loops
CI_FIX_TRACKER="${FACTORY_ROOT}/dev/ci-fixes-${PROJECT_NAME:-default}.json"
CI_FIX_LOCK="${CI_FIX_TRACKER}.lock"

# -----------------------------------------------------------------------------
# CI fix tracker helpers
#
# The tracker file holds a JSON object keyed by PR number (as a string) whose
# values are attempt counts. Each helper runs its Python snippet under
# `flock "$CI_FIX_LOCK"`.
#
# The tracker path, PR number and mode are passed to Python through argv
# instead of being spliced into the Python source, so a quote in the path
# cannot turn the snippet into a syntax error that the `2>/dev/null` fallback
# would then hide.
# -----------------------------------------------------------------------------

# Print the attempt count recorded for PR $1; prints 0 when the tracker has no
# entry or when the lookup fails for any reason.
ci_fix_count() {
  local pr="$1"
  flock "$CI_FIX_LOCK" python3 -c '
import json, os, sys

path, pr = sys.argv[1], sys.argv[2]
data = {}
if os.path.exists(path):
    with open(path) as fh:
        data = json.load(fh)
print(data.get(pr, 0))
' "$CI_FIX_TRACKER" "$pr" 2>/dev/null || echo 0
}

# Add one to the attempt count of PR $1. Best-effort: failures are ignored.
ci_fix_increment() {
  local pr="$1"
  flock "$CI_FIX_LOCK" python3 -c '
import json, os, sys

path, pr = sys.argv[1], sys.argv[2]
data = {}
if os.path.exists(path):
    with open(path) as fh:
        data = json.load(fh)
data[pr] = data.get(pr, 0) + 1
with open(path, "w") as fh:
    json.dump(data, fh)
' "$CI_FIX_TRACKER" "$pr" 2>/dev/null || true
}

# Drop the tracker entry of PR $1. Best-effort: failures are ignored.
ci_fix_reset() {
  local pr="$1"
  flock "$CI_FIX_LOCK" python3 -c '
import json, os, sys

path, pr = sys.argv[1], sys.argv[2]
data = {}
if os.path.exists(path):
    with open(path) as fh:
        data = json.load(fh)
data.pop(pr, None)
with open(path, "w") as fh:
    json.dump(data, fh)
' "$CI_FIX_TRACKER" "$pr" 2>/dev/null || true
}

# Read, threshold-check and conditionally bump the counter of PR $1 in a single
# locked step.
#
# Arguments:
#   $1 - PR number
#   $2 - optional literal "check_only": report ok-counts without incrementing
# Output (stdout), exactly one of:
#   ok:<n>                   not exhausted; <n> is the count after any bump
#   exhausted_first_time:3   count was exactly 3; it is bumped to 4 (in both
#                            modes) so this answer is given only once
#   exhausted:<n>            count was already above 3
#   exhausted:99             the Python step failed
ci_fix_check_and_increment() {
  local pr="$1"
  local check_only="${2:-}"
  flock "$CI_FIX_LOCK" python3 -c '
import json, os, sys

path, pr, mode = sys.argv[1], sys.argv[2], sys.argv[3]
check_only = mode == "check_only"

data = {}
if os.path.exists(path):
    with open(path) as fh:
        data = json.load(fh)
count = data.get(pr, 0)


def save():
    with open(path, "w") as fh:
        json.dump(data, fh)


if count > 3:
    print("exhausted:" + str(count))
elif count == 3:
    data[pr] = 4
    save()
    print("exhausted_first_time:3")
elif check_only:
    print("ok:" + str(count))
else:
    count += 1
    data[pr] = count
    save()
    print("ok:" + str(count))
' "$CI_FIX_TRACKER" "$pr" "$check_only" 2>/dev/null || echo "exhausted:99"
}

# Check whether an issue already has the "blocked" label.
# Returns 0 when a label named "blocked" is present on issue $1, non-zero
# otherwise (including when the label lookup itself fails).
is_blocked() {
  local issue="$1"
  forge_api GET "/issues/${issue}/labels" 2>/dev/null \
    | jq -e '.[] | select(.name == "blocked")' >/dev/null 2>&1
}

# Post a CI-exhaustion diagnostic comment and label issue as blocked.
# Args: issue_num pr_num attempts
# Does nothing (returns 0) when ensure_blocked_label_id yields no ID.
# Both API calls are best-effort: failures are ignored.
_post_ci_blocked_comment() {
  local issue_num="$1" pr_num="$2" attempts="$3"
  local blocked_id
  blocked_id=$(ensure_blocked_label_id)
  [ -z "$blocked_id" ] && return 0

  local comment
  comment="### Session failure diagnostic

| Field | Value |
|---|---|
| Exit reason | \`ci_exhausted_poll (${attempts} attempts)\` |
| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` |
| PR | #${pr_num} |"

  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API}/issues/${issue_num}/comments" \
    -d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API}/issues/${issue_num}/labels" \
    -d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true
}

# =============================================================================
# HELPER: handle CI-exhaustion check/block (DRY for 3 call sites)
# Sets CI_FIX_ATTEMPTS for caller use. Returns 0 if exhausted, 1 if not.
#
# Pass "check_only" as third arg for the backlog scan path: ok-counts are
# returned without incrementing (deferred to launch time so a WAITING_PRS
# exit cannot waste a fix attempt). The 3→4 sentinel bump is always atomic
# regardless of mode, preventing duplicate blocked labels from concurrent
# pollers.
#
# Args: pr_num issue_num [check_only]
# =============================================================================
handle_ci_exhaustion() {
  local pr_num="$1" issue_num="$2"
  local check_only="${3:-}"
  local result

  # Fast path: already blocked — skip without touching counter.
  if is_blocked "$issue_num"; then
    CI_FIX_ATTEMPTS=$(ci_fix_count "$pr_num")
    log "PR #${pr_num} (issue #${issue_num}) already blocked (${CI_FIX_ATTEMPTS} attempts) — skipping"
    return 0
  fi

  # Single flock-protected call: read + threshold-check + conditional bump.
  # In check_only mode, ok-counts are returned without incrementing (deferred
  # to launch time). In both modes, the 3→4 sentinel bump is atomic, so only
  # one concurrent poller can ever receive exhausted_first_time:3 and label
  # the issue blocked.
  result=$(ci_fix_check_and_increment "$pr_num" "$check_only")
  case "$result" in
    ok:*)
      CI_FIX_ATTEMPTS="${result#ok:}"
      return 1
      ;;
    exhausted_first_time:*)
      CI_FIX_ATTEMPTS="${result#exhausted_first_time:}"
      log "PR #${pr_num} (issue #${issue_num}) CI exhausted (${CI_FIX_ATTEMPTS} attempts) — marking blocked"
      _post_ci_blocked_comment "$issue_num" "$pr_num" "$CI_FIX_ATTEMPTS"
      matrix_send "dev" "🚨 PR #${pr_num} (issue #${issue_num}) CI failed after ${CI_FIX_ATTEMPTS} attempts — marked blocked" 2>/dev/null || true
      ;;
    exhausted:*)
      CI_FIX_ATTEMPTS="${result#exhausted:}"
      log "PR #${pr_num} (issue #${issue_num}) CI exhausted (${CI_FIX_ATTEMPTS} attempts) — already blocked, skipping"
      ;;
    *)
      # Unrecognised tracker answer: treat as exhausted rather than spawn.
      CI_FIX_ATTEMPTS=99
      log "PR #${pr_num} (issue #${issue_num}) CI exhausted (${CI_FIX_ATTEMPTS} attempts) — already blocked, skipping"
      ;;
  esac
  return 0
}

# =============================================================================
# HELPER: merge an approved PR directly (no Claude needed)
#
# Merging an approved, CI-green PR is a single API call. Spawning dev-agent
# for this fails when the issue is already closed (forge auto-closes issues
# on PR creation when body contains "Fixes #N"), causing a respawn loop (#344).
# =============================================================================
# try_direct_merge PR_NUM ISSUE_NUM
#
# Arguments:
#   $1 - number of the PR to merge
#   $2 - number of the linked issue; 0 means "no linked issue" (chore PR), in
#        which case no issue is closed or unlabelled
# Globals read: API, FORGE_TOKEN, PROJECT_NAME, PROJECT_REPO_ROOT, PRIMARY_BRANCH
# Returns: 0 when the merge request answered HTTP 200 or 204, 1 otherwise.
try_direct_merge() {
  local pr_num="$1" issue_num="$2"

  log "PR #${pr_num} (issue #${issue_num}) approved + CI green → attempting direct merge"

  # -w appends the HTTP status on its own last line so it can be split off the
  # response; `|| true` keeps a curl failure from aborting the caller.
  local merge_resp merge_http
  merge_resp=$(curl -sf -w '\n%{http_code}' -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H 'Content-Type: application/json' \
    "${API}/pulls/${pr_num}/merge" \
    -d '{"Do":"merge","delete_branch_after_merge":true}' 2>/dev/null) || true
  merge_http=$(echo "$merge_resp" | tail -1)

  if [ "${merge_http:-0}" != "200" ] && [ "${merge_http:-0}" != "204" ]; then
    log "PR #${pr_num} direct merge failed (HTTP ${merge_http:-?}) — falling back to dev-agent"
    return 1
  fi

  log "PR #${pr_num} merged successfully"
  if [ "$issue_num" -gt 0 ]; then
    # Close the issue (may already be closed by forge auto-close)
    curl -sf -X PATCH \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H 'Content-Type: application/json' \
      "${API}/issues/${issue_num}" \
      -d '{"state":"closed"}' >/dev/null 2>&1 || true
    # Remove in-progress label
    curl -sf -X DELETE \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${API}/issues/${issue_num}/labels/in-progress" >/dev/null 2>&1 || true
    # Clean up phase/session artifacts
    rm -f "/tmp/dev-session-${PROJECT_NAME}-${issue_num}.phase" \
      "/tmp/dev-impl-summary-${PROJECT_NAME}-${issue_num}.txt"
    matrix_send "dev" "✅ PR #${pr_num} (issue #${issue_num}) merged directly by dev-poll" 2>/dev/null || true
  else
    matrix_send "dev" "✅ PR #${pr_num} merged directly by dev-poll (chore)" 2>/dev/null || true
  fi

  # Pull merged primary branch and push to mirrors.
  # `git -C ""` leaves the working directory unchanged, so with an empty
  # PROJECT_REPO_ROOT these commands would act on whatever repository the
  # current directory belongs to. Skip the local sync in that case.
  if [ -n "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PRIMARY_BRANCH:-}" ]; then
    git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true
    git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
    git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true
  else
    log "PR #${pr_num} merged but PROJECT_REPO_ROOT/PRIMARY_BRANCH is empty — skipping local sync"
  fi
  mirror_push

  # Clean up CI fix tracker
  ci_fix_reset "$pr_num"
  return 0
}

API="${FORGE_API}"
LOCKFILE="/tmp/dev-agent-${PROJECT_NAME:-default}.lock"
LOGFILE="${FACTORY_ROOT}/dev/dev-agent-${PROJECT_NAME:-default}.log"
PREFLIGHT_RESULT="/tmp/dev-agent-preflight.json"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Append a UTC-timestamped "poll:" line to $LOGFILE.
log() {
  printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
}

# jq filters counting non-stale reviews of a given state in a reviews array.
_JQ_APPROVED_COUNT='[.[] | select(.state == "APPROVED") | select(.stale == false)] | length'
_JQ_CHANGES_COUNT='[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length'

# Print the issue number a PR refers to, trying in order: branch name
# (fix/issue-NNN), last "#NNN" in the title, first "closes|fixes|resolves #NNN"
# in the body. Chore branches from gardener/planner/predictor without any
# reference yield 0 (mergeable without an issue). Prints an empty line when
# nothing matches.
# Args: branch title body
_pr_issue_ref() {
  local branch="$1" title="$2" body="$3"
  local ref
  ref=$(echo "$branch" | grep -oP '(?<=fix/issue-)\d+' || true)
  if [ -z "$ref" ]; then
    ref=$(echo "$title" | grep -oP '#\K\d+' | tail -1 || true)
  fi
  if [ -z "$ref" ]; then
    ref=$(echo "$body" | grep -oiP '(?:closes|fixes|resolves)\s*#\K\d+' | head -1 || true)
  fi
  if [ -z "$ref" ] && [[ "$branch" =~ ^chore/(gardener|planner|predictor)- ]]; then
    ref=0
  fi
  echo "$ref"
}

# True when CI state $1 is passed, "pending", "unknown" or empty — i.e. CI has
# not definitively failed, so a REQUEST_CHANGES review may be acted on now.
_ci_not_definitively_failed() {
  local state="$1"
  ci_passed "$state" || [ "$state" = "pending" ] || [ "$state" = "unknown" ] || [ -z "$state" ]
}

# =============================================================================
# PRE-LOCK: merge approved + CI-green PRs (no Claude session needed)
#
# Merging is a single API call — it doesn't need the dev-agent lock.
# This ensures approved PRs get merged even while a dev-agent is running.
# (See #531: direct merges should not be blocked by agent lock)
# =============================================================================
log "pre-lock: scanning for mergeable PRs"
PL_PRS=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
  "${API}/pulls?state=open&limit=20")

PL_MERGED_ANY=false
for i in $(seq 0 $(($(jq 'length' <<< "$PL_PRS") - 1))); do
  PL_PR_NUM=$(jq -r ".[$i].number" <<< "$PL_PRS")
  PL_PR_SHA=$(jq -r ".[$i].head.sha" <<< "$PL_PRS")
  PL_PR_BRANCH=$(jq -r ".[$i].head.ref" <<< "$PL_PRS")
  PL_PR_TITLE=$(jq -r ".[$i].title" <<< "$PL_PRS")
  PL_PR_BODY=$(jq -r ".[$i].body // \"\"" <<< "$PL_PRS")

  # Issue number from branch name, PR title, or PR body; 0 for chore PRs.
  # PRs with no reference at all are not handled here.
  PL_ISSUE=$(_pr_issue_ref "$PL_PR_BRANCH" "$PL_PR_TITLE" "$PL_PR_BODY")
  if [ -z "$PL_ISSUE" ]; then
    continue
  fi

  PL_CI_STATE=$(ci_commit_status "$PL_PR_SHA") || true

  # Non-code PRs may have no CI — treat as passed
  if ! ci_passed "$PL_CI_STATE" && ! ci_required_for_pr "$PL_PR_NUM"; then
    PL_CI_STATE="success"
  fi

  if ! ci_passed "$PL_CI_STATE"; then
    continue
  fi

  # Check for approval (non-stale)
  PL_REVIEWS=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/pulls/${PL_PR_NUM}/reviews") || true
  PL_HAS_APPROVE=$(jq -r "$_JQ_APPROVED_COUNT" <<< "$PL_REVIEWS") || true

  if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then
    if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then
      PL_MERGED_ANY=true
    fi
    # Direct merge failed — will fall through to post-lock dev-agent fallback
  fi
done

if [ "$PL_MERGED_ANY" = true ]; then
  log "pre-lock: merged PR(s) successfully — exiting"
  exit 0
fi

log "pre-lock: no PRs merged, checking agent lock"

# --- Check if dev-agent already running ---
# A lockfile whose PID is no longer alive is treated as stale and removed.
if [ -f "$LOCKFILE" ]; then
  LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "")
  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then
    log "agent running (PID ${LOCK_PID})"
    exit 0
  fi
  rm -f "$LOCKFILE"
fi

# --- Memory guard ---
# MemAvailable is reported in kB; convert to MB before comparing.
AVAIL_MB=$(awk '/MemAvailable/{printf "%d", $2/1024}' /proc/meminfo)
if [ "$AVAIL_MB" -lt 2000 ]; then
  log "SKIP: only ${AVAIL_MB}MB available (need 2000MB)"
  matrix_send "dev" "⚠️ Low memory (${AVAIL_MB}MB) — skipping dev-agent" 2>/dev/null || true
  exit 0
fi

# =============================================================================
# HELPER: check if a dependency issue is resolved
#
# Issue closed = dep satisfied. The scheduler only closes issues after
# merging, so closed state is trustworthy. No need to hunt for the
# specific PR — that was over-engineering that caused false negatives.
#
# Args: dep_num. Returns 0 when the issue state is "closed", 1 otherwise.
# =============================================================================
dep_is_merged() {
  local dep_num="$1"
  local dep_state
  dep_state=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/issues/${dep_num}" | jq -r '.state // "open"')
  [ "$dep_state" = "closed" ]
}

# =============================================================================
# HELPER: extract dependency numbers from issue body
#
# Args: issue_body. Output is whatever lib/parse-deps.sh prints for that body
# on stdin (consumed by issue_is_ready as one dependency number per line).
# =============================================================================
get_deps() {
  local issue_body="$1"
  # Shared parser: lib/parse-deps.sh (single source of truth)
  echo "$issue_body" | bash "${FACTORY_ROOT}/lib/parse-deps.sh"
}

# =============================================================================
# HELPER: check if issue is ready (all deps merged)
#
# Args: issue_num issue_body. Returns 0 when every dependency listed in the
# body passes dep_is_merged (or there are none); otherwise logs the first
# unmet dependency and returns 1.
# =============================================================================
issue_is_ready() {
  local issue_num="$1"
  local issue_body="$2"

  local deps dep
  deps=$(get_deps "$issue_body")

  # No dependencies — always ready
  [ -n "$deps" ] || return 0

  while IFS= read -r dep; do
    [ -z "$dep" ] && continue
    if ! dep_is_merged "$dep"; then
      log "  #${issue_num} blocked: dep #${dep} not merged"
      return 1
    fi
  done <<< "$deps"

  return 0
}

# =============================================================================
# PRIORITY 1: orphaned in-progress issues
#
# Only the first in-progress issue returned by the API is examined per run.
# =============================================================================
log "checking for in-progress issues"
ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
  "${API}/issues?state=open&labels=in-progress&limit=10&type=issues")

ORPHAN_COUNT=$(jq 'length' <<< "$ORPHANS_JSON")
if [ "$ORPHAN_COUNT" -gt 0 ]; then
  ISSUE_NUM=$(jq -r '.[0].number' <<< "$ORPHANS_JSON")

  # Formula guard: formula-labeled issues should not be worked on by dev-agent.
  # Remove in-progress label and skip to prevent infinite respawn cycle (#115).
  ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true
  SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|action|prediction/backlog|prediction/unreviewed)$' | head -1) || true
  if [ -n "$SKIP_LABEL" ]; then
    log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping"
    curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
      "${API}/issues/${ISSUE_NUM}/labels/in-progress" >/dev/null 2>&1 || true
    exit 0
  fi

  # Check if there's already an open PR for this issue
  HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/pulls?state=open&limit=20" | \
    jq -r --arg branch "fix/issue-${ISSUE_NUM}" \
      '.[] | select(.head.ref == $branch) | .number' | head -1) || true

  if [ -z "$HAS_PR" ]; then
    # No PR at all: restart the agent unless a tmux session is still alive.
    SESSION_NAME="dev-${PROJECT_NAME}-${ISSUE_NUM}"
    if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
      log "issue #${ISSUE_NUM} already has active session ${SESSION_NAME} — skipping"
    else
      log "recovering orphaned issue #${ISSUE_NUM} (no PR found)"
      nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
      log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)"
    fi
    exit 0
  fi

  PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true
  CI_STATE=$(ci_commit_status "$PR_SHA") || true

  # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed
  if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$HAS_PR"; then
    CI_STATE="success"
    log "PR #${HAS_PR} has no code files — treating CI as passed"
  fi

  # Check formal reviews (single fetch to avoid race window)
  REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/pulls/${HAS_PR}/reviews") || true
  HAS_APPROVE=$(jq -r "$_JQ_APPROVED_COUNT" <<< "$REVIEWS_JSON") || true
  HAS_CHANGES=$(jq -r "$_JQ_CHANGES_COUNT" <<< "$REVIEWS_JSON") || true

  if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then
    if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then
      exit 0
    fi
    # Direct merge failed (conflicts?) — fall back to dev-agent
    SESSION_NAME="dev-${PROJECT_NAME}-${ISSUE_NUM}"
    if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
      log "issue #${ISSUE_NUM} already has active session ${SESSION_NAME} — skipping"
    else
      log "falling back to dev-agent for PR #${HAS_PR} merge"
      nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
      log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)"
    fi
    exit 0

  # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is
  # pending/unknown. Definitive CI failure is handled by the elif below.
  elif [ "${HAS_CHANGES:-0}" -gt 0 ] && _ci_not_definitively_failed "$CI_STATE"; then
    SESSION_NAME="dev-${PROJECT_NAME}-${ISSUE_NUM}"
    if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
      log "issue #${ISSUE_NUM} already has active session ${SESSION_NAME} — skipping"
    else
      log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent"
      nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
      log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
    fi
    exit 0

  elif ci_failed "$CI_STATE"; then
    SESSION_NAME="dev-${PROJECT_NAME}-${ISSUE_NUM}"
    if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
      log "issue #${ISSUE_NUM} already has active session ${SESSION_NAME} — skipping"
      exit 0
    fi
    # When the check reports exhaustion we do not exit: control falls through
    # to the stuck-PR and backlog scans below.
    if ! handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then
      # Increment at actual launch time (not on guard-hit paths)
      if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then
        exit 0  # exhausted between check and launch
      fi
      log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)"
      nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
      log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)"
      exit 0
    fi

  else
    log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)"
  fi
fi

# =============================================================================
# PRIORITY 1.5: any open PR with REQUEST_CHANGES or CI failure (stuck PRs)
# =============================================================================
log "checking for stuck PRs"
OPEN_PRS=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
  "${API}/pulls?state=open&limit=20")

for i in $(seq 0 $(($(jq 'length' <<< "$OPEN_PRS") - 1))); do
  PR_NUM=$(jq -r ".[$i].number" <<< "$OPEN_PRS")
  PR_BRANCH=$(jq -r ".[$i].head.ref" <<< "$OPEN_PRS")
  PR_SHA=$(jq -r ".[$i].head.sha" <<< "$OPEN_PRS")
  PR_TITLE=$(jq -r ".[$i].title" <<< "$OPEN_PRS")
  PR_BODY=$(jq -r ".[$i].body // \"\"" <<< "$OPEN_PRS")

  # Issue number from branch name (fix/issue-NNN), PR title (#NNN), or PR body
  # (Closes #NNN); 0 for chore PRs, which may merge without an issue number.
  STUCK_ISSUE=$(_pr_issue_ref "$PR_BRANCH" "$PR_TITLE" "$PR_BODY")
  if [ -z "$STUCK_ISSUE" ]; then
    log "PR #${PR_NUM} has no issue ref — cannot spawn dev-agent, skipping"
    continue
  fi

  CI_STATE=$(ci_commit_status "$PR_SHA") || true

  # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed
  if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$PR_NUM"; then
    CI_STATE="success"
    log "PR #${PR_NUM} has no code files — treating CI as passed"
  fi

  # Single fetch to avoid race window between review checks
  REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/pulls/${PR_NUM}/reviews") || true
  HAS_CHANGES=$(jq -r "$_JQ_CHANGES_COUNT" <<< "$REVIEWS_JSON") || true
  HAS_APPROVE=$(jq -r "$_JQ_APPROVED_COUNT" <<< "$REVIEWS_JSON") || true

  # Merge directly if approved + CI green (no Claude needed — single API call)
  if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then
    if try_direct_merge "$PR_NUM" "$STUCK_ISSUE"; then
      exit 0
    fi
    # Direct merge failed — dev-agent fallback requires a real issue number
    if [ "$STUCK_ISSUE" -eq 0 ]; then
      log "PR #${PR_NUM} direct merge failed — no issue ref for dev-agent, skipping"
      continue
    fi
    # Direct merge failed (conflicts?) — fall back to dev-agent
    SESSION_NAME="dev-${PROJECT_NAME}-${STUCK_ISSUE}"
    if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
      log "issue #${STUCK_ISSUE} already has active session ${SESSION_NAME} — skipping"
    else
      log "falling back to dev-agent for PR #${PR_NUM} merge"
      nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 &
      log "started dev-agent PID $! for stuck PR #${PR_NUM} (agent-merge)"
    fi
    exit 0
  fi

  # Chore PRs without issue ref can only be direct-merged — skip dev-agent paths
  if [ "$STUCK_ISSUE" -eq 0 ]; then
    continue
  fi

  # Stuck: REQUEST_CHANGES or CI failure → spawn agent
  # Do NOT gate REQUEST_CHANGES on ci_passed: if a reviewer leaves REQUEST_CHANGES
  # while CI is still pending/unknown, we must act immediately rather than wait for
  # CI to settle. Definitive CI failure (non-pending, non-unknown) is handled by
  # the elif below, so we only spawn here when CI has not definitively failed.
  if [ "${HAS_CHANGES:-0}" -gt 0 ] && _ci_not_definitively_failed "$CI_STATE"; then
    SESSION_NAME="dev-${PROJECT_NAME}-${STUCK_ISSUE}"
    if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
      log "issue #${STUCK_ISSUE} already has active session ${SESSION_NAME} — skipping"
      continue
    fi
    log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) has REQUEST_CHANGES — fixing first"
    nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 &
    log "started dev-agent PID $! for stuck PR #${PR_NUM}"
    exit 0
  elif ci_failed "$CI_STATE"; then
    SESSION_NAME="dev-${PROJECT_NAME}-${STUCK_ISSUE}"
    if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
      log "issue #${STUCK_ISSUE} already has active session ${SESSION_NAME} — skipping"
      continue
    fi
    if handle_ci_exhaustion "$PR_NUM" "$STUCK_ISSUE" "check_only"; then
      continue  # skip this PR, check next stuck PR or fall through to backlog
    fi
    # Increment at actual launch time (not on guard-hit paths)
    if handle_ci_exhaustion "$PR_NUM" "$STUCK_ISSUE"; then
      continue  # exhausted between check and launch
    fi
    log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) CI failed — fixing (attempt ${CI_FIX_ATTEMPTS}/3)"
    nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 &
    log "started dev-agent PID $! for stuck PR #${PR_NUM}"
    exit 0
  fi
done

# =============================================================================
# PRIORITY 2: find ready backlog issues (pull system)
#
# Two-tier pickup: priority+backlog issues first (FIFO), then plain backlog
# issues (FIFO). The "priority" label is added alongside "backlog", not instead.
# =============================================================================
log "scanning backlog for ready issues"

# Ensure the priority label exists on this repo
ensure_priority_label >/dev/null 2>&1 || true

# Tier 1: issues with both "priority" and "backlog" labels
PRIORITY_BACKLOG_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
  "${API}/issues?state=open&labels=priority,backlog&limit=20&type=issues&sort=oldest") || true
PRIORITY_BACKLOG_JSON="${PRIORITY_BACKLOG_JSON:-[]}"

# Tier 2: all "backlog" issues (includes priority ones — deduplicated below)
ALL_BACKLOG_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
  "${API}/issues?state=open&labels=backlog&limit=20&type=issues&sort=oldest")

# Combine: priority issues first, then remaining backlog issues (deduped)
BACKLOG_JSON=$(jq -n \
  --argjson pri "$PRIORITY_BACKLOG_JSON" \
  --argjson all "$ALL_BACKLOG_JSON" \
  '($pri | map(.number)) as $pnums | $pri + [$all[] | select(.number as $n | $pnums | map(. == $n) | any | not)]')

BACKLOG_COUNT=$(echo "$BACKLOG_JSON" | jq 'length')
if [ "$BACKLOG_COUNT" -eq 0 ]; then
  log "no backlog issues"
  exit 0
fi

PRIORITY_COUNT=$(echo "$PRIORITY_BACKLOG_JSON" | jq 'length')
log "found ${BACKLOG_COUNT} backlog issues (${PRIORITY_COUNT} priority)"

# Check each for readiness.
# Scan results are initialised explicitly so values inherited from the
# environment cannot influence the hold/launch decisions below.
READY_ISSUE=""
READY_PR_FOR_INCREMENT=""
WAITING_PRS=""
for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
  ISSUE_NUM=$(echo "$BACKLOG_JSON" | jq -r ".[$i].number")
  ISSUE_BODY=$(echo "$BACKLOG_JSON" | jq -r ".[$i].body // \"\"")

  # Formula guard: formula-labeled issues must not be picked up by dev-agent.
  # A formula issue that accidentally acquires the backlog label should be skipped.
  ISSUE_LABELS=$(echo "$BACKLOG_JSON" | jq -r ".[$i].labels[].name" 2>/dev/null) || true
  SKIP_LABEL=$(echo "$ISSUE_LABELS" | grep -oE '^(formula|action|prediction/backlog|prediction/unreviewed)$' | head -1) || true
  if [ -n "$SKIP_LABEL" ]; then
    log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — skipping in backlog scan"
    continue
  fi

  if ! issue_is_ready "$ISSUE_NUM" "$ISSUE_BODY"; then
    continue
  fi

  # Check if there's already an open PR for this issue that needs attention.
  # The title match requires that "#N" is not followed by another digit: a
  # plain substring test would let issue #12 claim a PR titled "... #123".
  EXISTING_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
    "${API}/pulls?state=open&limit=20" | \
    jq -r --arg branch "fix/issue-${ISSUE_NUM}" --arg num "#${ISSUE_NUM}" \
      '.[] | select((.head.ref == $branch) or ((.title // "") | test($num + "(?![0-9])"))) | .number' | head -1) || true

  if [ -n "$EXISTING_PR" ]; then
    PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
      "${API}/pulls/${EXISTING_PR}" | jq -r '.head.sha') || true
    CI_STATE=$(ci_commit_status "$PR_SHA") || true

    # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed
    if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$EXISTING_PR"; then
      CI_STATE="success"
      log "PR #${EXISTING_PR} has no code files — treating CI as passed"
    fi

    # Single fetch to avoid race window between review checks
    REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
      "${API}/pulls/${EXISTING_PR}/reviews") || true
    HAS_APPROVE=$(echo "$REVIEWS_JSON" | \
      jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true
    HAS_CHANGES=$(echo "$REVIEWS_JSON" | \
      jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true

    if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then
      if try_direct_merge "$EXISTING_PR" "$ISSUE_NUM"; then
        exit 0
      fi
      # Direct merge failed (conflicts?) — fall back to dev-agent
      log "falling back to dev-agent for PR #${EXISTING_PR} merge"
      nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
      log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)"
      exit 0

    elif [ "${HAS_CHANGES:-0}" -gt 0 ]; then
      log "#${ISSUE_NUM} PR #${EXISTING_PR} has REQUEST_CHANGES — picking up"
      READY_ISSUE="$ISSUE_NUM"
      break

    elif ci_failed "$CI_STATE"; then
      if handle_ci_exhaustion "$EXISTING_PR" "$ISSUE_NUM" "check_only"; then
        # Don't add to WAITING_PRS — escalated PRs should not block new work
        continue
      fi
      log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — picking up (attempt $((CI_FIX_ATTEMPTS+1))/3)"
      READY_ISSUE="$ISSUE_NUM"
      # Remember the PR so the attempt counter is bumped only at launch time.
      READY_PR_FOR_INCREMENT="$EXISTING_PR"
      break

    else
      log "#${ISSUE_NUM} PR #${EXISTING_PR} exists (CI: ${CI_STATE}, waiting)"
      WAITING_PRS="${WAITING_PRS:-}${WAITING_PRS:+, }#${EXISTING_PR}"
      continue
    fi
  fi

  READY_ISSUE="$ISSUE_NUM"
  log "#${ISSUE_NUM} is READY (all deps merged, no existing PR)"
  break
done

# Single-threaded per project: if any issue has an open PR waiting for review/CI,
# don't start new work — let the pipeline drain first
if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then
  log "holding #${READY_ISSUE} — waiting for open PR(s) to land first: ${WAITING_PRS}"
  exit 0
fi

if [ -z "$READY_ISSUE" ]; then
  log "no ready issues (all blocked by unmerged deps)"
  exit 0
fi

# =============================================================================
# LAUNCH: start dev-agent for the ready issue
# =============================================================================
# Deferred CI fix increment — only now that we're certain we are launching.
# Uses the atomic ci_fix_check_and_increment (inside handle_ci_exhaustion) so
# the counter is bumped exactly once even under concurrent poll invocations,
# and a WAITING_PRS exit above cannot silently consume a fix attempt.
if [ -n "${READY_PR_FOR_INCREMENT:-}" ]; then
  if handle_ci_exhaustion "$READY_PR_FOR_INCREMENT" "$READY_ISSUE"; then
    # exhausted (another poller incremented between scan and launch) — bail out
    exit 0
  fi
fi

log "launching dev-agent for #${READY_ISSUE}"
matrix_send "dev" "🚀 Starting dev-agent on issue #${READY_ISSUE}" 2>/dev/null || true
# Remove any leftover result so the wait loop below only sees a fresh one.
rm -f "$PREFLIGHT_RESULT"
nohup "${SCRIPT_DIR}/dev-agent.sh" "$READY_ISSUE" >> "$LOGFILE" 2>&1 &
AGENT_PID=$!

# Wait briefly for preflight (agent writes result before claiming).
# Polls every 2s, up to 30 times; stops early when the result file appears or
# the agent process is gone.
for _w in $(seq 1 30); do
  if [ -f "$PREFLIGHT_RESULT" ]; then
    break
  fi
  if ! kill -0 "$AGENT_PID" 2>/dev/null; then
    break
  fi
  sleep 2
done

if [ -f "$PREFLIGHT_RESULT" ]; then
  PREFLIGHT_STATUS=$(jq -r '.status // "unknown"' < "$PREFLIGHT_RESULT")
  # Read the reason BEFORE deleting the file: reading it inside the too_large
  # arm (after the rm) could never succeed and always produced "unspecified".
  REASON=$(jq -r '.reason // "unspecified"' < "$PREFLIGHT_RESULT" 2>/dev/null || echo "unspecified")
  rm -f "$PREFLIGHT_RESULT"

  case "$PREFLIGHT_STATUS" in
    ready)
      log "dev-agent running for #${READY_ISSUE}"
      ;;
    unmet_dependency)
      log "#${READY_ISSUE} has code-level dependency (preflight blocked)"
      wait "$AGENT_PID" 2>/dev/null || true
      ;;
    too_large)
      log "#${READY_ISSUE} too large: ${REASON}"
      # Label as underspecified
      curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
        -H "Content-Type: application/json" \
        "${API}/issues/${READY_ISSUE}/labels" \
        -d "{\"labels\":[${UNDERSPECIFIED_LABEL_ID}]}" >/dev/null 2>&1 || true
      ;;
    already_done)
      log "#${READY_ISSUE} already done"
      ;;
    *)
      log "#${READY_ISSUE} unknown preflight: ${PREFLIGHT_STATUS}"
      ;;
  esac
elif kill -0 "$AGENT_PID" 2>/dev/null; then
  log "dev-agent running for #${READY_ISSUE} (passed preflight)"
else
  log "dev-agent exited for #${READY_ISSUE} without preflight result"
fi