From 8f93ea3af11566ec13970a925ceb56da7edb51de Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Mar 2026 06:12:40 +0000 Subject: [PATCH 1/8] fix: Restore dev-poll.sh scheduler on SDK (#799) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrite dev-poll.sh to remove all tmux session management and use SDK shared libraries instead: - Remove _inject_into_session(), handle_active_session() — no tmux - Replace try_direct_merge() raw curl with pr_merge() from lib/pr-lifecycle.sh - Replace _post_ci_blocked_comment() with issue_block() from lib/issue-lifecycle.sh - Check PID lockfile instead of tmux sessions for active agent detection - Clean up .sid files instead of .phase files - Remove preflight wait loop (dev-agent.sh handles its own labels) - Extract extract_issue_from_pr() helper to DRY up issue number extraction Preserved from main: - Ready-issue scanning (backlog label + deps met) - Priority tier system (orphaned > priority+backlog > backlog) - Orphaned issue detection (in-progress label but no active agent) - Direct merge shortcut (approved + CI green -> merge without spawning agent) - CI fix exhaustion tracking (per-PR counter, max 3 attempts -> blocked label) Co-Authored-By: Claude Opus 4.6 (1M context) --- dev/dev-poll.sh | 518 +++++++++--------------------------------------- 1 file changed, 92 insertions(+), 426 deletions(-) diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh index bddd05f..98b8b7d 100755 --- a/dev/dev-poll.sh +++ b/dev/dev-poll.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash # dev-poll.sh — Pull-based scheduler: find the next ready issue and start dev-agent # +# SDK version: No tmux — checks PID lockfile for active agents. +# Uses pr_merge() and issue_block() from shared libraries. +# # Pull system: issues labeled "backlog" are candidates. An issue is READY when # ALL its dependency issues are closed (and their PRs merged). # No "todo" label needed — readiness is derived from reality. @@ -16,38 +19,39 @@ set -euo pipefail -# Load shared environment (with optional project TOML override) +# Load shared environment and libraries export PROJECT_TOML="${1:-}" source "$(dirname "$0")/../lib/env.sh" source "$(dirname "$0")/../lib/ci-helpers.sh" +# shellcheck source=../lib/pr-lifecycle.sh +source "$(dirname "$0")/../lib/pr-lifecycle.sh" +# shellcheck source=../lib/issue-lifecycle.sh +source "$(dirname "$0")/../lib/issue-lifecycle.sh" # shellcheck source=../lib/mirrors.sh source "$(dirname "$0")/../lib/mirrors.sh" # shellcheck source=../lib/guard.sh source "$(dirname "$0")/../lib/guard.sh" check_active dev -# Gitea labels API requires []int64 — look up the "underspecified" label ID once -UNDERSPECIFIED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \ - | jq -r '.[] | select(.name == "underspecified") | .id' 2>/dev/null || true) -UNDERSPECIFIED_LABEL_ID="${UNDERSPECIFIED_LABEL_ID:-1300816}" +API="${FORGE_API}" +LOCKFILE="/tmp/dev-agent-${PROJECT_NAME:-default}.lock" +LOGFILE="${DISINTO_LOG_DIR}/dev/dev-agent-${PROJECT_NAME:-default}.log" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -# Track CI fix attempts per PR to avoid infinite respawn loops +log() { + printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +# ============================================================================= +# CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3) +# ============================================================================= CI_FIX_TRACKER="${DISINTO_LOG_DIR}/dev/ci-fixes-${PROJECT_NAME:-default}.json" CI_FIX_LOCK="${CI_FIX_TRACKER}.lock" + ci_fix_count() { local pr="$1" flock "$CI_FIX_LOCK" python3 -c "import json,sys;d=json.load(open('$CI_FIX_TRACKER')) if __import__('os').path.exists('$CI_FIX_TRACKER') else {};print(d.get(str($pr),0))" 2>/dev/null || echo 0 } -ci_fix_increment() { - local pr="$1" - flock "$CI_FIX_LOCK" python3 -c " -import json,os -f='$CI_FIX_TRACKER' -d=json.load(open(f)) if os.path.exists(f) else {} -d[str($pr)]=d.get(str($pr),0)+1 -json.dump(d,open(f,'w')) -" 2>/dev/null || true -} ci_fix_reset() { local pr="$1" flock "$CI_FIX_LOCK" python3 -c " @@ -90,44 +94,14 @@ is_blocked() { | jq -e '.[] | select(.name == "blocked")' >/dev/null 2>&1 } -# Post a CI-exhaustion diagnostic comment and label issue as blocked. -# Args: issue_num pr_num attempts -_post_ci_blocked_comment() { - local issue_num="$1" pr_num="$2" attempts="$3" - local blocked_id - blocked_id=$(ensure_blocked_label_id) - [ -z "$blocked_id" ] && return 0 - - local comment - comment="### Session failure diagnostic - -| Field | Value | -|---|---| -| Exit reason | \`ci_exhausted_poll (${attempts} attempts)\` | -| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` | -| PR | #${pr_num} |" - - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/issues/${issue_num}/comments" \ - -d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/issues/${issue_num}/labels" \ - -d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true -} - # ============================================================================= # HELPER: handle CI-exhaustion check/block (DRY for 3 call sites) # Sets CI_FIX_ATTEMPTS for caller use. Returns 0 if exhausted, 1 if not. +# Uses issue_block() from lib/issue-lifecycle.sh for blocking. # # Pass "check_only" as third arg for the backlog scan path: ok-counts are # returned without incrementing (deferred to launch time so a WAITING_PRS -# exit cannot waste a fix attempt). The 3→4 sentinel bump is always atomic -# regardless of mode, preventing duplicate blocked labels from concurrent -# pollers. +# exit cannot waste a fix attempt). The 3->4 sentinel bump is always atomic. # ============================================================================= handle_ci_exhaustion() { local pr_num="$1" issue_num="$2" @@ -141,11 +115,6 @@ handle_ci_exhaustion() { return 0 fi - # Single flock-protected call: read + threshold-check + conditional bump. - # In check_only mode, ok-counts are returned without incrementing (deferred - # to launch time). In both modes, the 3→4 sentinel bump is atomic, so only - # one concurrent poller can ever receive exhausted_first_time:3 and label - # the issue blocked. result=$(ci_fix_check_and_increment "$pr_num" "$check_only") case "$result" in ok:*) @@ -155,7 +124,7 @@ handle_ci_exhaustion() { exhausted_first_time:*) CI_FIX_ATTEMPTS="${result#exhausted_first_time:}" log "PR #${pr_num} (issue #${issue_num}) CI exhausted (${CI_FIX_ATTEMPTS} attempts) — marking blocked" - _post_ci_blocked_comment "$issue_num" "$pr_num" "$CI_FIX_ATTEMPTS" + issue_block "$issue_num" "ci_exhausted_poll (${CI_FIX_ATTEMPTS} attempts, PR #${pr_num})" ;; exhausted:*) CI_FIX_ATTEMPTS="${result#exhausted:}" @@ -170,7 +139,7 @@ handle_ci_exhaustion() { } # ============================================================================= -# HELPER: merge an approved PR directly (no Claude needed) +# HELPER: merge an approved PR directly via pr_merge() (no Claude needed) # # Merging an approved, CI-green PR is a single API call. Spawning dev-agent # for this fails when the issue is already closed (forge auto-closes issues @@ -181,30 +150,15 @@ try_direct_merge() { log "PR #${pr_num} (issue #${issue_num}) approved + CI green → attempting direct merge" - local merge_resp merge_http - merge_resp=$(curl -sf -w '\n%{http_code}' -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H 'Content-Type: application/json' \ - "${API}/pulls/${pr_num}/merge" \ - -d '{"Do":"merge","delete_branch_after_merge":true}' 2>/dev/null) || true - - merge_http=$(echo "$merge_resp" | tail -1) - - if [ "${merge_http:-0}" = "200" ] || [ "${merge_http:-0}" = "204" ]; then + if pr_merge "$pr_num"; then log "PR #${pr_num} merged successfully" if [ "$issue_num" -gt 0 ]; then - # Close the issue (may already be closed by forge auto-close) - curl -sf -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H 'Content-Type: application/json' \ - "${API}/issues/${issue_num}" \ - -d '{"state":"closed"}' >/dev/null 2>&1 || true - # Remove in-progress label + issue_close "$issue_num" + # Remove in-progress label (don't re-add backlog — issue is closed) curl -sf -X DELETE \ -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/issues/${issue_num}/labels/in-progress" >/dev/null 2>&1 || true - # Clean up phase/session artifacts - rm -f "/tmp/dev-session-${PROJECT_NAME}-${issue_num}.phase" \ + rm -f "/tmp/dev-session-${PROJECT_NAME}-${issue_num}.sid" \ "/tmp/dev-impl-summary-${PROJECT_NAME}-${issue_num}.txt" fi # Pull merged primary branch and push to mirrors @@ -212,199 +166,68 @@ try_direct_merge() { git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH:-}" 2>/dev/null || true git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH:-}" 2>/dev/null || true mirror_push - # Clean up CI fix tracker ci_fix_reset "$pr_num" return 0 fi - log "PR #${pr_num} direct merge failed (HTTP ${merge_http:-?}) — falling back to dev-agent" + log "PR #${pr_num} direct merge failed — falling back to dev-agent" return 1 } # ============================================================================= -# HELPER: inject text into a tmux session via load-buffer + paste (#771) -# All tmux calls guarded with || true to prevent aborting under set -euo pipefail. -# Args: session text +# HELPER: extract issue number from PR branch/title/body # ============================================================================= -_inject_into_session() { - local session="$1" text="$2" - local tmpfile - tmpfile=$(mktemp /tmp/dev-poll-inject-XXXXXX) - printf '%s' "$text" > "$tmpfile" - tmux load-buffer -b "poll-inject-$$" "$tmpfile" || true - tmux paste-buffer -t "$session" -b "poll-inject-$$" || true - sleep 0.5 - tmux send-keys -t "$session" "" Enter || true - tmux delete-buffer -b "poll-inject-$$" 2>/dev/null || true - rm -f "$tmpfile" +extract_issue_from_pr() { + local branch="$1" title="$2" body="$3" + local issue + issue=$(echo "$branch" | grep -oP '(?<=fix/issue-)\d+' || true) + if [ -z "$issue" ]; then + issue=$(echo "$title" | grep -oP '#\K\d+' | tail -1 || true) + fi + if [ -z "$issue" ]; then + issue=$(echo "$body" | grep -oiP '(?:closes|fixes|resolves)\s*#\K\d+' | head -1 || true) + fi + printf '%s' "$issue" } # ============================================================================= -# HELPER: handle events for a running dev session (#771) -# -# When a tmux session is alive, check for injectable events instead of skipping. -# Handles: externally merged/closed PRs, CI results (awaiting_ci), and -# review feedback (awaiting_review). -# -# Args: session_name issue_num [pr_num] -# Sets: ACTIVE_SESSION_ACTION = "cleaned" | "injected" | "skip" +# DEPENDENCY HELPERS # ============================================================================= -# shellcheck disable=SC2034 # ACTIVE_SESSION_ACTION is read by callers -handle_active_session() { - local session="$1" issue_num="$2" pr_num="${3:-}" - local phase_file="/tmp/dev-session-${PROJECT_NAME}-${issue_num}.phase" - local sentinel="/tmp/dev-poll-injected-${PROJECT_NAME}-${issue_num}" - ACTIVE_SESSION_ACTION="skip" - - local phase - phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true) - - local pr_json="" pr_sha="" pr_branch="" - - # --- Detect externally merged/closed PR --- - if [ -n "$pr_num" ]; then - local pr_state pr_merged - pr_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${pr_num}") || true - pr_state=$(printf '%s' "$pr_json" | jq -r '.state // "unknown"') - pr_sha=$(printf '%s' "$pr_json" | jq -r '.head.sha // ""') - pr_branch=$(printf '%s' "$pr_json" | jq -r '.head.ref // ""') - - if [ "$pr_state" != "open" ]; then - pr_merged=$(printf '%s' "$pr_json" | jq -r '.merged // false') - tmux kill-session -t "$session" 2>/dev/null || true - rm -f "$phase_file" "/tmp/dev-impl-summary-${PROJECT_NAME}-${issue_num}.txt" "$sentinel" - if [ "$pr_merged" = "true" ]; then - curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${issue_num}" -d '{"state":"closed"}' >/dev/null 2>&1 || true - fi - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${issue_num}/labels/in-progress" >/dev/null 2>&1 || true - ci_fix_reset "$pr_num" - log "PR #${pr_num} (issue #${issue_num}) merged/closed externally — cleaned up session ${session}" - ACTIVE_SESSION_ACTION="cleaned" - return 0 - fi - else - # No PR number — check if a merged PR exists for this issue's branch - local closed_pr closed_merged - closed_pr=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=closed&limit=10" | \ - jq -r --arg branch "fix/issue-${issue_num}" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - if [ -n "$closed_pr" ]; then - closed_merged=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${closed_pr}" | jq -r '.merged // false') || true - if [ "$closed_merged" = "true" ]; then - tmux kill-session -t "$session" 2>/dev/null || true - rm -f "$phase_file" "/tmp/dev-impl-summary-${PROJECT_NAME}-${issue_num}.txt" "$sentinel" - curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${issue_num}" -d '{"state":"closed"}' >/dev/null 2>&1 || true - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${issue_num}/labels/in-progress" >/dev/null 2>&1 || true - log "issue #${issue_num} PR #${closed_pr} merged externally — cleaned up session ${session}" - ACTIVE_SESSION_ACTION="cleaned" - return 0 - fi - fi - return 0 # no PR — can't inject CI/review events +dep_is_merged() { + local dep_num="$1" + local dep_state + dep_state=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${dep_num}" | jq -r '.state // "open"') + if [ "$dep_state" != "closed" ]; then + return 1 fi - - # Sentinel: avoid re-injecting for the same SHA across poll cycles - local last_injected - last_injected=$(cat "$sentinel" 2>/dev/null || true) - if [ -n "$last_injected" ] && [ "$last_injected" = "$pr_sha" ]; then - log "already injected for ${session} SHA ${pr_sha:0:7} — skipping" - return 0 - fi - - # --- Inject CI result into awaiting_ci session --- - if [ "$phase" = "PHASE:awaiting_ci" ] && [ -n "$pr_sha" ]; then - local ci_state - ci_state=$(ci_commit_status "$pr_sha") || true - - if ci_passed "$ci_state"; then - _inject_into_session "$session" "CI passed on PR #${pr_num}. - -Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback: - echo \"PHASE:awaiting_review\" > \"${phase_file}\"" - printf '%s' "$pr_sha" > "$sentinel" - log "injected CI success into session ${session} for PR #${pr_num}" - ACTIVE_SESSION_ACTION="injected" - return 0 - fi - - if ci_failed "$ci_state"; then - local pipeline_num error_log - pipeline_num=$(ci_pipeline_number "$pr_sha") || true - error_log="" - if [ -n "$pipeline_num" ]; then - error_log=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$pipeline_num" 2>/dev/null \ - | tail -80 | head -c 4000 || true) - fi - _inject_into_session "$session" "CI failed on PR #${pr_num} (pipeline #${pipeline_num:-?}). - -Error excerpt: -${error_log:-No logs available. Run: bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${pipeline_num:-0}} - -Fix the issue, commit, push, then write: - echo \"PHASE:awaiting_ci\" > \"${phase_file}\"" - printf '%s' "$pr_sha" > "$sentinel" - log "injected CI failure into session ${session} for PR #${pr_num}" - ACTIVE_SESSION_ACTION="injected" - return 0 - fi - fi - - # --- Inject review feedback into awaiting_review session --- - if [ "$phase" = "PHASE:awaiting_review" ] && [ -n "$pr_sha" ]; then - local reviews_json has_changes review_body - reviews_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${pr_num}/reviews") || true - has_changes=$(printf '%s' "$reviews_json" | \ - jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true - - if [ "${has_changes:-0}" -gt 0 ]; then - review_body=$(printf '%s' "$reviews_json" | \ - jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | last | .body // ""') || true - - # Prefer bot review comment if available (richer content) - local review_comment - review_comment=$(forge_api_all "/issues/${pr_num}/comments" | \ - jq -r --arg sha "$pr_sha" \ - '[.[] | select(.body | contains(""))]|length') @@ -61,6 +147,10 @@ HAS_CMT=$(printf '%s' "$ALL_COMMENTS" | jq --arg s "$PR_SHA" \ HAS_FML=$(forge_api_all "/pulls/${PR_NUMBER}/reviews" | jq --arg s "$PR_SHA" \ '[.[]|select(.commit_id==$s)|select(.state!="COMMENT")]|length') [ "${HAS_FML:-0}" -gt 0 ] && [ "$FORCE" != "--force" ] && { log "SKIP: formal review"; exit 0; } + +# ============================================================================= +# RE-REVIEW DETECTION +# ============================================================================= PREV_CONTEXT="" IS_RE_REVIEW=false PREV_SHA="" PREV_REV=$(printf '%s' "$ALL_COMMENTS" | jq -r --arg s "$PR_SHA" \ '[.[]|select(.body|contains("\nReview failed.\n---\n*${PR_SHA:0:7}*" \ @@ -162,11 +274,15 @@ if [ -z "$REVIEW_JSON" ]; then -H "Content-Type: application/json" "${API}/issues/${PR_NUMBER}/comments" -d @- || true exit 1 fi + VERDICT=$(printf '%s' "$REVIEW_JSON" | jq -r '.verdict' | tr '[:lower:]' '[:upper:]' | tr '-' '_') REASON=$(printf '%s' "$REVIEW_JSON" | jq -r '.verdict_reason // ""') REVIEW_MD=$(printf '%s' "$REVIEW_JSON" | jq -r '.review_markdown // ""') log "verdict: ${VERDICT}" +# ============================================================================= +# POST REVIEW +# ============================================================================= status "posting review" RTYPE="Review" if [ "$IS_RE_REVIEW" = true ]; then @@ -184,6 +300,9 @@ POST_RC=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ [ "$POST_RC" != "201" ] && { log "ERROR: comment HTTP ${POST_RC}"; exit 1; } log "posted review comment" +# ============================================================================= +# POST FORMAL REVIEW +# ============================================================================= REVENT="COMMENT" case "$VERDICT" in APPROVE) REVENT="APPROVED" ;; REQUEST_CHANGES|DISCUSS) REVENT="REQUEST_CHANGES" ;; esac if [ "$REVENT" = "APPROVED" ]; then @@ -204,10 +323,18 @@ curl -s -o /dev/null -X POST -H "Authorization: token ${FORGE_REVIEW_TOKEN}" \ --data-binary @"${REVIEW_TMPDIR}/formal.json" >/dev/null 2>&1 || true log "formal ${REVENT} submitted" +# ============================================================================= +# FINAL CLEANUP +# ============================================================================= case "$VERDICT" in - REQUEST_CHANGES|DISCUSS) printf 'PHASE:awaiting_changes\nSHA:%s\n' "$PR_SHA" > "$PHASE_FILE" ;; - *) rm -f "$PHASE_FILE" "$OUTPUT_FILE"; cd "${PROJECT_REPO_ROOT}" - git worktree remove "$WORKTREE" --force 2>/dev/null || true - rm -rf "$WORKTREE" 2>/dev/null || true ;; + REQUEST_CHANGES|DISCUSS) + # Keep session and worktree for re-review continuity + log "keeping session for re-review (SID: ${_AGENT_SESSION_ID:0:12}...)" + ;; + *) + rm -f "$SID_FILE" "$OUTPUT_FILE" + worktree_cleanup "$WORKTREE" + ;; esac + log "DONE: ${VERDICT} (re-review: ${IS_RE_REVIEW})" From c2e95799a0514f142e94875bdd65fafc771f3512 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Mar 2026 06:32:12 +0000 Subject: [PATCH 3/8] fix: Migrate review-pr.sh to SDK + pr-lifecycle (#800) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract agent_run() into shared lib/agent-sdk.sh to eliminate code duplication between dev-agent.sh and review-pr.sh (CI dedup check). Rewrite review-pr.sh from tmux-based agent-session.sh to synchronous claude -p invocations via shared agent-sdk.sh, matching the SDK pattern from dev-agent.sh (#798). Key changes: - Create lib/agent-sdk.sh with shared agent_run() function - Both dev-agent.sh and review-pr.sh now source lib/agent-sdk.sh instead of defining agent_run() inline - Replace agent-session.sh (tmux + monitor_phase_loop) with agent_run() - Add .sid file for session continuity: re-reviews resume the original session via --resume, so Claude remembers its prior review - Use worktree.sh for worktree cleanup - Remove phase file signaling — completion is automatic when claude -p returns - Keep all review business logic unchanged Co-Authored-By: Claude Opus 4.6 (1M context) --- dev/dev-agent.sh | 38 +------------------------------- lib/agent-sdk.sh | 53 +++++++++++++++++++++++++++++++++++++++++++++ review/review-pr.sh | 38 +------------------------------- 3 files changed, 55 insertions(+), 74 deletions(-) create mode 100644 lib/agent-sdk.sh diff --git a/dev/dev-agent.sh b/dev/dev-agent.sh index bd33136..f76041e 100755 --- a/dev/dev-agent.sh +++ b/dev/dev-agent.sh @@ -29,6 +29,7 @@ source "$(dirname "$0")/../lib/issue-lifecycle.sh" source "$(dirname "$0")/../lib/worktree.sh" source "$(dirname "$0")/../lib/pr-lifecycle.sh" source "$(dirname "$0")/../lib/mirrors.sh" +source "$(dirname "$0")/../lib/agent-sdk.sh" # Auto-pull factory code to pick up merged fixes before any logic runs git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true @@ -56,43 +57,6 @@ status() { log "$*" } -# ============================================================================= -# agent_run — synchronous Claude invocation (one-shot claude -p) -# ============================================================================= -# Usage: agent_run [--resume SESSION_ID] [--worktree DIR] PROMPT -# Sets: _AGENT_SESSION_ID (updated each call, persisted to SID_FILE) -_AGENT_SESSION_ID="" - -agent_run() { - local resume_id="" worktree_dir="" - while [[ "${1:-}" == --* ]]; do - case "$1" in - --resume) shift; resume_id="${1:-}"; shift ;; - --worktree) shift; worktree_dir="${1:-}"; shift ;; - *) shift ;; - esac - done - local prompt="${1:-}" - - local -a args=(-p "$prompt" --output-format json --dangerously-skip-permissions --max-turns 200) - [ -n "$resume_id" ] && args+=(--resume "$resume_id") - [ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL") - - local run_dir="${worktree_dir:-$(pwd)}" - local output - log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})" - output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true - - # Extract and persist session_id - local new_sid - new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true - if [ -n "$new_sid" ]; then - _AGENT_SESSION_ID="$new_sid" - printf '%s' "$new_sid" > "$SID_FILE" - log "agent_run: session_id=${new_sid:0:12}..." - fi -} - # ============================================================================= # CLEANUP # ============================================================================= diff --git a/lib/agent-sdk.sh b/lib/agent-sdk.sh new file mode 100644 index 0000000..4199f78 --- /dev/null +++ b/lib/agent-sdk.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# agent-sdk.sh — Shared SDK for synchronous Claude agent invocations +# +# Provides agent_run(): one-shot `claude -p` with session persistence. +# Source this from any agent script after defining: +# SID_FILE — path to persist session ID (e.g. /tmp/dev-session-proj-123.sid) +# LOGFILE — path for log output +# log() — logging function +# +# Usage: +# source "$(dirname "$0")/../lib/agent-sdk.sh" +# agent_run [--resume SESSION_ID] [--worktree DIR] PROMPT +# +# After each call, _AGENT_SESSION_ID holds the session ID (also saved to SID_FILE). +# Recover a previous session on startup: +# if [ -f "$SID_FILE" ]; then _AGENT_SESSION_ID=$(cat "$SID_FILE"); fi + +set -euo pipefail + +_AGENT_SESSION_ID="" + +# agent_run — synchronous Claude invocation (one-shot claude -p) +# Usage: agent_run [--resume SESSION_ID] [--worktree DIR] PROMPT +# Sets: _AGENT_SESSION_ID (updated each call, persisted to SID_FILE) +agent_run() { + local resume_id="" worktree_dir="" + while [[ "${1:-}" == --* ]]; do + case "$1" in + --resume) shift; resume_id="${1:-}"; shift ;; + --worktree) shift; worktree_dir="${1:-}"; shift ;; + *) shift ;; + esac + done + local prompt="${1:-}" + + local -a args=(-p "$prompt" --output-format json --dangerously-skip-permissions --max-turns 200) + [ -n "$resume_id" ] && args+=(--resume "$resume_id") + [ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL") + + local run_dir="${worktree_dir:-$(pwd)}" + local output + log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})" + output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true + + # Extract and persist session_id + local new_sid + new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true + if [ -n "$new_sid" ]; then + _AGENT_SESSION_ID="$new_sid" + printf '%s' "$new_sid" > "$SID_FILE" + log "agent_run: session_id=${new_sid:0:12}..." + fi +} diff --git a/review/review-pr.sh b/review/review-pr.sh index 00cf689..d4e3163 100755 --- a/review/review-pr.sh +++ b/review/review-pr.sh @@ -26,6 +26,7 @@ set -euo pipefail source "$(dirname "$0")/../lib/env.sh" source "$(dirname "$0")/../lib/ci-helpers.sh" source "$(dirname "$0")/../lib/worktree.sh" +source "$(dirname "$0")/../lib/agent-sdk.sh" # Auto-pull factory code to pick up merged fixes before any logic runs git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true @@ -48,43 +49,6 @@ status() { printf '[%s] PR #%s: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$PR cleanup() { rm -rf "$REVIEW_TMPDIR" "$LOCKFILE" "$STATUSFILE" "/tmp/${PROJECT_NAME}-review-graph-${PR_NUMBER}.json"; } trap cleanup EXIT -# ============================================================================= -# agent_run — synchronous Claude invocation (one-shot claude -p) -# ============================================================================= -# Usage: agent_run [--resume SESSION_ID] [--worktree DIR] PROMPT -# Sets: _AGENT_SESSION_ID (updated each call, persisted to SID_FILE) -_AGENT_SESSION_ID="" - -agent_run() { - local resume_id="" worktree_dir="" - while [[ "${1:-}" == --* ]]; do - case "$1" in - --resume) shift; resume_id="${1:-}"; shift ;; - --worktree) shift; worktree_dir="${1:-}"; shift ;; - *) shift ;; - esac - done - local prompt="${1:-}" - - local -a args=(-p "$prompt" --output-format json --dangerously-skip-permissions --max-turns 200) - [ -n "$resume_id" ] && args+=(--resume "$resume_id") - [ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL") - - local run_dir="${worktree_dir:-$(pwd)}" - local output - log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})" - output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true - - # Extract and persist session_id - local new_sid - new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true - if [ -n "$new_sid" ]; then - _AGENT_SESSION_ID="$new_sid" - printf '%s' "$new_sid" > "$SID_FILE" - log "agent_run: session_id=${new_sid:0:12}..." - fi -} - # ============================================================================= # LOG ROTATION # ============================================================================= From 8f41230fa00c45f529c55095a484983831117d78 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Mar 2026 06:34:26 +0000 Subject: [PATCH 4/8] fix: Migrate review-pr.sh to SDK + pr-lifecycle (#800) Move SID_FILE recovery into agent_recover_session() in lib/agent-sdk.sh to eliminate remaining duplicate block between dev-agent.sh and review-pr.sh. Co-Authored-By: Claude Opus 4.6 (1M context) --- dev/dev-agent.sh | 5 +---- lib/agent-sdk.sh | 12 ++++++++++-- review/review-pr.sh | 5 +---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/dev/dev-agent.sh b/dev/dev-agent.sh index f76041e..3a78f53 100755 --- a/dev/dev-agent.sh +++ b/dev/dev-agent.sh @@ -243,10 +243,7 @@ if [ -n "$PR_NUMBER" ]; then fi # Recover session_id from .sid file (crash recovery) -if [ -f "$SID_FILE" ]; then - _AGENT_SESSION_ID=$(cat "$SID_FILE") - log "recovered session_id: ${_AGENT_SESSION_ID:0:12}..." -fi +agent_recover_session # ============================================================================= # WORKTREE SETUP diff --git a/lib/agent-sdk.sh b/lib/agent-sdk.sh index 4199f78..41879bf 100644 --- a/lib/agent-sdk.sh +++ b/lib/agent-sdk.sh @@ -12,13 +12,21 @@ # agent_run [--resume SESSION_ID] [--worktree DIR] PROMPT # # After each call, _AGENT_SESSION_ID holds the session ID (also saved to SID_FILE). -# Recover a previous session on startup: -# if [ -f "$SID_FILE" ]; then _AGENT_SESSION_ID=$(cat "$SID_FILE"); fi +# Call agent_recover_session() on startup to restore a previous session. set -euo pipefail _AGENT_SESSION_ID="" +# agent_recover_session — restore session_id from SID_FILE if it exists. +# Call this before agent_run --resume to enable session continuity. +agent_recover_session() { + if [ -f "$SID_FILE" ]; then + _AGENT_SESSION_ID=$(cat "$SID_FILE") + log "agent_recover_session: ${_AGENT_SESSION_ID:0:12}..." + fi +} + # agent_run — synchronous Claude invocation (one-shot claude -p) # Usage: agent_run [--resume SESSION_ID] [--worktree DIR] PROMPT # Sets: _AGENT_SESSION_ID (updated each call, persisted to SID_FILE) diff --git a/review/review-pr.sh b/review/review-pr.sh index d4e3163..0ae0fdb 100755 --- a/review/review-pr.sh +++ b/review/review-pr.sh @@ -135,10 +135,7 @@ if [ -n "$PREV_REV" ] && [ "$PREV_REV" != "null" ]; then fi # Recover session_id from .sid file (re-review continuity) -if [ -f "$SID_FILE" ]; then - _AGENT_SESSION_ID=$(cat "$SID_FILE") - log "recovered session_id: ${_AGENT_SESSION_ID:0:12}..." -fi +agent_recover_session # ============================================================================= # FETCH DIFF From d2c71e5dcd1b30ba1178f63eec077d9ba0079cbf Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Mar 2026 06:36:32 +0000 Subject: [PATCH 5/8] fix: Migrate review-pr.sh to SDK + pr-lifecycle (#800) Register lib/agent-sdk.sh in the CI smoke test so agent_recover_session resolves for dev-agent.sh and review-pr.sh. Co-Authored-By: Claude Opus 4.6 (1M context) --- .woodpecker/agent-smoke.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.woodpecker/agent-smoke.sh b/.woodpecker/agent-smoke.sh index dd8bf6a..94e9258 100644 --- a/.woodpecker/agent-smoke.sh +++ b/.woodpecker/agent-smoke.sh @@ -96,6 +96,7 @@ echo "=== 2/2 Function resolution ===" # Included — these are inline-sourced by agent scripts: # lib/env.sh — sourced by every agent (log, forge_api, etc.) # lib/agent-session.sh — sourced by orchestrators (create_agent_session, monitor_phase_loop, etc.) +# lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session) # lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.) # lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set # lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue) @@ -115,7 +116,7 @@ echo "=== 2/2 Function resolution ===" # If a new lib file is added and sourced by agents, add it to LIB_FUNS below # and add a check_script call for it in the lib files section further down. LIB_FUNS=$( - for f in lib/agent-session.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do + for f in lib/agent-session.sh lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do if [ -f "$f" ]; then get_fns "$f"; fi done | sort -u ) @@ -180,6 +181,7 @@ check_script() { # but this verifies calls *within* each lib file are also resolvable. check_script lib/env.sh lib/mirrors.sh check_script lib/agent-session.sh +check_script lib/agent-sdk.sh check_script lib/ci-helpers.sh check_script lib/secret-scan.sh check_script lib/file-action-issue.sh lib/secret-scan.sh @@ -203,7 +205,7 @@ check_script dev/phase-handler.sh action/action-agent.sh lib/secret-scan.sh check_script dev/dev-poll.sh check_script dev/phase-test.sh check_script gardener/gardener-run.sh -check_script review/review-pr.sh lib/agent-session.sh +check_script review/review-pr.sh lib/agent-sdk.sh check_script review/review-poll.sh check_script planner/planner-run.sh lib/agent-session.sh lib/formula-session.sh check_script supervisor/supervisor-poll.sh From 1912a24c4669fb4871c2ee634d99b97bd645c9e3 Mon Sep 17 00:00:00 2001 From: johba Date: Sat, 28 Mar 2026 07:58:17 +0100 Subject: [PATCH 6/8] feat: edge proxy + staging container to docker stack (#807) This PR implements issue #764 by adding two Caddy-based containers to the disinto docker stack: ## Changes ### Edge Proxy Service - Caddy reverse proxy serving on ports 80/443 - Routes /forgejo/* -> Forgejo:3000 - Routes /ci/* -> Woodpecker:8000 - Default route -> staging container ### Staging Service - Caddy static file server for staging artifacts - Serves a default "Nothing shipped yet" page - CI pipelines can write to the staging-site volume to update content ### Files Modified - bin/disinto: Updated generate_compose() to add edge + staging services - bin/disinto: Added generate_caddyfile() function - bin/disinto: Added generate_staging_index() function - docker/staging-index.html: New default staging page ## Acceptance Criteria - [x] disinto init generates docker-compose.yml with edge + staging services - [x] Edge proxy routes /forgejo/*, /ci/*, and default routes correctly - [x] Staging container serves default "Nothing shipped yet" page - [x] docker/ directory contains Caddyfile template generated by disinto init - [x] disinto up starts all containers including edge and staging Co-authored-by: johba Reviewed-on: https://codeberg.org/johba/disinto/pulls/807 --- bin/disinto | 121 +++++++++++++++++++++++++++++++++++++++++++++- docker/index.html | 38 +++++++++++++++ 2 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 docker/index.html diff --git a/bin/disinto b/bin/disinto index 3ec1ce0..ef6924d 100755 --- a/bin/disinto +++ b/bin/disinto @@ -260,10 +260,37 @@ services: networks: - disinto-net + # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging + # Serves on ports 80/443, routes based on path + edge: + image: caddy:alpine + ports: + - "80:80" + - "443:443" + volumes: + - ./docker/Caddyfile:/etc/caddy/Caddyfile + - caddy_data:/data + depends_on: + - forgejo + - woodpecker + - staging + networks: + - disinto-net + + # Staging container — static file server for staging artifacts + # Edge proxy routes to this container for default requests + staging: + image: caddy:alpine + command: ["caddy", "file-server", "--root", "/srv/site"] + volumes: + - ./docker:/srv/site:ro + networks: + - disinto-net + # Staging deployment slot — activated by Woodpecker staging pipeline (#755). # Profile-gated: only starts when explicitly targeted by deploy commands. # Customize image/ports/volumes for your project after init. - staging: + staging-deploy: image: alpine:3 profiles: ["staging"] security_opt: @@ -279,6 +306,7 @@ volumes: woodpecker-data: agent-data: project-repos: + caddy_data: networks: disinto-net: @@ -321,6 +349,95 @@ generate_agent_docker() { fi } +# Generate docker/Caddyfile template for edge proxy. +generate_caddyfile() { + local docker_dir="${FACTORY_ROOT}/docker" + local caddyfile="${docker_dir}/Caddyfile" + + if [ -f "$caddyfile" ]; then + echo "Caddyfile: ${caddyfile} (already exists, skipping)" + return + fi + + cat > "$caddyfile" <<'CADDYFILEEOF' +# Caddyfile — edge proxy configuration +# IP-only binding at bootstrap; domain + TLS added later via vault resource request + +:80 { + # Reverse proxy to Forgejo + handle /forgejo/* { + reverse_proxy forgejo:3000 + } + + # Reverse proxy to Woodpecker CI + handle /ci/* { + reverse_proxy woodpecker:8000 + } + + # Default: proxy to staging container + handle { + reverse_proxy staging:80 + } +} +CADDYFILEEOF + + echo "Created: ${caddyfile}" +} + +# Generate docker/index.html default page. +generate_staging_index() { + local docker_dir="${FACTORY_ROOT}/docker" + local index_file="${docker_dir}/index.html" + + if [ -f "$index_file" ]; then + echo "Staging: ${index_file} (already exists, skipping)" + return + fi + + cat > "$index_file" <<'INDEXEOF' + + + + + + Nothing shipped yet + + + +
+

Nothing shipped yet

+

CI pipelines will update this page with your staging artifacts.

+
+ + +INDEXEOF + + echo "Created: ${index_file}" +} + # Generate template .woodpecker/ deployment pipeline configs in a project repo. # Creates staging.yml and production.yml alongside the project's existing CI config. # These pipelines trigger on Woodpecker's deployment event with environment filters. @@ -1599,6 +1716,8 @@ p.write_text(text) forge_port="${forge_port:-3000}" generate_compose "$forge_port" generate_agent_docker + generate_caddyfile + generate_staging_index # Create empty .env so docker compose can parse the agents service # env_file reference before setup_forge generates the real tokens (#769) touch "${FACTORY_ROOT}/.env" diff --git a/docker/index.html b/docker/index.html new file mode 100644 index 0000000..de327d5 --- /dev/null +++ b/docker/index.html @@ -0,0 +1,38 @@ + + + + + + Nothing shipped yet + + + +
+

Nothing shipped yet

+

CI pipelines will update this page with your staging artifacts.

+
+ + From 5adf34e6956cca7520c80d8bae063731d3b34e90 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Mar 2026 07:02:50 +0000 Subject: [PATCH 7/8] fix: Migrate gardener-run.sh to SDK + pr-lifecycle (#801) Replace tmux-based run_formula_and_monitor architecture with synchronous agent_run() from agent-sdk.sh. Replace custom CI/review/merge phase callbacks (~350 lines) with pr_walk_to_merge() from pr-lifecycle.sh. Key changes: - Source agent-sdk.sh + pr-lifecycle.sh instead of agent-session.sh - One-shot claude -p invocation replaces tmux session management - Bash script IS the state machine (no phase files needed) - Keep _gardener_execute_manifest() for post-merge manifest execution - Keep all guards, formula loading, context building unchanged Co-Authored-By: Claude Opus 4.6 (1M context) --- gardener/gardener-run.sh | 506 +++++++-------------------------------- 1 file changed, 88 insertions(+), 418 deletions(-) diff --git a/gardener/gardener-run.sh b/gardener/gardener-run.sh index 8b3e2ae..733583d 100755 --- a/gardener/gardener-run.sh +++ b/gardener/gardener-run.sh @@ -1,10 +1,18 @@ #!/usr/bin/env bash # ============================================================================= -# gardener-run.sh — Cron wrapper: gardener execution via Claude + formula +# gardener-run.sh — Cron wrapper: gardener execution via SDK + formula # -# Runs 4x/day (or on-demand). Guards against concurrent runs and low memory. -# Creates a tmux session with Claude (sonnet) reading formulas/run-gardener.toml. -# No action issues — the gardener is a nervous system component, not work (AD-001). +# Synchronous bash loop using claude -p (one-shot invocation). +# No tmux sessions, no phase files — the bash script IS the state machine. +# +# Flow: +# 1. Guards: cron lock, memory check +# 2. Load formula (formulas/run-gardener.toml) +# 3. Build context: AGENTS.md, scratch file, prompt footer +# 4. agent_run(worktree, prompt) → Claude does maintenance, pushes if needed +# 5. If pushed: pr_walk_to_merge() from lib/pr-lifecycle.sh +# 6. Post-merge: execute pending actions manifest (gardener/pending-actions.json) +# 7. Mirror push # # Usage: # gardener-run.sh [projects/disinto.toml] # project config (default: disinto) @@ -22,8 +30,6 @@ export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}" source "$FACTORY_ROOT/lib/env.sh" # Use gardener-bot's own Forgejo identity (#747) FORGE_TOKEN="${FORGE_GARDENER_TOKEN:-${FORGE_TOKEN}}" -# shellcheck source=../lib/agent-session.sh -source "$FACTORY_ROOT/lib/agent-session.sh" # shellcheck source=../lib/formula-session.sh source "$FACTORY_ROOT/lib/formula-session.sh" # shellcheck source=../lib/worktree.sh @@ -34,26 +40,20 @@ source "$FACTORY_ROOT/lib/ci-helpers.sh" source "$FACTORY_ROOT/lib/mirrors.sh" # shellcheck source=../lib/guard.sh source "$FACTORY_ROOT/lib/guard.sh" +# shellcheck source=../lib/agent-sdk.sh +source "$FACTORY_ROOT/lib/agent-sdk.sh" +# shellcheck source=../lib/pr-lifecycle.sh +source "$FACTORY_ROOT/lib/pr-lifecycle.sh" LOG_FILE="$SCRIPT_DIR/gardener.log" -# shellcheck disable=SC2034 # consumed by run_formula_and_monitor -SESSION_NAME="gardener-${PROJECT_NAME}" -PHASE_FILE="/tmp/gardener-session-${PROJECT_NAME}.phase" - -# shellcheck disable=SC2034 # read by monitor_phase_loop in lib/agent-session.sh -PHASE_POLL_INTERVAL=15 - +# shellcheck disable=SC2034 # consumed by agent-sdk.sh +LOGFILE="$LOG_FILE" +# shellcheck disable=SC2034 # consumed by agent-sdk.sh +SID_FILE="/tmp/gardener-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/gardener-${PROJECT_NAME}-scratch.md" RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt" GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt" - -# Merge-through state (used by _gardener_on_phase_change callback) -_GARDENER_PR="" -_GARDENER_MERGE_START=0 -_GARDENER_MERGE_TIMEOUT=1800 # 30 min -_GARDENER_CI_FIX_COUNT=0 -_GARDENER_REVIEW_ROUND=0 -_GARDENER_CRASH_COUNT=0 +WORKTREE="/tmp/${PROJECT_NAME}-gardener-run" log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } @@ -72,7 +72,7 @@ build_context_block AGENTS.md SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") -# ── Build prompt (manifest format reference for deferred actions) ───────── +# ── Build prompt ───────────────────────────────────────────────────────── GARDENER_API_EXTRA=" ## Pending-actions manifest (REQUIRED) @@ -91,28 +91,28 @@ Supported actions: The commit-and-pr step converts JSONL to JSON array. The orchestrator executes actions after the PR merges. Do NOT call mutation APIs directly during the run." -build_prompt_footer "$GARDENER_API_EXTRA" -# Extend phase protocol with merge-through instructions for compaction survival -PROMPT_FOOTER="${PROMPT_FOOTER} +PROMPT_FOOTER="## Forge API reference +Base URL: ${FORGE_API} +Auth header: -H \"Authorization: token \${FORGE_TOKEN}\" + Read issue: curl -sf -H \"Authorization: token \${FORGE_TOKEN}\" '${FORGE_API}/issues/{number}' | jq '.body' + Create issue: curl -sf -X POST -H \"Authorization: token \${FORGE_TOKEN}\" -H 'Content-Type: application/json' '${FORGE_API}/issues' -d '{\"title\":\"...\",\"body\":\"...\",\"labels\":[LABEL_ID]}'${GARDENER_API_EXTRA} + List labels: curl -sf -H \"Authorization: token \${FORGE_TOKEN}\" '${FORGE_API}/labels' +NEVER echo or include the actual token value in output — always reference \${FORGE_TOKEN}. -## Merge-through protocol (commit-and-pr step) -After creating the PR, write the PR number and signal CI: +## Environment +FACTORY_ROOT=${FACTORY_ROOT} +PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT} +OPS_REPO_ROOT=${OPS_REPO_ROOT} +PRIMARY_BRANCH=${PRIMARY_BRANCH} + +## Completion protocol (REQUIRED) +When the commit-and-pr step creates a PR, write the PR number and stop: echo \"\$PR_NUMBER\" > '${GARDENER_PR_FILE}' - echo 'PHASE:awaiting_ci' > '${PHASE_FILE}' -Then STOP and WAIT for CI results. -When 'CI passed' is injected: - echo 'PHASE:awaiting_review' > '${PHASE_FILE}' -Then STOP and WAIT. -When 'CI failed' is injected: - Fix, commit, push, then: echo 'PHASE:awaiting_ci' > '${PHASE_FILE}' -When review feedback is injected: - Address all feedback, commit, push, then: echo 'PHASE:awaiting_ci' > '${PHASE_FILE}' -If no file changes in commit-and-pr: - echo 'PHASE:done' > '${PHASE_FILE}'" +Then STOP. Do NOT write PHASE: signals — the orchestrator handles CI, review, and merge. +If no file changes exist (empty commit-and-pr), just stop — no PR needed." -# shellcheck disable=SC2034 # consumed by run_formula_and_monitor -PROMPT="You are the issue gardener for ${FORGE_REPO}. Work through the formula below. Follow the phase protocol: if the commit-and-pr step creates a PR, write PHASE:awaiting_ci and wait for orchestrator CI/review/merge handling. If no file changes, write PHASE:done. The orchestrator will time you out if you return to the prompt without signalling. +PROMPT="You are the issue gardener for ${FORGE_REPO}. Work through the formula below. You have full shell access and --dangerously-skip-permissions. Fix what you can. File vault items for what you cannot. Do NOT ask permission — act first, report after. @@ -130,14 +130,21 @@ ${FORMULA_CONTENT} ${SCRATCH_INSTRUCTION} ${PROMPT_FOOTER}" -# ── Phase callback for merge-through ───────────────────────────────────── -# Handles CI polling, review injection, merge, and cleanup after PR creation. -# Lighter than dev/phase-handler.sh — tailored for gardener doc-only PRs. +# ── Create worktree ────────────────────────────────────────────────────── +cd "$PROJECT_REPO_ROOT" +git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true +worktree_cleanup "$WORKTREE" +git worktree add "$WORKTREE" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null -# ── Post-merge manifest execution ───────────────────────────────────── +cleanup() { + worktree_cleanup "$WORKTREE" + rm -f "$GARDENER_PR_FILE" +} +trap cleanup EXIT + +# ── Post-merge manifest execution ──────────────────────────────────────── # Reads gardener/pending-actions.json and executes each action via API. # Failed actions are logged but do not block completion. -# shellcheck disable=SC2317 # called indirectly via _gardener_merge _gardener_execute_manifest() { local manifest_file="$PROJECT_REPO_ROOT/gardener/pending-actions.json" if [ ! -f "$manifest_file" ]; then @@ -295,387 +302,50 @@ _gardener_execute_manifest() { log "manifest: execution complete (${count} actions processed)" } -# shellcheck disable=SC2317 # called indirectly by monitor_phase_loop -_gardener_merge() { - local merge_response merge_http_code - merge_response=$(curl -s -w "\n%{http_code}" -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H 'Content-Type: application/json' \ - "${FORGE_API}/pulls/${_GARDENER_PR}/merge" \ - -d '{"Do":"merge","delete_branch_after_merge":true}') || true - merge_http_code=$(echo "$merge_response" | tail -1) +# ── Reset result file ──────────────────────────────────────────────────── +rm -f "$RESULT_FILE" "$GARDENER_PR_FILE" +touch "$RESULT_FILE" - if [ "$merge_http_code" = "200" ] || [ "$merge_http_code" = "204" ]; then - log "gardener PR #${_GARDENER_PR} merged" - # Pull merged primary branch and push to mirrors +# ── Run agent ───────────────────────────────────────────────────────────── +export CLAUDE_MODEL="sonnet" + +agent_run --worktree "$WORKTREE" "$PROMPT" +log "agent_run complete" + +# ── Detect PR ───────────────────────────────────────────────────────────── +PR_NUMBER="" +if [ -f "$GARDENER_PR_FILE" ]; then + PR_NUMBER=$(tr -d '[:space:]' < "$GARDENER_PR_FILE") +fi + +# Fallback: search for open gardener PRs +if [ -z "$PR_NUMBER" ]; then + PR_NUMBER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/pulls?state=open&limit=10" | \ + jq -r '[.[] | select(.head.ref | startswith("chore/gardener-"))] | .[0].number // empty') || true +fi + +# ── Walk PR to merge ────────────────────────────────────────────────────── +if [ -n "$PR_NUMBER" ]; then + log "walking PR #${PR_NUMBER} to merge" + pr_walk_to_merge "$PR_NUMBER" "$_AGENT_SESSION_ID" "$WORKTREE" || true + + if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then + # Post-merge: pull primary, mirror push, execute manifest git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true mirror_push _gardener_execute_manifest - printf 'PHASE:done\n' > "$PHASE_FILE" - return 0 - fi - - # Already merged (race)? - if [ "$merge_http_code" = "405" ]; then - local pr_merged - pr_merged=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/pulls/${_GARDENER_PR}" | jq -r '.merged // false') || true - if [ "$pr_merged" = "true" ]; then - log "gardener PR #${_GARDENER_PR} already merged" - # Pull merged primary branch and push to mirrors - git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true - git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true - git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true - mirror_push - _gardener_execute_manifest - printf 'PHASE:done\n' > "$PHASE_FILE" - return 0 - fi - log "gardener merge blocked (HTTP 405)" - printf 'PHASE:failed\nReason: gardener PR #%s merge blocked (HTTP 405)\n' \ - "$_GARDENER_PR" > "$PHASE_FILE" - return 0 - fi - - # Other failure (likely conflicts) — tell Claude to rebase - log "gardener merge failed (HTTP ${merge_http_code}) — requesting rebase" - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ - "Merge failed for PR #${_GARDENER_PR} (likely conflicts). Rebase and push: - git fetch origin ${PRIMARY_BRANCH} && git rebase origin/${PRIMARY_BRANCH} - git push --force-with-lease origin HEAD - echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" -If rebase fails, write PHASE:failed with a reason." -} - -# shellcheck disable=SC2317 # called indirectly by monitor_phase_loop -_gardener_timeout_cleanup() { - log "gardener merge-through timed out (${_GARDENER_MERGE_TIMEOUT}s) — closing PR" - if [ -n "$_GARDENER_PR" ]; then - curl -sf -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H 'Content-Type: application/json' \ - "${FORGE_API}/pulls/${_GARDENER_PR}" \ - -d '{"state":"closed"}' >/dev/null 2>&1 || true - fi - printf 'PHASE:failed\nReason: merge-through timeout (%ss)\n' \ - "$_GARDENER_MERGE_TIMEOUT" > "$PHASE_FILE" -} - -# shellcheck disable=SC2317 # called indirectly by monitor_phase_loop -_gardener_handle_ci() { - # Start merge-through timer on first CI phase - if [ "$_GARDENER_MERGE_START" -eq 0 ]; then - _GARDENER_MERGE_START=$(date +%s) - fi - - # Check merge-through timeout - local elapsed - elapsed=$(( $(date +%s) - _GARDENER_MERGE_START )) - if [ "$elapsed" -ge "$_GARDENER_MERGE_TIMEOUT" ]; then - _gardener_timeout_cleanup - return 0 - fi - - # Discover PR number if unknown - if [ -z "$_GARDENER_PR" ]; then - if [ -f "$GARDENER_PR_FILE" ]; then - _GARDENER_PR=$(tr -d '[:space:]' < "$GARDENER_PR_FILE") - fi - # Fallback: search for open gardener PRs - if [ -z "$_GARDENER_PR" ]; then - _GARDENER_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/pulls?state=open&limit=10" | \ - jq -r '[.[] | select(.head.ref | startswith("chore/gardener-"))] | .[0].number // empty') || true - fi - if [ -z "$_GARDENER_PR" ]; then - log "ERROR: cannot find gardener PR" - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ - "ERROR: Could not find the gardener PR. Verify branch was pushed and PR created. Write the PR number to ${GARDENER_PR_FILE}, then write PHASE:awaiting_ci again." - return 0 - fi - log "tracking gardener PR #${_GARDENER_PR}" - fi - - # Skip CI for doc-only PRs - if ! ci_required_for_pr "$_GARDENER_PR" 2>/dev/null; then - log "CI not required (doc-only) — treating as passed" - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ - "CI passed on PR #${_GARDENER_PR} (doc-only changes, CI not required). -Write PHASE:awaiting_review to the phase file, then stop and wait: - echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" - return 0 - fi - - # No CI configured? - if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then - log "no CI configured — treating as passed" - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ - "CI passed on PR #${_GARDENER_PR} (no CI configured). -Write PHASE:awaiting_review to the phase file, then stop and wait: - echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" - return 0 - fi - - # Get HEAD SHA from PR - local head_sha - head_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/pulls/${_GARDENER_PR}" | jq -r '.head.sha // empty') || true - - if [ -z "$head_sha" ]; then - log "WARNING: could not get HEAD SHA for PR #${_GARDENER_PR}" - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ - "WARNING: Could not read HEAD SHA for PR #${_GARDENER_PR}. Verify push succeeded. Then write PHASE:awaiting_ci again." - return 0 - fi - - # Poll CI (15 min max within this phase) - local ci_done=false ci_state="unknown" ci_elapsed=0 ci_timeout=900 - while [ "$ci_elapsed" -lt "$ci_timeout" ]; do - sleep 30 - ci_elapsed=$((ci_elapsed + 30)) - - # Session health check - if [ -f "/tmp/claude-exited-${_MONITOR_SESSION:-$SESSION_NAME}.ts" ] || \ - ! tmux has-session -t "${_MONITOR_SESSION:-$SESSION_NAME}" 2>/dev/null; then - log "session died during CI wait" - return 0 - fi - - # Merge-through timeout check - elapsed=$(( $(date +%s) - _GARDENER_MERGE_START )) - if [ "$elapsed" -ge "$_GARDENER_MERGE_TIMEOUT" ]; then - _gardener_timeout_cleanup - return 0 - fi - - # Re-fetch HEAD in case Claude pushed new commits - head_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/pulls/${_GARDENER_PR}" | jq -r '.head.sha // empty') || true - - ci_state=$(ci_commit_status "$head_sha") || ci_state="unknown" - - case "$ci_state" in - success|failure|error) ci_done=true; break ;; - esac - done - - if ! $ci_done; then - log "CI timeout for PR #${_GARDENER_PR}" - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ - "CI TIMEOUT: CI did not complete within 15 minutes for PR #${_GARDENER_PR}. Write PHASE:failed with a reason if you cannot proceed." - return 0 - fi - - log "CI: ${ci_state} for PR #${_GARDENER_PR}" - - if [ "$ci_state" = "success" ]; then - _GARDENER_CI_FIX_COUNT=0 - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ - "CI passed on PR #${_GARDENER_PR}. -Write PHASE:awaiting_review to the phase file, then stop and wait: - echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" + rm -f "$SCRATCH_FILE" + log "gardener PR #${PR_NUMBER} merged — manifest executed" else - _GARDENER_CI_FIX_COUNT=$(( _GARDENER_CI_FIX_COUNT + 1 )) - if [ "$_GARDENER_CI_FIX_COUNT" -gt 3 ]; then - log "CI exhausted after ${_GARDENER_CI_FIX_COUNT} attempts" - printf 'PHASE:failed\nReason: gardener CI exhausted after %d attempts\n' \ - "$_GARDENER_CI_FIX_COUNT" > "$PHASE_FILE" - return 0 - fi - - # Get error details - local pipeline_num ci_error_log - pipeline_num=$(ci_pipeline_number "$head_sha") - - ci_error_log="" - if [ -n "$pipeline_num" ]; then - ci_error_log=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$pipeline_num" 2>/dev/null \ - | tail -80 | head -c 8000 || true) - fi - - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ - "CI failed on PR #${_GARDENER_PR} (attempt ${_GARDENER_CI_FIX_COUNT}/3). -${ci_error_log:+Error output: -${ci_error_log} -}Fix the issue, commit, push, then write: - echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" -Then stop and wait." + log "PR #${PR_NUMBER} not merged (reason: ${_PR_WALK_EXIT_REASON:-unknown})" fi -} - -# shellcheck disable=SC2317 # called indirectly by monitor_phase_loop -_gardener_handle_review() { - log "waiting for review on PR #${_GARDENER_PR:-?}" - _GARDENER_CI_FIX_COUNT=0 # Reset CI fix budget for next review cycle - - local review_elapsed=0 review_timeout=1800 - while [ "$review_elapsed" -lt "$review_timeout" ]; do - sleep 60 # 1 min between review checks (gardener PRs are fast-tracked) - review_elapsed=$((review_elapsed + 60)) - - # Session health check - if [ -f "/tmp/claude-exited-${_MONITOR_SESSION:-$SESSION_NAME}.ts" ] || \ - ! tmux has-session -t "${_MONITOR_SESSION:-$SESSION_NAME}" 2>/dev/null; then - log "session died during review wait" - return 0 - fi - - # Merge-through timeout check - local elapsed - elapsed=$(( $(date +%s) - _GARDENER_MERGE_START )) - if [ "$elapsed" -ge "$_GARDENER_MERGE_TIMEOUT" ]; then - _gardener_timeout_cleanup - return 0 - fi - - # Check if phase changed while we wait (e.g. review-poll injected feedback) - local new_mtime - new_mtime=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) - if [ "$new_mtime" -gt "${LAST_PHASE_MTIME:-0}" ]; then - log "phase changed during review wait — returning to monitor loop" - return 0 - fi - - # Check for review on current HEAD - local review_sha review_comment - review_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/pulls/${_GARDENER_PR}" | jq -r '.head.sha // empty') || true - - review_comment=$(forge_api_all "/issues/${_GARDENER_PR}/comments" 2>/dev/null | \ - jq -r --arg sha "${review_sha:-none}" \ - '[.[] | select(.body | contains("