From 578a6ec81d1556967526f672ecbdcba9743c7788 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 22 Mar 2026 20:48:07 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20feat:=20gardener=20should=20stay=20alive?= =?UTF-8?q?=20until=20its=20PR=20merges=20=E2=80=94=20address=20review=20f?= =?UTF-8?q?eedback,=20rebase=20if=20needed=20(#571)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- formulas/run-gardener.toml | 26 ++- gardener/gardener-run.sh | 406 ++++++++++++++++++++++++++++++++++++- lib/formula-session.sh | 5 +- 3 files changed, 424 insertions(+), 13 deletions(-) diff --git a/formulas/run-gardener.toml b/formulas/run-gardener.toml index 33bc44e..48d2f04 100644 --- a/formulas/run-gardener.toml +++ b/formulas/run-gardener.toml @@ -419,7 +419,7 @@ needs = ["blocked-review"] [[steps]] id = "commit-and-pr" -title = "One commit with all file changes, push, create PR" +title = "One commit with all file changes, push, create PR, monitor to merge" description = """ Collect all file changes from this run (AGENTS.md updates) into a single commit. API calls (issue creation, PR comments, closures) already happened during the @@ -429,7 +429,7 @@ run — only file changes need the PR. cd "$PROJECT_REPO_ROOT" git status --porcelain - If there are no file changes, skip this entire step — no commit, no PR. + If there are no file changes, skip to step 3 — no commit, no PR needed. 2. If there are changes: a. Create a branch: @@ -444,16 +444,26 @@ run — only file changes need the PR. e. Push: git push -u origin "$BRANCH" f. Create a PR: - curl -sf -X POST \ + PR_RESPONSE=$(curl -sf -X POST \ -H "Authorization: token $CODEBERG_TOKEN" \ -H "Content-Type: application/json" \ "$CODEBERG_API/pulls" \ -d '{"title":"chore: gardener housekeeping", - "head":"","base":"", - "body":"Automated gardener housekeeping — AGENTS.md updates.\n\nReview-agent fast-tracks doc-only PRs."}' - g. Return to primary branch: - git checkout "$PRIMARY_BRANCH" + "head":"'"$BRANCH"'","base":"'"$PRIMARY_BRANCH"'", + "body":"Automated gardener housekeeping — AGENTS.md updates.\\n\\nReview-agent fast-tracks doc-only PRs."}') + PR_NUMBER=$(echo "$PR_RESPONSE" | jq -r '.number') + g. Save PR number for orchestrator tracking: + echo "$PR_NUMBER" > /tmp/gardener-pr-${PROJECT_NAME}.txt + h. Signal the orchestrator to monitor CI: + echo "PHASE:awaiting_ci" > "$PHASE_FILE" + i. STOP and WAIT. Do NOT return to the primary branch. + The orchestrator polls CI, injects results and review feedback. + When you receive injected CI or review feedback, follow its + instructions, then write PHASE:awaiting_ci and wait again. -3. If the PR creation fails (e.g. no changes after staging), log and continue. +3. If no file changes existed (step 1 found nothing): + echo "PHASE:done" > "$PHASE_FILE" + +4. If PR creation fails, log the error and write PHASE:failed. """ needs = ["agents-update"] diff --git a/gardener/gardener-run.sh b/gardener/gardener-run.sh index a4216a6..775a854 100755 --- a/gardener/gardener-run.sh +++ b/gardener/gardener-run.sh @@ -24,6 +24,8 @@ source "$FACTORY_ROOT/lib/env.sh" source "$FACTORY_ROOT/lib/agent-session.sh" # shellcheck source=../lib/formula-session.sh source "$FACTORY_ROOT/lib/formula-session.sh" +# shellcheck source=../lib/ci-helpers.sh +source "$FACTORY_ROOT/lib/ci-helpers.sh" LOG_FILE="$SCRIPT_DIR/gardener.log" # shellcheck disable=SC2034 # consumed by run_formula_and_monitor @@ -35,6 +37,15 @@ PHASE_POLL_INTERVAL=15 SCRATCH_FILE="/tmp/gardener-${PROJECT_NAME}-scratch.md" RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt" +GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt" + +# Merge-through state (used by _gardener_on_phase_change callback) +_GARDENER_PR="" +_GARDENER_MERGE_START=0 +_GARDENER_MERGE_TIMEOUT=1800 # 30 min +_GARDENER_CI_FIX_COUNT=0 +_GARDENER_REVIEW_ROUND=0 +_GARDENER_CRASH_COUNT=0 log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } @@ -64,8 +75,26 @@ GARDENER_API_EXTRA=" " build_prompt_footer "$GARDENER_API_EXTRA" +# Extend phase protocol with merge-through instructions for compaction survival +PROMPT_FOOTER="${PROMPT_FOOTER} + +## Merge-through protocol (commit-and-pr step) +After creating the PR, write the PR number and signal CI: + echo \"\$PR_NUMBER\" > '${GARDENER_PR_FILE}' + echo 'PHASE:awaiting_ci' > '${PHASE_FILE}' +Then STOP and WAIT for CI results. +When 'CI passed' is injected: + echo 'PHASE:awaiting_review' > '${PHASE_FILE}' +Then STOP and WAIT. +When 'CI failed' is injected: + Fix, commit, push, then: echo 'PHASE:awaiting_ci' > '${PHASE_FILE}' +When review feedback is injected: + Address all feedback, commit, push, then: echo 'PHASE:awaiting_ci' > '${PHASE_FILE}' +If no file changes in commit-and-pr: + echo 'PHASE:done' > '${PHASE_FILE}'" + # shellcheck disable=SC2034 # consumed by run_formula_and_monitor -PROMPT="You are the issue gardener for ${CODEBERG_REPO}. Work through the formula below. You MUST write PHASE:done to '${PHASE_FILE}' when finished — the orchestrator will time you out if you return to the prompt without signalling. +PROMPT="You are the issue gardener for ${CODEBERG_REPO}. Work through the formula below. Follow the phase protocol: if the commit-and-pr step creates a PR, write PHASE:awaiting_ci and wait for orchestrator CI/review/merge handling. If no file changes, write PHASE:done. The orchestrator will time you out if you return to the prompt without signalling. You have full shell access and --dangerously-skip-permissions. Fix what you can. Escalate what you cannot. Do NOT ask permission — act first, report after. @@ -88,16 +117,387 @@ ${FORMULA_CONTENT} ${SCRATCH_INSTRUCTION} ${PROMPT_FOOTER}" +# ── Phase callback for merge-through ───────────────────────────────────── +# Handles CI polling, review injection, merge, and cleanup after PR creation. +# Lighter than dev/phase-handler.sh — tailored for gardener doc-only PRs. + +# shellcheck disable=SC2317 # called indirectly by monitor_phase_loop +_gardener_merge() { + local merge_response merge_http_code + merge_response=$(curl -s -w "\n%{http_code}" -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H 'Content-Type: application/json' \ + "${CODEBERG_API}/pulls/${_GARDENER_PR}/merge" \ + -d '{"Do":"merge","delete_branch_after_merge":true}') || true + merge_http_code=$(echo "$merge_response" | tail -1) + + if [ "$merge_http_code" = "200" ] || [ "$merge_http_code" = "204" ]; then + log "gardener PR #${_GARDENER_PR} merged" + printf 'PHASE:done\n' > "$PHASE_FILE" + return 0 + fi + + # Already merged (race)? + if [ "$merge_http_code" = "405" ]; then + local pr_merged + pr_merged=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${CODEBERG_API}/pulls/${_GARDENER_PR}" | jq -r '.merged // false') || true + if [ "$pr_merged" = "true" ]; then + log "gardener PR #${_GARDENER_PR} already merged" + printf 'PHASE:done\n' > "$PHASE_FILE" + return 0 + fi + log "gardener merge blocked (HTTP 405) — escalating" + printf 'PHASE:escalate\nReason: gardener PR #%s merge blocked (HTTP 405)\n' \ + "$_GARDENER_PR" > "$PHASE_FILE" + return 0 + fi + + # Other failure (likely conflicts) — tell Claude to rebase + log "gardener merge failed (HTTP ${merge_http_code}) — requesting rebase" + agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ + "Merge failed for PR #${_GARDENER_PR} (likely conflicts). Rebase and push: + git fetch origin ${PRIMARY_BRANCH} && git rebase origin/${PRIMARY_BRANCH} + git push --force-with-lease origin HEAD + echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" +If rebase fails, write PHASE:escalate with a reason." +} + +# shellcheck disable=SC2317 # called indirectly by monitor_phase_loop +_gardener_timeout_cleanup() { + log "gardener merge-through timed out (${_GARDENER_MERGE_TIMEOUT}s) — closing PR" + if [ -n "$_GARDENER_PR" ]; then + curl -sf -X PATCH \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H 'Content-Type: application/json' \ + "${CODEBERG_API}/pulls/${_GARDENER_PR}" \ + -d '{"state":"closed"}' >/dev/null 2>&1 || true + fi + printf 'PHASE:failed\nReason: merge-through timeout (%ss)\n' \ + "$_GARDENER_MERGE_TIMEOUT" > "$PHASE_FILE" +} + +# shellcheck disable=SC2317 # called indirectly by monitor_phase_loop +_gardener_handle_ci() { + # Start merge-through timer on first CI phase + if [ "$_GARDENER_MERGE_START" -eq 0 ]; then + _GARDENER_MERGE_START=$(date +%s) + fi + + # Check merge-through timeout + local elapsed + elapsed=$(( $(date +%s) - _GARDENER_MERGE_START )) + if [ "$elapsed" -ge "$_GARDENER_MERGE_TIMEOUT" ]; then + _gardener_timeout_cleanup + return 0 + fi + + # Discover PR number if unknown + if [ -z "$_GARDENER_PR" ]; then + if [ -f "$GARDENER_PR_FILE" ]; then + _GARDENER_PR=$(tr -d '[:space:]' < "$GARDENER_PR_FILE") + fi + # Fallback: search for open gardener PRs + if [ -z "$_GARDENER_PR" ]; then + _GARDENER_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${CODEBERG_API}/pulls?state=open&limit=10" | \ + jq -r '[.[] | select(.head.ref | startswith("chore/gardener-"))] | .[0].number // empty') || true + fi + if [ -z "$_GARDENER_PR" ]; then + log "ERROR: cannot find gardener PR" + agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ + "ERROR: Could not find the gardener PR. Verify branch was pushed and PR created. Write the PR number to ${GARDENER_PR_FILE}, then write PHASE:awaiting_ci again." + return 0 + fi + log "tracking gardener PR #${_GARDENER_PR}" + fi + + # Skip CI for doc-only PRs + if ! ci_required_for_pr "$_GARDENER_PR" 2>/dev/null; then + log "CI not required (doc-only) — treating as passed" + agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ + "CI passed on PR #${_GARDENER_PR} (doc-only changes, CI not required). +Write PHASE:awaiting_review to the phase file, then stop and wait: + echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" + return 0 + fi + + # No CI configured? + if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then + log "no CI configured — treating as passed" + agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ + "CI passed on PR #${_GARDENER_PR} (no CI configured). +Write PHASE:awaiting_review to the phase file, then stop and wait: + echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" + return 0 + fi + + # Get HEAD SHA from PR + local head_sha + head_sha=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${CODEBERG_API}/pulls/${_GARDENER_PR}" | jq -r '.head.sha // empty') || true + + if [ -z "$head_sha" ]; then + log "WARNING: could not get HEAD SHA for PR #${_GARDENER_PR}" + agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ + "WARNING: Could not read HEAD SHA for PR #${_GARDENER_PR}. Verify push succeeded. Then write PHASE:awaiting_ci again." + return 0 + fi + + # Poll CI (15 min max within this phase) + local ci_done=false ci_state="unknown" ci_elapsed=0 ci_timeout=900 + while [ "$ci_elapsed" -lt "$ci_timeout" ]; do + sleep 30 + ci_elapsed=$((ci_elapsed + 30)) + + # Session health check + if [ -f "/tmp/claude-exited-${_MONITOR_SESSION:-$SESSION_NAME}.ts" ] || \ + ! tmux has-session -t "${_MONITOR_SESSION:-$SESSION_NAME}" 2>/dev/null; then + log "session died during CI wait" + return 0 + fi + + # Merge-through timeout check + elapsed=$(( $(date +%s) - _GARDENER_MERGE_START )) + if [ "$elapsed" -ge "$_GARDENER_MERGE_TIMEOUT" ]; then + _gardener_timeout_cleanup + return 0 + fi + + # Re-fetch HEAD in case Claude pushed new commits + head_sha=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${CODEBERG_API}/pulls/${_GARDENER_PR}" | jq -r '.head.sha // empty') || true + + ci_state=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${CODEBERG_API}/commits/${head_sha}/status" | jq -r '.state // "unknown"') || ci_state="unknown" + + case "$ci_state" in + success|failure|error) ci_done=true; break ;; + esac + done + + if ! $ci_done; then + log "CI timeout for PR #${_GARDENER_PR}" + agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ + "CI TIMEOUT: CI did not complete within 15 minutes for PR #${_GARDENER_PR}. Write PHASE:escalate if you cannot proceed." + return 0 + fi + + log "CI: ${ci_state} for PR #${_GARDENER_PR}" + + if [ "$ci_state" = "success" ]; then + _GARDENER_CI_FIX_COUNT=0 + agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ + "CI passed on PR #${_GARDENER_PR}. +Write PHASE:awaiting_review to the phase file, then stop and wait: + echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" + else + _GARDENER_CI_FIX_COUNT=$(( _GARDENER_CI_FIX_COUNT + 1 )) + if [ "$_GARDENER_CI_FIX_COUNT" -gt 3 ]; then + log "CI exhausted after ${_GARDENER_CI_FIX_COUNT} attempts" + printf 'PHASE:escalate\nReason: gardener CI exhausted after %d attempts\n' \ + "$_GARDENER_CI_FIX_COUNT" > "$PHASE_FILE" + return 0 + fi + + # Get error details + local pipeline_num ci_error_log + pipeline_num=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${CODEBERG_API}/commits/${head_sha}/status" | \ + jq -r '.statuses[0].target_url // ""' | grep -oP 'pipeline/\K[0-9]+' | head -1 || true) + + ci_error_log="" + if [ -n "$pipeline_num" ]; then + ci_error_log=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$pipeline_num" 2>/dev/null \ + | tail -80 | head -c 8000 || true) + fi + + agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" \ + "CI failed on PR #${_GARDENER_PR} (attempt ${_GARDENER_CI_FIX_COUNT}/3). +${ci_error_log:+Error output: +${ci_error_log} +}Fix the issue, commit, push, then write: + echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" +Then stop and wait." + fi +} + +# shellcheck disable=SC2317 # called indirectly by monitor_phase_loop +_gardener_handle_review() { + log "waiting for review on PR #${_GARDENER_PR:-?}" + _GARDENER_CI_FIX_COUNT=0 # Reset CI fix budget for next review cycle + + local review_elapsed=0 review_timeout=1800 + while [ "$review_elapsed" -lt "$review_timeout" ]; do + sleep 60 # 1 min between review checks (gardener PRs are fast-tracked) + review_elapsed=$((review_elapsed + 60)) + + # Session health check + if [ -f "/tmp/claude-exited-${_MONITOR_SESSION:-$SESSION_NAME}.ts" ] || \ + ! tmux has-session -t "${_MONITOR_SESSION:-$SESSION_NAME}" 2>/dev/null; then + log "session died during review wait" + return 0 + fi + + # Merge-through timeout check + local elapsed + elapsed=$(( $(date +%s) - _GARDENER_MERGE_START )) + if [ "$elapsed" -ge "$_GARDENER_MERGE_TIMEOUT" ]; then + _gardener_timeout_cleanup + return 0 + fi + + # Check if phase changed while we wait (e.g. review-poll injected feedback) + local new_mtime + new_mtime=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) + if [ "$new_mtime" -gt "${LAST_PHASE_MTIME:-0}" ]; then + log "phase changed during review wait — returning to monitor loop" + return 0 + fi + + # Check for review on current HEAD + local review_sha review_comment + review_sha=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${CODEBERG_API}/pulls/${_GARDENER_PR}" | jq -r '.head.sha // empty') || true + + review_comment=$(codeberg_api_all "/issues/${_GARDENER_PR}/comments" 2>/dev/null | \ + jq -r --arg sha "${review_sha:-none}" \ + '[.[] | select(.body | contains("