From cb24968d9b3d9f9c55da34876bf2f250cdf26a05 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 12 Mar 2026 12:44:15 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20dark=20factory=20=E2=80=94=20autonomous?= =?UTF-8?q?=20CI/CD=20agents=20for=20harb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three agents extracted from ~/scripts/harb-{dev,review}/: - dev/ — pull-based dev agent (find ready issues → implement → PR → merge) - review/ — AI code review (structured verdicts, follow-up issues) - factory/ — supervisor (bash health checks, auto-fix, escalation) All secrets externalized to .env (see .env.example). Shared env/helpers in lib/env.sh. --- .env.example | 31 + .gitignore | 11 + README.md | 103 ++++ dev/ci-debug.sh | 79 +++ dev/dev-agent.sh | 1205 +++++++++++++++++++++++++++++++++++++++ dev/dev-poll.sh | 331 +++++++++++ factory/factory-poll.sh | 195 +++++++ lib/env.sh | 66 +++ review/review-poll.sh | 93 +++ review/review-pr.sh | 734 ++++++++++++++++++++++++ 10 files changed, 2848 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 README.md create mode 100755 dev/ci-debug.sh create mode 100755 dev/dev-agent.sh create mode 100755 dev/dev-poll.sh create mode 100755 factory/factory-poll.sh create mode 100755 lib/env.sh create mode 100755 review/review-poll.sh create mode 100755 review/review-pr.sh diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..9b24b87 --- /dev/null +++ b/.env.example @@ -0,0 +1,31 @@ +# Dark Factory — Environment Configuration +# Copy to .env and fill in your values. +# NEVER commit .env to the repo. 
+ +# Codeberg API token (read from ~/.netrc by default, override here if needed) +# CODEBERG_TOKEN= + +# Codeberg review bot token (separate account for formal reviews) +REVIEW_BOT_TOKEN= + +# Woodpecker CI API token +WOODPECKER_TOKEN= + +# Woodpecker CI server URL +WOODPECKER_SERVER=http://localhost:8000 + +# Woodpecker Postgres (for direct DB queries) +WOODPECKER_DB_PASSWORD= +WOODPECKER_DB_USER=woodpecker +WOODPECKER_DB_HOST=127.0.0.1 +WOODPECKER_DB_NAME=woodpecker + +# Target Codeberg repo +CODEBERG_REPO=johba/harb +CODEBERG_API=https://codeberg.org/api/v1/repos/johba/harb + +# Harb repo local path +HARB_REPO_ROOT=/home/debian/harb + +# Claude CLI timeout (seconds) +CLAUDE_TIMEOUT=7200 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..16f2721 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +# Secrets +.env + +# Runtime state +*.log +state.json +*.lock +*.pid + +# OS +.DS_Store diff --git a/README.md b/README.md new file mode 100644 index 0000000..2cfd628 --- /dev/null +++ b/README.md @@ -0,0 +1,103 @@ +# 🏭 Dark Factory + +Autonomous CI/CD factory for [harb](https://codeberg.org/johba/harb). Three agents, zero supervision needed. + +## Architecture + +``` +cron (*/10) ──→ factory-poll.sh ← supervisor (bash checks, zero tokens) + ├── all clear? → exit 0 + └── problem? → alert (or claude -p for complex fixes) + +cron (*/10) ──→ dev-poll.sh ← pulls ready issues, spawns dev-agent + └── dev-agent.sh ← claude -p: implement → PR → CI → review → merge + +cron (*/10) ──→ review-poll.sh ← finds unreviewed PRs, spawns review + └── review-pr.sh ← claude -p: review → approve/request changes +``` + +## Setup + +```bash +# 1. Clone +git clone ssh://git@codeberg.org/johba/dark-factory.git +cd dark-factory + +# 2. Configure +cp .env.example .env +# Fill in your tokens (see .env.example for descriptions) + +# 3. 
Install cron +crontab -e +# Add: +# */10 * * * * /path/to/dark-factory/factory/factory-poll.sh +# */10 * * * * /path/to/dark-factory/dev/dev-poll.sh +# */10 * * * * /path/to/dark-factory/review/review-poll.sh + +# 4. Verify +bash factory/factory-poll.sh # should log "all clear" +``` + +## Directory Structure + +``` +dark-factory/ +├── .env.example # Template — copy to .env, add secrets +├── .gitignore # Excludes .env, logs, state files +├── lib/ +│ └── env.sh # Shared: load .env, PATH, API helpers +├── dev/ +│ ├── dev-poll.sh # Cron entry: find ready issues +│ ├── dev-agent.sh # Implementation agent (claude -p) +│ └── ci-debug.sh # Woodpecker CI log helper +├── review/ +│ ├── review-poll.sh # Cron entry: find unreviewed PRs +│ └── review-pr.sh # Review agent (claude -p) +└── factory/ + └── factory-poll.sh # Supervisor: health checks + auto-fix +``` + +## How It Works + +### Dev Agent (Pull System) +1. `dev-poll.sh` scans `backlog`-labeled issues +2. Checks if all dependencies are merged into master +3. Picks the first ready issue, spawns `dev-agent.sh` +4. Agent: creates worktree → `claude -p` implements → commits → pushes → creates PR +5. Waits for CI. If CI fails: feeds errors back to claude (max 2 attempts per phase) +6. Waits for review. If REQUEST_CHANGES: feeds review back to claude +7. On APPROVE: merges PR, cleans up, closes issue + +### Review Agent +1. `review-poll.sh` finds open PRs with passing CI and no review +2. Spawns `review-pr.sh` which runs `claude -p` to review the diff +3. Posts structured review comment with verdict (APPROVE / REQUEST_CHANGES / DISCUSS) +4. Creates follow-up issues for pre-existing bugs found during review + +### Factory Supervisor +1. `factory-poll.sh` runs pure bash checks every 10 minutes: + - CI: stuck or failing pipelines + - PRs: derailed (CI fail + no activity) + - Dev-agent: alive and making progress + - Git: clean state on master + - Infra: RAM, swap, disk, Anvil health + - Review: unreviewed PRs with passing CI +2. 
Auto-fixes simple issues (restart Anvil, retrigger CI) +3. Escalates complex issues via openclaw system event + +## Requirements + +- [Claude CLI](https://docs.anthropic.com/en/docs/claude-cli) (`claude` in PATH) +- [Foundry](https://getfoundry.sh/) (`forge`, `cast`, `anvil`) +- [Woodpecker CI](https://woodpecker-ci.org/) (local instance) +- PostgreSQL client (`psql`) +- [OpenClaw](https://openclaw.ai/) (for system event notifications, optional) +- `jq`, `curl`, `git` + +## Design Principles + +- **Bash for checks, AI for fixes** — don't burn tokens on health checks +- **Pull system** — readiness derived from merged dependencies, not labels +- **CI fix loop** — each phase gets fresh retry budget +- **Prior art** — dev-agent searches closed PRs to avoid rework +- **No secrets in repo** — everything via `.env` diff --git a/dev/ci-debug.sh b/dev/ci-debug.sh new file mode 100755 index 0000000..aef04bd --- /dev/null +++ b/dev/ci-debug.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# ci-debug.sh — Query Woodpecker CI (CLI for logs, API for structure) +# +# Usage: +# ci-debug.sh status [pipeline] — pipeline overview + step states +# ci-debug.sh logs <pipeline> <step#> — full logs for a step +# ci-debug.sh failures [pipeline] — all failed step logs +# ci-debug.sh list [count] — recent pipelines (default 10) + +set -euo pipefail + +# Load shared environment +source "$(dirname "$0")/../lib/env.sh" + +# Keep WOODPECKER_SERVER from .env when it is set; localhost is only the fallback +export WOODPECKER_SERVER="${WOODPECKER_SERVER:-http://localhost:8000}" +# WOODPECKER_TOKEN loaded from .env via env.sh +REPO="johba/harb" +API="${WOODPECKER_SERVER}/api/repos/2" + +# api PATH — authenticated GET against the Woodpecker repo API; prints the response body +api() { + curl -sf -H "Authorization: Bearer ${WOODPECKER_TOKEN}" "${API}/$1" +} + +# get_latest — prints the number of the most recent pipeline +get_latest() { + api "pipelines?per_page=1" | jq -r '.[0].number' +} + +case "${1:-help}" in + list) + COUNT="${2:-10}" + api "pipelines?per_page=${COUNT}" | \ + jq -r '.[] | "#\(.number) \(.status) \(.event) \(.commit[:7]) \(.message | split("\n")[0][:60])"' + ;; + + status) + P="${2:-$(get_latest)}" + echo "Pipeline #${P}:" + api "pipelines/${P}" | \ + jq -r '" 
Status: \(.status) Event: \(.event) Commit: \(.commit[:7])"' + echo "Steps:" + api "pipelines/${P}" | \ + jq -r '.workflows[]? | " [\(.name)]", (.children[]? | " [\(.pid)] \(.name) → \(.state) (exit \(.exit_code))")' + ;; + + logs) + P="${2:?Usage: ci-debug.sh logs <pipeline> <step#>}" + S="${3:?Usage: ci-debug.sh logs <pipeline> <step#>}" + woodpecker-cli pipeline log show "$REPO" "$P" "$S" + ;; + + failures) + P="${2:-$(get_latest)}" + FAILED=$(api "pipelines/${P}" | \ + jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.pid)\t\(.name)"') + + if [ -z "$FAILED" ]; then + echo "No failed steps in pipeline #${P}" + exit 0 + fi + + while IFS=$'\t' read -r pid name; do + echo "=== FAILED: ${name} (step ${pid}) ===" + # Best effort: under set -e/pipefail one unreadable step log must not abort the remaining steps + woodpecker-cli pipeline log show "$REPO" "$P" "$pid" 2>/dev/null | tail -200 || echo "(log unavailable for step ${pid})" + echo "" + done <<< "$FAILED" + ;; + + help|*) + cat <<'EOF' +ci-debug.sh — Query Woodpecker CI + +Commands: + list [count] Recent pipelines (default 10) + status [pipeline] Pipeline overview + step states + logs <pipeline> <step#> Full step logs (step# = pid from status) + failures [pipeline] All failed step logs (last 200 lines each) +EOF + ;; +esac diff --git a/dev/dev-agent.sh b/dev/dev-agent.sh new file mode 100755 index 0000000..a0d550f --- /dev/null +++ b/dev/dev-agent.sh @@ -0,0 +1,1205 @@ +#!/usr/bin/env bash +# dev-agent.sh — Autonomous developer agent for a single issue +# +# Usage: ./dev-agent.sh <issue-number> +# +# Lifecycle: +# 1. Fetch issue, check dependencies (preflight) +# 2. Claim issue (label: in-progress, remove backlog) +# 3. Create worktree + branch +# 4. Run claude -p with implementation prompt +# 5. Commit + push + create PR +# 6. Wait for CI + AI review +# 7. Feed review back via claude -p -c (continues session) +# 8. 
On APPROVE → merge, delete branch, clean labels, close issue +# +# Preflight JSON output: +# {"status": "ready"} +# {"status": "unmet_dependency", "blocked_by": [315, 316], "suggestion": 317} +# {"status": "too_large", "reason": "..."} +# {"status": "already_done", "reason": "..."} +# +# Peek: cat /tmp/dev-agent-status +# Log: tail -f "$FACTORY_ROOT/dev/dev-agent.log" + +set -euo pipefail + +# Load shared environment +source "$(dirname "$0")/../lib/env.sh" + + +# --- Config --- +ISSUE="${1:?Usage: dev-agent.sh <issue-number>}" +REPO="${CODEBERG_REPO}" +REPO_ROOT="${HARB_REPO_ROOT}" + +API="${CODEBERG_API}" +LOCKFILE="/tmp/dev-agent.lock" +STATUSFILE="/tmp/dev-agent-status" +LOGFILE="${FACTORY_ROOT}/dev/dev-agent.log" +PREFLIGHT_RESULT="/tmp/dev-agent-preflight.json" +BRANCH="fix/issue-${ISSUE}" +WORKTREE="/tmp/harb-worktree-${ISSUE}" +REVIEW_POLL_INTERVAL=300 # 5 min between review checks +MAX_REVIEW_ROUNDS=5 +# Honour CLAUDE_TIMEOUT from .env (see .env.example); 7200s is only the fallback +CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}" + +# --- Logging --- +# log MSG... — append a timestamped line for this issue to $LOGFILE +log() { + printf '[%s] #%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$ISSUE" "$*" >> "$LOGFILE" +} + +# status MSG... — overwrite $STATUSFILE with the current phase and also log it +status() { + printf '[%s] dev-agent #%s: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$ISSUE" "$*" > "$STATUSFILE" + log "$*" +} + +# notify MSG... — best-effort openclaw system event; failures are ignored +notify() { + openclaw system event --text "🔧 dev-agent #${ISSUE}: $*" --mode now 2>/dev/null || true +} + +# cleanup_worktree — remove the issue worktree and its Claude project directory +cleanup_worktree() { + cd "$REPO_ROOT" + git worktree remove "$WORKTREE" --force 2>/dev/null || true + rm -rf "$WORKTREE" + # Clear Claude Code session history for this worktree to prevent hallucinated "already done" + CLAUDE_PROJECT_DIR="$HOME/.claude/projects/$(echo "$WORKTREE" | sed 's|/|-|g; s|^-||')" + rm -rf "$CLAUDE_PROJECT_DIR" 2>/dev/null || true +} + +# cleanup_labels — best-effort removal of the in-progress label from the issue +cleanup_labels() { + curl -sf -X DELETE \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${ISSUE}/labels/in-progress" >/dev/null 2>&1 || true +} + +CLAIMED=false +# cleanup — EXIT handler: drop lock + status file, and unclaim the issue if no PR was created +cleanup() { + rm -f "$LOCKFILE" "$STATUSFILE" + # If we claimed the issue but never created a PR, unclaim it + if [ "$CLAIMED" = true ] && [ -z 
"${PR_NUMBER:-}" ]; then + log "cleanup: unclaiming issue (no PR created)" + curl -sf -X DELETE \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${ISSUE}/labels/in-progress" >/dev/null 2>&1 || true + curl -sf -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/labels" \ + -d '{"labels":["backlog"]}' >/dev/null 2>&1 || true + fi +} + +# --- Log rotation --- +if [ -f "$LOGFILE" ] && [ "$(stat -c%s "$LOGFILE" 2>/dev/null || echo 0)" -gt 102400 ]; then + mv "$LOGFILE" "$LOGFILE.old" + log "Log rotated" +fi + +# --- Memory guard --- +AVAIL_MB=$(awk '/MemAvailable/ {printf "%d", $2/1024}' /proc/meminfo) +if [ "$AVAIL_MB" -lt 2000 ]; then + log "SKIP: only ${AVAIL_MB}MB available (need 2000MB)" + exit 0 +fi + +# --- Concurrency lock --- +if [ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") + if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then + log "SKIP: another dev-agent running (PID ${LOCK_PID})" + exit 0 + fi + log "Removing stale lock (PID ${LOCK_PID:-?})" + rm -f "$LOCKFILE" +fi +echo $$ > "$LOCKFILE" +trap cleanup EXIT # installed only once we own the lock, so the SKIP exits above never delete a running agent's lock/status files + +# --- Fetch issue --- +status "fetching issue" +ISSUE_JSON=$(curl -s -H "Authorization: token ${CODEBERG_TOKEN}" "${API}/issues/${ISSUE}") || true +if [ -z "$ISSUE_JSON" ] || ! 
echo "$ISSUE_JSON" | jq -e '.id' >/dev/null 2>&1; then + log "ERROR: failed to fetch issue #${ISSUE} (API down or invalid response)" + exit 1 +fi +ISSUE_TITLE=$(echo "$ISSUE_JSON" | jq -r '.title') +ISSUE_BODY=$(echo "$ISSUE_JSON" | jq -r '.body // ""') +ISSUE_STATE=$(echo "$ISSUE_JSON" | jq -r '.state') + +if [ "$ISSUE_STATE" != "open" ]; then + log "SKIP: issue #${ISSUE} is ${ISSUE_STATE}" + echo '{"status":"already_done","reason":"issue is closed"}' > "$PREFLIGHT_RESULT" + exit 0 +fi + +log "Issue: ${ISSUE_TITLE}" + +# ============================================================================= +# PREFLIGHT: Check dependencies before doing any work +# ============================================================================= +status "preflight check" + +# Extract dependency references from issue body +# Formats supported: +# - Depends on #315 +# - depends on #315, #316 +# - Blocked by #315 +# - Requires #315 +# - ## Dependencies\n- #315\n- #316 +DEP_NUMBERS=$(echo "$ISSUE_BODY" | \ + grep -ioP '(?:depends on|blocked by|requires|after)\s+#\K[0-9]+|(?:^|\n)\s*-\s*#\K[0-9]+' | \ + sort -un || true) + +# Also extract from a ## Dependencies section (lines starting with - #NNN or - Depends on #NNN) +DEP_SECTION=$(echo "$ISSUE_BODY" | sed -n '/^## Dependencies/,/^## /p' | sed '1d;$d') +if [ -n "$DEP_SECTION" ]; then + SECTION_DEPS=$(echo "$DEP_SECTION" | grep -oP '#\K[0-9]+' | sort -un || true) + DEP_NUMBERS=$(printf '%s\n%s' "$DEP_NUMBERS" "$SECTION_DEPS" | sort -un | grep -v '^$' || true) +fi + +BLOCKED_BY=() +if [ -n "$DEP_NUMBERS" ]; then + while IFS= read -r dep_num; do + [ -z "$dep_num" ] && continue + # Check if dependency issue is closed (= satisfied) + DEP_STATE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${dep_num}" | jq -r '.state // "unknown"') + + if [ "$DEP_STATE" != "closed" ]; then + BLOCKED_BY+=("$dep_num") + log "dependency #${dep_num} is ${DEP_STATE} (not satisfied)" + else + log "dependency #${dep_num} is closed 
(satisfied)" + fi + done <<< "$DEP_NUMBERS" +fi + +if [ "${#BLOCKED_BY[@]}" -gt 0 ]; then + # Find a suggestion: look for the first blocker that itself has no unmet deps + SUGGESTION="" + for blocker in "${BLOCKED_BY[@]}"; do + BLOCKER_BODY=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${blocker}" | jq -r '.body // ""') + BLOCKER_STATE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${blocker}" | jq -r '.state') + + if [ "$BLOCKER_STATE" != "open" ]; then + continue + fi + + # Check if this blocker has its own unmet deps + BLOCKER_DEPS=$(echo "$BLOCKER_BODY" | \ + grep -ioP '(?:depends on|blocked by|requires|after)\s+#\K[0-9]+' | sort -un || true) + BLOCKER_SECTION=$(echo "$BLOCKER_BODY" | sed -n '/^## Dependencies/,/^## /p' | sed '1d;$d') + if [ -n "$BLOCKER_SECTION" ]; then + BLOCKER_SECTION_DEPS=$(echo "$BLOCKER_SECTION" | grep -oP '#\K[0-9]+' | sort -un || true) + BLOCKER_DEPS=$(printf '%s\n%s' "$BLOCKER_DEPS" "$BLOCKER_SECTION_DEPS" | sort -un | grep -v '^$' || true) + fi + + BLOCKER_BLOCKED=false + if [ -n "$BLOCKER_DEPS" ]; then + while IFS= read -r bd; do + [ -z "$bd" ] && continue + BD_STATE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${bd}" | jq -r '.state // "unknown"') + if [ "$BD_STATE" != "closed" ]; then + BLOCKER_BLOCKED=true + break + fi + done <<< "$BLOCKER_DEPS" + fi + + if [ "$BLOCKER_BLOCKED" = false ]; then + SUGGESTION="$blocker" + break + fi + done + + # Write preflight result + BLOCKED_JSON=$(printf '%s\n' "${BLOCKED_BY[@]}" | jq -R 'tonumber' | jq -sc '.') + if [ -n "$SUGGESTION" ]; then + jq -n --argjson blocked "$BLOCKED_JSON" --argjson suggestion "$SUGGESTION" \ + '{"status":"unmet_dependency","blocked_by":$blocked,"suggestion":$suggestion}' > "$PREFLIGHT_RESULT" + else + jq -n --argjson blocked "$BLOCKED_JSON" \ + '{"status":"unmet_dependency","blocked_by":$blocked,"suggestion":null}' > "$PREFLIGHT_RESULT" + fi + + # Post comment ONLY if last comment 
isn't already an unmet dependency notice + BLOCKED_LIST=$(printf '#%s, ' "${BLOCKED_BY[@]}" | sed 's/, $//') + LAST_COMMENT_IS_BLOCK=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${ISSUE}/comments?limit=1" | \ + jq -r '.[0].body // ""' | grep -c 'Dev-agent: Unmet dependency' || true) + + if [ "$LAST_COMMENT_IS_BLOCK" -eq 0 ]; then + BLOCK_COMMENT="🚧 **Dev-agent: Unmet dependency** + +### Blocked by open issues + +This issue depends on ${BLOCKED_LIST}, which $(if [ "${#BLOCKED_BY[@]}" -eq 1 ]; then echo "is"; else echo "are"; fi) not yet closed." + if [ -n "$SUGGESTION" ]; then + BLOCK_COMMENT="${BLOCK_COMMENT} + +**Suggestion:** Work on #${SUGGESTION} first." + fi + BLOCK_COMMENT="${BLOCK_COMMENT} + +--- +*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*" + + printf '%s' "$BLOCK_COMMENT" > /tmp/block-comment.txt + jq -Rs '{body: .}' < /tmp/block-comment.txt > /tmp/block-comment.json + curl -sf -o /dev/null -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/comments" \ + --data-binary @/tmp/block-comment.json 2>/dev/null || true + rm -f /tmp/block-comment.txt /tmp/block-comment.json + else + log "skipping duplicate dependency comment" + fi + + log "BLOCKED: unmet dependencies: ${BLOCKED_BY[*]}$(if [ -n "$SUGGESTION" ]; then echo ", suggest #${SUGGESTION}"; fi)" + notify "blocked by unmet dependencies: ${BLOCKED_BY[*]}" + exit 0 +fi + +# Bash preflight passed (no explicit unmet deps) +log "bash preflight passed — no explicit unmet dependencies" + +# ============================================================================= +# CLAIM ISSUE (tentative — will unclaim if claude refuses) +# ============================================================================= + +curl -sf -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/labels" \ + -d '{"labels":["in-progress"]}' 
>/dev/null 2>&1 || true + +# Drop the backlog label (best effort, one request is enough) +curl -sf -X DELETE \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${ISSUE}/labels/backlog" >/dev/null 2>&1 || true + +CLAIMED=true + +# ============================================================================= +# CHECK FOR EXISTING PR (recovery mode) +# ============================================================================= +EXISTING_PR="" +EXISTING_BRANCH="" +RECOVERY_MODE=false + +BODY_PR=$(echo "$ISSUE_BODY" | grep -oP 'Existing PR:\s*#\K[0-9]+' | head -1) || true +if [ -n "$BODY_PR" ]; then + # A PR referenced in the issue body may no longer be fetchable; fall through to the other lookups instead of aborting under set -e + PR_CHECK=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${BODY_PR}" | jq -r '{state, head_ref: .head.ref}') || true + PR_CHECK_STATE=$(echo "$PR_CHECK" | jq -r '.state') + if [ "$PR_CHECK_STATE" = "open" ]; then + EXISTING_PR="$BODY_PR" + EXISTING_BRANCH=$(echo "$PR_CHECK" | jq -r '.head_ref') + log "found existing PR #${EXISTING_PR} on branch ${EXISTING_BRANCH} (from issue body)" + fi +fi + +if [ -z "$EXISTING_PR" ]; then + # Priority 1: match by branch name (most reliable) + FOUND_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "$BRANCH" \ + '.[] | select(.head.ref == $branch) | "\(.number) \(.head.ref)"' | head -1) || true + if [ -n "$FOUND_PR" ]; then + EXISTING_PR=$(echo "$FOUND_PR" | awk '{print $1}') + EXISTING_BRANCH=$(echo "$FOUND_PR" | awk '{print $2}') + log "found existing PR #${EXISTING_PR} on branch ${EXISTING_BRANCH} (from branch match)" + fi +fi + +if [ -z "$EXISTING_PR" ]; then + # Priority 2: match "Fixes #NNN" or "fixes #NNN" in PR body (stricter: word boundary) + # (.body // "") keeps jq from erroring out on PRs with a null body, same as the closed-PR lookup + FOUND_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg issue "ixes #${ISSUE}\\b" \ + '.[] | select((.body // "") | test($issue; "i")) | "\(.number) 
\(.head.ref)"' | head -1) || true + if [ -n "$FOUND_PR" ]; then + EXISTING_PR=$(echo "$FOUND_PR" | awk '{print $1}') + EXISTING_BRANCH=$(echo "$FOUND_PR" | awk '{print $2}') + log "found existing PR #${EXISTING_PR} on branch ${EXISTING_BRANCH} (from body match)" + fi +fi + +# Priority 3: check CLOSED PRs for prior art (don't redo work from scratch) +PRIOR_ART_DIFF="" +if [ -z "$EXISTING_PR" ]; then + CLOSED_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls?state=closed&limit=30" | \ + jq -r --arg issue "#${ISSUE}" \ + '.[] | select(.merged != true) | select((.title | contains($issue)) or (.body // "" | test("ixes " + $issue + "\\b"; "i"))) | "\(.number) \(.head.ref)"' | head -1) || true + if [ -n "$CLOSED_PR" ]; then + CLOSED_PR_NUM=$(echo "$CLOSED_PR" | awk '{print $1}') + log "found closed (unmerged) PR #${CLOSED_PR_NUM} as prior art" + # Fetch the diff for claude to reference + PRIOR_ART_DIFF=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${CLOSED_PR_NUM}.diff" | head -500) || true + if [ -n "$PRIOR_ART_DIFF" ]; then + log "captured prior art diff from PR #${CLOSED_PR_NUM} ($(echo "$PRIOR_ART_DIFF" | wc -l) lines)" + fi + fi +fi + +if [ -n "$EXISTING_PR" ]; then + RECOVERY_MODE=true + PR_NUMBER="$EXISTING_PR" + BRANCH="$EXISTING_BRANCH" + log "RECOVERY MODE: adopting PR #${PR_NUMBER} on branch ${BRANCH}" + + PR_SHA=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') + + PENDING_REVIEW=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${PR_NUMBER}/comments?limit=50" | \ + jq -r --arg sha "$PR_SHA" \ + '[.[] | select(.body | contains(" + +### Changes made: +${CHANGE_SUMMARY} + +--- +*Addressed at \`$(git rev-parse HEAD | head -c 7)\` · automated by dev-agent (recovery mode)*" + + printf '%s' "$DEV_COMMENT" > /tmp/dev-comment-body.txt + jq -Rs '{body: .}' < /tmp/dev-comment-body.txt > /tmp/dev-comment.json + curl -sf -o /dev/null -X 
POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${PR_NUMBER}/comments" \ + --data-binary @/tmp/dev-comment.json 2>/dev/null || \ + log "WARNING: failed to post dev-response comment" + rm -f /tmp/dev-comment-body.txt /tmp/dev-comment.json + fi + else + log "no unaddressed review found — PR exists, entering review loop to wait" + cd "$REPO_ROOT" + git fetch origin "$BRANCH" 2>/dev/null + + # Reuse existing worktree if on the right branch (preserves .claude session) + if [ -d "$WORKTREE/.git" ] || [ -f "$WORKTREE/.git" ]; then + WT_BRANCH=$(cd "$WORKTREE" && git rev-parse --abbrev-ref HEAD 2>/dev/null || true) + if [ "$WT_BRANCH" = "$BRANCH" ]; then + log "reusing existing worktree (preserves claude session)" + cd "$WORKTREE" + git pull --ff-only origin "$BRANCH" 2>/dev/null || git reset --hard "origin/${BRANCH}" 2>/dev/null || true + else + cleanup_worktree + git worktree add "$WORKTREE" "origin/${BRANCH}" -B "$BRANCH" 2>&1 || { + log "ERROR: worktree setup failed for recovery" + exit 1 + } + cd "$WORKTREE" + git submodule update --init --recursive 2>/dev/null || true + fi + else + cleanup_worktree + git worktree add "$WORKTREE" "origin/${BRANCH}" -B "$BRANCH" 2>&1 || { + log "ERROR: worktree setup failed for recovery" + exit 1 + } + cd "$WORKTREE" + git submodule update --init --recursive 2>/dev/null || true + fi + fi +else + # ============================================================================= + # NORMAL MODE: implement from scratch + # ============================================================================= + + status "creating worktree" + cd "$REPO_ROOT" + + # Ensure repo is in clean state (abort stale rebases, checkout master) + if [ -d "$REPO_ROOT/.git/rebase-merge" ] || [ -d "$REPO_ROOT/.git/rebase-apply" ]; then + log "WARNING: stale rebase detected in main repo — aborting" + git rebase --abort 2>/dev/null || true + fi + CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null 
|| echo "unknown") + if [ "$CURRENT_BRANCH" != "master" ]; then + log "WARNING: main repo on '$CURRENT_BRANCH' instead of master — switching" + git checkout master 2>/dev/null || true + fi + + git fetch origin master 2>/dev/null + git pull --ff-only origin master 2>/dev/null || true + cleanup_worktree + git worktree add "$WORKTREE" origin/master -B "$BRANCH" 2>&1 || { + log "ERROR: worktree creation failed" + git worktree add "$WORKTREE" origin/master -B "$BRANCH" 2>&1 | while read -r wt_line; do log " $wt_line"; done || true + cleanup_labels + exit 1 + } + cd "$WORKTREE" + git checkout -B "$BRANCH" origin/master 2>/dev/null + git submodule update --init --recursive 2>/dev/null || true + # Symlink lib node_modules from main repo (submodule init doesn't run npm install) + for lib_dir in "$REPO_ROOT"/onchain/lib/*/; do + lib_name=$(basename "$lib_dir") + if [ -d "$lib_dir/node_modules" ] && [ ! -d "$WORKTREE/onchain/lib/$lib_name/node_modules" ]; then + ln -s "$lib_dir/node_modules" "$WORKTREE/onchain/lib/$lib_name/node_modules" 2>/dev/null || true + fi + done + + # --- Build the unified prompt: implement OR refuse --- + # Gather open issue list for context (so claude can suggest alternatives) + OPEN_ISSUES_SUMMARY=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues?state=open&labels=backlog&limit=20&type=issues" | \ + jq -r '.[] | "#\(.number) \(.title)"' 2>/dev/null || echo "(could not fetch)") + + PROMPT="You are working in a git worktree at ${WORKTREE} on branch ${BRANCH}. +You have been assigned issue #${ISSUE} for the harb DeFi protocol. + +## Issue: ${ISSUE_TITLE} + +${ISSUE_BODY} + +## Other open issues labeled 'backlog' (for context if you need to suggest alternatives): +${OPEN_ISSUES_SUMMARY} + +$(if [ -n "$PRIOR_ART_DIFF" ]; then echo "## Prior Art (closed PR — DO NOT start from scratch) + +A previous PR attempted this issue but was closed without merging. Review the diff below and reuse as much as possible. 
Fix whatever caused it to fail (merge conflicts, CI errors, review findings). + +\`\`\`diff +${PRIOR_ART_DIFF} +\`\`\`"; fi) + +## Instructions + +**Before implementing, assess whether you should proceed.** You have two options: + +### Option A: Implement +If the issue is clear, dependencies are met, and scope is reasonable: +1. Read AGENTS.md in this repo for project context and coding conventions. +2. Implement the changes described in the issue. +3. Run lint and tests before you're done (see AGENTS.md for commands). +4. Commit your changes with message: fix: ${ISSUE_TITLE} (#${ISSUE}) +5. Do NOT push or create PRs — the orchestrator handles that. +6. When finished, output a summary of what you changed and why. + +### Option B: Refuse (output JSON only) +If you cannot or should not implement this issue, output ONLY a JSON object (no other text) with one of these structures: + +**Unmet dependency** — required code/infrastructure doesn't exist in the repo yet: +\`\`\` +{\"status\": \"unmet_dependency\", \"blocked_by\": \"short explanation of what's missing\", \"suggestion\": } +\`\`\` + +**Too large** — issue needs to be split, spec is too vague, or scope exceeds a single session: +\`\`\` +{\"status\": \"too_large\", \"reason\": \"what makes it too large and how to split it\"} +\`\`\` + +**Already done** — the work described is already implemented in the codebase: +\`\`\` +{\"status\": \"already_done\", \"reason\": \"where the existing implementation is\"} +\`\`\` + +### How to decide +- Read the issue carefully. Check if files/functions it references actually exist in the repo. +- If it depends on other issues, check if those issues' deliverables are present in the codebase. +- If the issue spec is vague or requires designing multiple new systems, refuse as too_large. +- If another open issue should be done first, suggest it. +- When in doubt, implement. Only refuse if there's a clear, specific reason. 
+ +**Do NOT invent dependencies that aren't real.** If the code compiles and tests pass, that's ready." + + status "claude assessing + implementing" + IMPL_OUTPUT=$(cd "$WORKTREE" && timeout "$CLAUDE_TIMEOUT" \ + claude -p --model sonnet --dangerously-skip-permissions "$PROMPT" 2>&1) || { + EXIT_CODE=$? + if [ "$EXIT_CODE" -eq 124 ]; then + log "TIMEOUT: claude took longer than ${CLAUDE_TIMEOUT}s" + notify "timed out during implementation" + else + log "ERROR: claude exited with code ${EXIT_CODE}" + notify "claude failed (exit ${EXIT_CODE})" + fi + cleanup_labels + cleanup_worktree + exit 1 + } + + log "claude finished ($(printf '%s' "$IMPL_OUTPUT" | wc -c) bytes)" + printf '%s' "$IMPL_OUTPUT" > /tmp/dev-agent-last-output.txt + + # --- Check if claude refused (JSON response) vs implemented (commits) --- + REFUSAL_JSON="" + + # Check for refusal: try to parse output as JSON with a status field + # First try raw output + if printf '%s' "$IMPL_OUTPUT" | jq -e '.status' > /dev/null 2>&1; then + REFUSAL_JSON="$IMPL_OUTPUT" + else + # Try extracting from code fence + EXTRACTED=$(printf '%s' "$IMPL_OUTPUT" | sed -n '/^```/,/^```$/p' | sed '1d;$d') + if [ -n "$EXTRACTED" ] && printf '%s' "$EXTRACTED" | jq -e '.status' > /dev/null 2>&1; then + REFUSAL_JSON="$EXTRACTED" + else + # Try extracting first { ... 
} block (handles preamble text before JSON) + EXTRACTED=$(printf '%s' "$IMPL_OUTPUT" | grep -Pzo '\{[^{}]*"status"[^{}]*\}' 2>/dev/null | tr '\0' '\n' | head -1 || true) + if [ -n "$EXTRACTED" ] && printf '%s' "$EXTRACTED" | jq -e '.status' > /dev/null 2>&1; then + REFUSAL_JSON="$EXTRACTED" + fi + fi + fi + + # But only treat as refusal if there are NO commits (claude might output JSON-like text AND commit) + cd "$WORKTREE" + AHEAD=$(git rev-list origin/master..HEAD --count 2>/dev/null || echo "0") + HAS_CHANGES=$(git status --porcelain) + + if [ -n "$REFUSAL_JSON" ] && [ "$AHEAD" -eq 0 ] && [ -z "$HAS_CHANGES" ]; then + # Claude refused — parse and handle + REFUSAL_STATUS=$(printf '%s' "$REFUSAL_JSON" | jq -r '.status') + log "claude refused: ${REFUSAL_STATUS}" + + # Write preflight result for dev-poll.sh + printf '%s' "$REFUSAL_JSON" > "$PREFLIGHT_RESULT" + + # Unclaim issue (restore backlog label, remove in-progress) + cleanup_labels + curl -sf -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/labels" \ + -d '{"labels":["backlog"]}' >/dev/null 2>&1 || true + + # --- Post refusal comment on the issue (deduplicated) --- + # post_refusal_comment EMOJI TITLE BODY — posts "EMOJI **Dev-agent: TITLE**" + BODY as an issue comment, + # unless the comment returned by the limit=1 query already contains the same title. + post_refusal_comment() { + local emoji="$1" title="$2" body="$3" + # TMPDIR is not guaranteed to be set (e.g. under cron); fall back to /tmp so set -u does not abort the refusal path + local TMPDIR="${TMPDIR:-/tmp}" + + # Skip if last comment already has same title (prevent spam) + local last_has_title + last_has_title=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${ISSUE}/comments?limit=1" | \ + jq -r --arg t "Dev-agent: ${title}" '.[0].body // "" | contains($t)') || true + if [ "$last_has_title" = "true" ]; then + log "skipping duplicate refusal comment: ${title}" + return 0 + fi + + local comment="${emoji} **Dev-agent: ${title}** + +${body} + +--- +*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*" + + printf '%s' "$comment" > "${TMPDIR}/refusal-comment.txt" + jq -Rs '{body: .}' < "${TMPDIR}/refusal-comment.txt" > "${TMPDIR}/refusal-comment.json" + curl -sf -o /dev/null 
-X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/comments" \ + --data-binary @"${TMPDIR}/refusal-comment.json" 2>/dev/null || \ + log "WARNING: failed to post refusal comment" + rm -f "${TMPDIR}/refusal-comment.txt" "${TMPDIR}/refusal-comment.json" + } + + case "$REFUSAL_STATUS" in + unmet_dependency) + BLOCKED_BY=$(printf '%s' "$REFUSAL_JSON" | jq -r '.blocked_by // "unknown"') + SUGGESTION=$(printf '%s' "$REFUSAL_JSON" | jq -r '.suggestion // empty') + log "unmet dependency: ${BLOCKED_BY}. suggestion: ${SUGGESTION:-none}" + notify "refused #${ISSUE}: unmet dependency — ${BLOCKED_BY}" + + COMMENT_BODY="### Blocked by unmet dependency + +${BLOCKED_BY}" + if [ -n "$SUGGESTION" ] && [ "$SUGGESTION" != "null" ]; then + COMMENT_BODY="${COMMENT_BODY} + +**Suggestion:** Work on #${SUGGESTION} first." + fi + post_refusal_comment "🚧" "Unmet dependency" "$COMMENT_BODY" + ;; + too_large) + REASON=$(printf '%s' "$REFUSAL_JSON" | jq -r '.reason // "unspecified"') + log "too large: ${REASON}" + notify "refused #${ISSUE}: too large — ${REASON}" + + post_refusal_comment "📏" "Too large for single session" "### Why this can't be implemented as-is + +${REASON} + +### Next steps +A maintainer should split this issue or add more detail to the spec." 
+ + # Label as underspecified + curl -sf -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/labels" \ + -d '{"labels":["underspecified"]}' >/dev/null 2>&1 || true + curl -sf -X DELETE \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${ISSUE}/labels/backlog" >/dev/null 2>&1 || true + ;; + already_done) + REASON=$(printf '%s' "$REFUSAL_JSON" | jq -r '.reason // "unspecified"') + log "already done: ${REASON}" + notify "refused #${ISSUE}: already done — ${REASON}" + + post_refusal_comment "✅" "Already implemented" "### Existing implementation + +${REASON} + +This issue may be ready to close." + ;; + *) + log "unknown refusal status: ${REFUSAL_STATUS}" + notify "refused #${ISSUE}: unknown reason" + + post_refusal_comment "❓" "Unable to proceed" "The dev-agent could not process this issue. + +Raw response: +\`\`\`json +$(printf '%s' "$REFUSAL_JSON" | head -c 2000) +\`\`\`" + ;; + esac + + cleanup_worktree + exit 0 + fi + + # --- Claude implemented (has commits or changes) --- + # Write ready status for dev-poll.sh + echo '{"status":"ready"}' > "$PREFLIGHT_RESULT" + + if [ -z "$HAS_CHANGES" ] && [ "$AHEAD" -eq 0 ]; then + log "ERROR: no changes and no refusal JSON" + notify "no changes made, aborting" + cleanup_labels + cleanup_worktree + exit 1 + fi + + if [ -n "$HAS_CHANGES" ]; then + status "committing changes" + git add -A + git commit --no-verify -m "fix: ${ISSUE_TITLE} (#${ISSUE})" 2>&1 | tail -2 + else + log "claude already committed (${AHEAD} commits ahead)" + fi + + log "HEAD: $(git log --oneline -1)" + + status "pushing branch" + if ! 
# do_merge SHA
#
# Waits for the combined commit status of SHA to become "success" (up to 20
# polls, 30s apart), then merges PR #${PR_NUMBER}. On a confirmed merge it
# deletes the branch, closes issue #${ISSUE}, clears labels and the worktree.
#
# Globals read: CODEBERG_TOKEN, API, PR_NUMBER, BRANCH, ISSUE.
# Calls: log, notify, cleanup_labels, cleanup_worktree.
# Never returns to the caller: every path ends in `exit 0`.
do_merge() {
  local sha="$1"
  local attempt ci="unknown"

  for attempt in $(seq 1 20); do
    # A transient API/jq failure counts as "unknown" and is simply retried.
    ci=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \
      "${API}/commits/${sha}/status" | jq -r '.state // "unknown"') || ci="unknown"
    [ "$ci" = "success" ] && break
    sleep 30
  done

  # Do not fall through to the merge call when CI never turned green.
  if [ "$ci" != "success" ]; then
    log "CI for ${sha} is '${ci}' after waiting; not merging PR #${PR_NUMBER}"
    notify "PR #${PR_NUMBER} approved but CI is '${ci}'. Please merge manually."
    exit 0
  fi

  local http_code
  http_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H "Content-Type: application/json" \
    "${API}/pulls/${PR_NUMBER}/merge" \
    -d '{"Do":"merge","delete_branch_after_merge":true}')

  local merged="false"
  case "$http_code" in
    200|204)
      merged="true"
      ;;
    405)
      # 405 is ambiguous: the server uses it for "already merged" AND for
      # "merge not allowed" (protected branch, WIP, not mergeable yet).
      # Ask the PR itself before deleting branches or closing issues.
      merged=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \
        "${API}/pulls/${PR_NUMBER}" | jq -r '.merged // false') || merged="false"
      ;;
  esac

  if [ "$merged" = "true" ]; then
    log "PR #${PR_NUMBER} merged!"

    curl -sf -X DELETE \
      -H "Authorization: token ${CODEBERG_TOKEN}" \
      "${API}/branches/${BRANCH}" >/dev/null 2>&1 || true

    curl -sf -X PATCH \
      -H "Authorization: token ${CODEBERG_TOKEN}" \
      -H "Content-Type: application/json" \
      "${API}/issues/${ISSUE}" \
      -d '{"state":"closed"}' >/dev/null 2>&1 || true
    cleanup_labels

    notify "✅ PR #${PR_NUMBER} merged! Issue #${ISSUE} done."
    cleanup_worktree
    exit 0
  fi

  log "merge failed (HTTP ${http_code})"
  notify "PR #${PR_NUMBER} approved but merge failed (HTTP ${http_code}). Please merge manually."
  exit 0
}
| select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1) + FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1) + FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2) + fi + + log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}" + + IS_INFRA=false + case "${FAILED_STEP}" in git*) IS_INFRA=true ;; esac + case "${FAILED_EXIT}" in 128|137) IS_INFRA=true ;; esac + + if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then + CI_RETRY_COUNT=$(( ${CI_RETRY_COUNT:-0} + 1 )) + log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})" + cd "$WORKTREE" + git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify 2>&1 | tail -1 + git push origin "$BRANCH" --force 2>&1 | tail -3 + continue + fi + + CI_FIX_COUNT=$(( ${CI_FIX_COUNT:-0} + 1 )) + if [ "$CI_FIX_COUNT" -gt 2 ]; then + log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts" + notify "❌ PR #${PR_NUMBER} CI failed ${CI_FIX_COUNT}x: ${FAILED_STEP:-unknown}. Needs human attention." + break + fi + + CI_ERROR_LOG="" + if [ -n "$PIPELINE_NUM" ]; then + CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/dev/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "") + fi + + log "CI code failure — feeding back to claude (attempt ${CI_FIX_COUNT})" + status "claude fixing CI failure (attempt ${CI_FIX_COUNT})" + + CI_FIX_PROMPT="CI failed on your PR for issue #${ISSUE}: ${ISSUE_TITLE} +You are in worktree ${WORKTREE} on branch ${BRANCH}. + +## CI Debug Tool +\`\`\`bash +bash "${FACTORY_ROOT}/dev/ci-debug.sh" status ${PIPELINE_NUM:-0} +bash "${FACTORY_ROOT}/dev/ci-debug.sh" logs ${PIPELINE_NUM:-0} +bash "${FACTORY_ROOT}/dev/ci-debug.sh" failures ${PIPELINE_NUM:-0} +\`\`\` + +## Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?}) + +## Error snippet: +\`\`\` +${CI_ERROR_LOG:-No logs available. Use ci-debug.sh to query the pipeline.} +\`\`\` + +## Instructions +1. 
Run ci-debug.sh failures to get full error output. +2. Read the failing test file(s) — understand what the tests EXPECT. +3. Read AGENTS.md for conventions. +4. Fix the root cause — do NOT weaken tests. +5. Run lint/typecheck if applicable. Commit your fix. +6. Output a SHORT bullet-list summary." + + CI_FIX_OUTPUT="" + if [ "$CI_FIX_COUNT" -eq 1 ] && [ "$REVIEW_ROUND" -eq 0 ]; then + CI_FIX_OUTPUT=$(cd "$WORKTREE" && timeout "$CLAUDE_TIMEOUT" \ + claude -p -c --model sonnet --dangerously-skip-permissions "$CI_FIX_PROMPT" 2>&1) || { + CI_FIX_OUTPUT=$(cd "$WORKTREE" && timeout "$CLAUDE_TIMEOUT" \ + claude -p --model sonnet --dangerously-skip-permissions "$CI_FIX_PROMPT" 2>&1) || true + } + else + CI_FIX_OUTPUT=$(cd "$WORKTREE" && timeout "$CLAUDE_TIMEOUT" \ + claude -p -c --model sonnet --dangerously-skip-permissions "$CI_FIX_PROMPT" 2>&1) || { + CI_FIX_OUTPUT=$(cd "$WORKTREE" && timeout "$CLAUDE_TIMEOUT" \ + claude -p --model sonnet --dangerously-skip-permissions "$CI_FIX_PROMPT" 2>&1) || true + } + fi + + log "claude finished CI fix attempt ${CI_FIX_COUNT}" + + cd "$WORKTREE" + if [ -n "$(git status --porcelain)" ]; then + git add -A + git commit --no-verify -m "fix: CI failure in ${FAILED_STEP:-build} (#${ISSUE})" 2>&1 | tail -2 + fi + + REMOTE_SHA=$(git ls-remote origin "$BRANCH" 2>/dev/null | awk '{print $1}') + LOCAL_SHA=$(git rev-parse HEAD) + if [ "$LOCAL_SHA" != "$REMOTE_SHA" ]; then + git push origin "$BRANCH" --force 2>&1 | tail -3 + log "pushed CI fix (attempt ${CI_FIX_COUNT})" + notify "PR #${PR_NUMBER}: pushed CI fix attempt ${CI_FIX_COUNT} (${FAILED_STEP:-build})" + else + log "no changes after CI fix attempt — bailing" + notify "❌ PR #${PR_NUMBER}: claude couldn't fix CI failure in ${FAILED_STEP:-unknown}. Needs human attention." 
+ break + fi + + continue + fi + + # --- Wait for review --- + REVIEW_TEXT="" + for i in $(seq 1 36); do + sleep "$REVIEW_POLL_INTERVAL" + + CURRENT_SHA=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') + + REVIEW_COMMENT=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${PR_NUMBER}/comments?limit=50" | \ + jq -r --arg sha "$CURRENT_SHA" \ + '[.[] | select(.body | contains(" + +### Changes made: +${CHANGE_SUMMARY} + +--- +*Addressed at \`$(git rev-parse HEAD | head -c 7)\` · automated by dev-agent*" + + curl -sf -o /dev/null -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${PR_NUMBER}/comments" \ + -d "$(jq -n --arg body "$DEV_COMMENT" '{body: $body}')" 2>/dev/null || \ + log "WARNING: failed to post dev-response comment" + fi +done + +if [ "$REVIEW_ROUND" -ge "$MAX_REVIEW_ROUNDS" ]; then + log "hit max review rounds (${MAX_REVIEW_ROUNDS})" + notify "PR #${PR_NUMBER}: hit ${MAX_REVIEW_ROUNDS} review rounds, needs human attention" +fi + +cleanup_labels +# Keep worktree if PR is still open (recovery can reuse session context) +if [ -n "${PR_NUMBER:-}" ]; then + PR_STATE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${PR_NUMBER}" | jq -r '.state // "unknown"') || true + if [ "$PR_STATE" = "open" ]; then + log "keeping worktree (PR #${PR_NUMBER} still open, session preserved for recovery)" + else + cleanup_worktree + fi +else + cleanup_worktree +fi +log "dev-agent finished for issue #${ISSUE}" diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh new file mode 100755 index 0000000..8cd5b2c --- /dev/null +++ b/dev/dev-poll.sh @@ -0,0 +1,331 @@ +#!/usr/bin/env bash +# dev-poll.sh — Pull-based factory: find the next ready issue and start dev-agent +# +# Pull system: issues labeled "backlog" are candidates. An issue is READY when +# ALL its dependency issues are closed AND their PRs are merged into master. 
+# No "todo" label needed — readiness is derived from reality. +# +# Priority: +# 1. Orphaned "in-progress" issues (agent died or PR needs attention) +# 2. Ready "backlog" issues (all deps merged) +# +# Usage: cron every 10min + +set -euo pipefail + +# Load shared environment +source "$(dirname "$0")/../lib/env.sh" + + +REPO="${CODEBERG_REPO}" + +API="${CODEBERG_API}" +LOCKFILE="/tmp/dev-agent.lock" +LOGFILE="${FACTORY_ROOT}/dev/dev-agent.log" +PREFLIGHT_RESULT="/tmp/dev-agent-preflight.json" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +log() { + printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +# --- Check if dev-agent already running --- +if [ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") + if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then + log "agent running (PID ${LOCK_PID})" + exit 0 + fi + rm -f "$LOCKFILE" +fi + +# --- Memory guard --- +AVAIL_MB=$(awk '/MemAvailable/{printf "%d", $2/1024}' /proc/meminfo) +if [ "$AVAIL_MB" -lt 2000 ]; then + log "SKIP: only ${AVAIL_MB}MB available (need 2000MB)" + exit 0 +fi + +# ============================================================================= +# HELPER: check if a dependency issue is fully resolved (closed + PR merged) +# ============================================================================= +dep_is_merged() { + local dep_num="$1" + + # Check issue is closed + local dep_state + dep_state=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${dep_num}" | jq -r '.state // "open"') + if [ "$dep_state" != "closed" ]; then + return 1 + fi + + # Check there's a merged PR for this issue + # Search closed PRs for title containing "#NNN" or body containing "Fixes #NNN" + local has_merged + has_merged=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls?state=closed&limit=30" | \ + jq -r --arg num "#${dep_num}" \ + '[.[] | select(.merged == true) | select((.title | contains($num)) or (.body 
# =============================================================================
# HELPER: extract dependency numbers from issue body
# =============================================================================
# get_deps ISSUE_BODY
#
# Prints the issue numbers this issue depends on, one per line, numerically
# sorted and de-duplicated. Two sources are combined:
#   1. every "#NNN" inside a "Depends on" / "Blocked by" / "Dependencies"
#      markdown section (header level # or ##), up to the next # / ## header;
#   2. every "#NNN" on any line that mentions "depends on" or "blocked by".
# Prints nothing when no dependency is found.
get_deps() {
  local issue_body="$1"
  {
    # Case-insensitive header match via tolower(): IGNORECASE is a gawk-only
    # extension and is silently ignored by mawk/busybox awk, which made
    # lowercase headers invisible on hosts where awk is not gawk.
    printf '%s\n' "$issue_body" | awk '
      {
        line = tolower($0)
        if (line ~ /^##? *(depends on|blocked by|dependencies)/) { capture = 1; next }
        if (capture && line ~ /^##? /) { capture = 0 }
        if (capture) { print }
      }
    ' | grep -oE '#[0-9]+' | tr -d '#' || true
    # Also check inline deps on same line as keyword
    printf '%s\n' "$issue_body" | grep -iE '(depends on|blocked by)' \
      | grep -oE '#[0-9]+' | tr -d '#' || true
  } | sort -un
}
dep_is_merged "$dep"; then + log " #${issue_num} blocked: dep #${dep} not merged" + return 1 + fi + done <<< "$deps" + + return 0 +} + +# ============================================================================= +# PRIORITY 1: orphaned in-progress issues +# ============================================================================= +log "checking for in-progress issues" +ORPHANS_JSON=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues?state=open&labels=in-progress&limit=10&type=issues") + +ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length') +if [ "$ORPHAN_COUNT" -gt 0 ]; then + ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number') + + # Check if there's already an open PR for this issue + HAS_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + + if [ -n "$HAS_PR" ]; then + PR_SHA=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true + CI_STATE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/commits/${PR_SHA}/status" | jq -r '.state // "unknown"') || true + + # Check formal reviews + HAS_APPROVE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${HAS_PR}/reviews" | \ + jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true + HAS_CHANGES=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${HAS_PR}/reviews" | \ + jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true + + if [ "$CI_STATE" = "success" ] && [ "${HAS_APPROVE:-0}" -gt 0 ]; then + log "PR #${HAS_PR} approved + CI green → merging" + MERGE_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls/${HAS_PR}/merge" \ + -d 
'{"Do":"merge","delete_branch_after_merge":true}') + + if [ "$MERGE_CODE" = "200" ] || [ "$MERGE_CODE" = "204" ] || [ "$MERGE_CODE" = "405" ]; then + log "PR #${HAS_PR} merged! Closing #${ISSUE_NUM}" + curl -sf -X PATCH -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE_NUM}" -d '{"state":"closed"}' >/dev/null 2>&1 || true + curl -sf -X DELETE -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}/labels/in-progress" >/dev/null 2>&1 || true + openclaw system event --text "✅ PR #${HAS_PR} merged! Issue #${ISSUE_NUM} done." --mode now 2>/dev/null || true + else + log "merge failed (HTTP ${MERGE_CODE})" + fi + exit 0 + + elif [ "$CI_STATE" = "success" ] && [ "${HAS_CHANGES:-0}" -gt 0 ]; then + log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)" + exit 0 + + elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then + log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)" + exit 0 + + else + log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" + fi + else + log "recovering orphaned issue #${ISSUE_NUM} (no PR found)" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (recovery)" + exit 0 + fi +fi + +# ============================================================================= +# PRIORITY 2: find ready backlog issues (pull system) +# ============================================================================= +log "scanning backlog for ready issues" +BACKLOG_JSON=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/issues?state=open&labels=backlog&limit=20&type=issues") + +BACKLOG_COUNT=$(echo "$BACKLOG_JSON" | jq 'length') +if [ "$BACKLOG_COUNT" -eq 0 ]; then + log "no backlog issues" + exit 0 +fi + +log "found ${BACKLOG_COUNT} backlog issues" + +# Check each for readiness +READY_ISSUE="" +for i in $(seq 0 $((BACKLOG_COUNT - 1))); do + ISSUE_NUM=$(echo "$BACKLOG_JSON" | jq -r ".[$i].number") + ISSUE_BODY=$(echo "$BACKLOG_JSON" | jq -r ".[$i].body // \"\"") + + if ! issue_is_ready "$ISSUE_NUM" "$ISSUE_BODY"; then + continue + fi + + # Check if there's already an open PR for this issue that needs attention + EXISTING_PR=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "fix/issue-${ISSUE_NUM}" --arg num "#${ISSUE_NUM}" \ + '.[] | select((.head.ref == $branch) or (.title | contains($num))) | .number' | head -1) || true + + if [ -n "$EXISTING_PR" ]; then + PR_SHA=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${EXISTING_PR}" | jq -r '.head.sha') || true + CI_STATE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/commits/${PR_SHA}/status" | jq -r '.state // "unknown"') || true + HAS_APPROVE=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${EXISTING_PR}/reviews" | \ + jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true + HAS_CHANGES=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \ + "${API}/pulls/${EXISTING_PR}/reviews" | \ + jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') 
|| true + + if [ "$CI_STATE" = "success" ] && [ "${HAS_APPROVE:-0}" -gt 0 ]; then + log "#${ISSUE_NUM} PR #${EXISTING_PR} approved + CI green → merging" + MERGE_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls/${EXISTING_PR}/merge" \ + -d '{"Do":"merge","delete_branch_after_merge":true}') + if [ "$MERGE_CODE" = "200" ] || [ "$MERGE_CODE" = "204" ] || [ "$MERGE_CODE" = "405" ]; then + log "PR #${EXISTING_PR} merged! Closing #${ISSUE_NUM}" + curl -sf -X PATCH -H "Authorization: token ${CODEBERG_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE_NUM}" -d '{"state":"closed"}' >/dev/null 2>&1 || true + openclaw system event --text "✅ PR #${EXISTING_PR} merged! Issue #${ISSUE_NUM} done." --mode now 2>/dev/null || true + fi + continue + + elif [ "${HAS_CHANGES:-0}" -gt 0 ]; then + log "#${ISSUE_NUM} PR #${EXISTING_PR} has REQUEST_CHANGES — picking up" + READY_ISSUE="$ISSUE_NUM" + break + + elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then + log "#${ISSUE_NUM} PR #${EXISTING_PR} CI failed — picking up" + READY_ISSUE="$ISSUE_NUM" + break + + else + log "#${ISSUE_NUM} PR #${EXISTING_PR} exists (CI: ${CI_STATE}, waiting)" + continue + fi + fi + + READY_ISSUE="$ISSUE_NUM" + log "#${ISSUE_NUM} is READY (all deps merged, no existing PR)" + break +done + +if [ -z "$READY_ISSUE" ]; then + log "no ready issues (all blocked by unmerged deps)" + exit 0 +fi + +# ============================================================================= +# LAUNCH: start dev-agent for the ready issue +# ============================================================================= +log "launching dev-agent for #${READY_ISSUE}" +rm -f "$PREFLIGHT_RESULT" + +nohup "${SCRIPT_DIR}/dev-agent.sh" "$READY_ISSUE" >> "$LOGFILE" 2>&1 & +AGENT_PID=$! 
# Wait briefly for preflight (agent writes result before claiming).
# Polls every 2s, up to 30 times, and stops early as soon as either the
# result file appears or the agent process has exited.
for w in $(seq 1 30); do
  if [ -f "$PREFLIGHT_RESULT" ]; then
    break
  fi
  if ! kill -0 "$AGENT_PID" 2>/dev/null; then
    break
  fi
  sleep 2
done

if [ -f "$PREFLIGHT_RESULT" ]; then
  # Read everything we need BEFORE deleting the file. Previously the reason
  # was read after `rm -f`, so it always fell back to "unspecified".
  PREFLIGHT_STATUS=$(jq -r '.status // "unknown"' < "$PREFLIGHT_RESULT")
  REASON=$(jq -r '.reason // "unspecified"' < "$PREFLIGHT_RESULT" 2>/dev/null || echo "unspecified")
  rm -f "$PREFLIGHT_RESULT"

  case "$PREFLIGHT_STATUS" in
    ready)
      log "dev-agent running for #${READY_ISSUE}"
      ;;
    unmet_dependency)
      log "#${READY_ISSUE} has code-level dependency (preflight blocked)"
      # Reap the agent so it does not linger as a zombie of this poll run.
      wait "$AGENT_PID" 2>/dev/null || true
      ;;
    too_large)
      log "#${READY_ISSUE} too large: ${REASON}"
      # Label as underspecified
      curl -sf -X POST -H "Authorization: token ${CODEBERG_TOKEN}" \
        -H "Content-Type: application/json" \
        "${API}/issues/${READY_ISSUE}/labels" \
        -d '{"labels":["underspecified"]}' >/dev/null 2>&1 || true
      ;;
    already_done)
      log "#${READY_ISSUE} already done"
      ;;
    *)
      log "#${READY_ISSUE} unknown preflight: ${PREFLIGHT_STATUS}"
      ;;
  esac
elif kill -0 "$AGENT_PID" 2>/dev/null; then
  log "dev-agent running for #${READY_ISSUE} (passed preflight)"
else
  log "dev-agent exited for #${READY_ISSUE} without preflight result"
fi
+# +# Cron: */10 * * * * /path/to/dark-factory/factory/factory-poll.sh +# +# Peek: cat /tmp/factory-status +# Log: tail -f /path/to/dark-factory/factory/factory.log + +source "$(dirname "$0")/../lib/env.sh" + +LOGFILE="${FACTORY_ROOT}/factory/factory.log" +STATUSFILE="/tmp/factory-status" +LOCKFILE="/tmp/factory-poll.lock" + +# Prevent overlapping runs +if [ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null) + if kill -0 "$LOCK_PID" 2>/dev/null; then + exit 0 + fi + rm -f "$LOCKFILE" +fi +echo $$ > "$LOCKFILE" +trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT + +status() { + printf '[%s] factory: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE" + log "$*" >> "$LOGFILE" +} + +ALERTS="" +alert() { + ALERTS="${ALERTS}• $*\n" + log "ALERT: $*" >> "$LOGFILE" +} + +# ============================================================================= +# CHECK 1: Stuck/failed CI pipelines +# ============================================================================= +status "checking CI" + +STUCK_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=2 AND status='running' AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200;" 2>/dev/null | xargs) +[ "${STUCK_CI:-0}" -gt 0 ] && alert "CI: ${STUCK_CI} pipeline(s) running >20min" + +PENDING_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=2 AND status='pending' AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800;" 2>/dev/null | xargs) +[ "${PENDING_CI:-0}" -gt 0 ] && alert "CI: ${PENDING_CI} pipeline(s) pending >30min" + +# ============================================================================= +# CHECK 2: Derailed PRs — open with CI failure + no push in 30min +# ============================================================================= +status "checking PRs" + +OPEN_PRS=$(codeberg_api GET "/pulls?state=open&limit=10" 2>/dev/null | jq -r '.[].number' 2>/dev/null || true) +for pr in $OPEN_PRS; do + PR_SHA=$(codeberg_api GET "/pulls/${pr}" 2>/dev/null | jq -r 
'.head.sha' 2>/dev/null || true) + [ -z "$PR_SHA" ] && continue + + CI_STATE=$(codeberg_api GET "/commits/${PR_SHA}/status" 2>/dev/null | jq -r '.state // "unknown"' 2>/dev/null || true) + if [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then + # Check when last push happened + UPDATED=$(codeberg_api GET "/pulls/${pr}" 2>/dev/null | jq -r '.updated_at // ""' 2>/dev/null || true) + if [ -n "$UPDATED" ]; then + UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0) + NOW_EPOCH=$(date +%s) + AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 )) + if [ "$AGE_MIN" -gt 30 ]; then + alert "PR #${pr}: CI=${CI_STATE}, no activity for ${AGE_MIN}min" + fi + fi + fi +done + +# ============================================================================= +# CHECK 3: Dev-agent health +# ============================================================================= +status "checking dev-agent" + +DEV_LOCK="/tmp/dev-agent.lock" +if [ -f "$DEV_LOCK" ]; then + DEV_PID=$(cat "$DEV_LOCK" 2>/dev/null) + if ! kill -0 "$DEV_PID" 2>/dev/null; then + alert "Dev-agent: lock file exists but PID ${DEV_PID} is dead (stale lock)" + else + # Check if it's making progress — same status for >30min? 
# =============================================================================
# CHECK 4: Git repo health
# =============================================================================
status "checking git repo"

# Defaults so later consumers of these variables always see a defined value,
# even when the repository cannot be inspected.
GIT_STATUS=0
GIT_BRANCH="unknown"
GIT_REBASE="no"

# Only run the git checks when we are really inside the target repo.
# Previously a failed `cd` was ignored, so the checks ran against whatever
# directory cron started us in and raised a misleading "wrong branch" alert.
if cd "${HARB_REPO_ROOT}" 2>/dev/null && git rev-parse --git-dir >/dev/null 2>&1; then
  GIT_STATUS=$(git status --porcelain 2>/dev/null | wc -l)
  GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
  if [ -d .git/rebase-merge ] || [ -d .git/rebase-apply ]; then
    GIT_REBASE="yes"
  fi

  if [ "$GIT_REBASE" = "yes" ]; then
    alert "Git: stale rebase in progress on main repo"
  fi
  if [ "$GIT_BRANCH" != "master" ]; then
    alert "Git: main repo on branch '${GIT_BRANCH}' instead of master"
  fi
else
  alert "Git: cannot access repo at '${HARB_REPO_ROOT:-<unset>}'"
fi
= "fail" ]; then + # Try to auto-fix + sudo docker restart harb-anvil-1 2>/dev/null && \ + log "Auto-fixed: restarted frozen Anvil" >> "$LOGFILE" || \ + alert "Anvil: unresponsive and restart failed" +fi + +# ============================================================================= +# CHECK 6: Review bot — unreviewed PRs older than 1h +# ============================================================================= +status "checking review backlog" + +for pr in $OPEN_PRS; do + PR_SHA=$(codeberg_api GET "/pulls/${pr}" 2>/dev/null | jq -r '.head.sha' 2>/dev/null || true) + [ -z "$PR_SHA" ] && continue + + CI_STATE=$(codeberg_api GET "/commits/${PR_SHA}/status" 2>/dev/null | jq -r '.state // "unknown"' 2>/dev/null || true) + [ "$CI_STATE" != "success" ] && continue + + # CI passed — check if reviewed at this SHA + HAS_REVIEW=$(codeberg_api GET "/issues/${pr}/comments?limit=50" 2>/dev/null | \ + jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains(" + +⚠️ Review failed: could not produce structured output after ${MAX_ATTEMPTS} attempts. + +A maintainer should review this PR manually, or re-trigger with \`--force\`. 
# Map a finding severity (bug|warning|nit) to its emoji; anything else gets
# the generic info icon.
_rm_severity_icon() {
  case "$1" in
    bug) printf '%s' "🐛" ;;
    warning) printf '%s' "⚠️" ;;
    nit) printf '%s' "💅" ;;
    *) printf '%s' "ℹ️" ;;
  esac
}

# Render one {severity, location, description} JSON object as a bullet
# (without trailing newline).
_rm_issue_bullet() {
  local item="$1" severity location description
  severity=$(jq -r '.severity' <<< "$item")
  location=$(jq -r '.location' <<< "$item")
  description=$(jq -r '.description' <<< "$item")
  printf '%s' "- $(_rm_severity_icon "$severity") **${severity}** \`${location}\`: ${description}"
}

# Render one {summary, status, explanation} JSON object from a re-review as a
# bullet (without trailing newline).
_rm_previous_bullet() {
  local item="$1" summary state explanation mark
  summary=$(jq -r '.summary' <<< "$item")
  state=$(jq -r '.status' <<< "$item")
  explanation=$(jq -r '.explanation' <<< "$item")
  case "$state" in
    fixed) mark="✅" ;;
    not_fixed) mark="❌" ;;
    partial) mark="⚠️" ;;
    *) mark="❓" ;;
  esac
  printf '%s' "- ${summary} → ${mark} ${explanation}"
}

# render_markdown JSON
#
# Turns the structured review JSON into the Markdown body of the review
# comment and writes it to stdout.
#   - re-review (IS_RE_REVIEW=true): "Previous Findings" and "New Issues"
#     sections, each emitted only when non-empty;
#   - fresh review: one "### <title>" section per entry of .sections, with
#     "No issues found." for sections without findings;
#   - optional "Follow-up Issues", then always a "Verdict" section.
# Globals read: IS_RE_REVIEW, VERDICT, VERDICT_REASON.
render_markdown() {
  local json="$1"
  local out="" entry count nl=$'\n'

  if [ "$IS_RE_REVIEW" = true ]; then
    # Re-review format
    count=$(jq '.previous_findings | length' <<< "$json")
    if [ "$count" -gt 0 ]; then
      out+="### Previous Findings${nl}"
      while IFS= read -r entry; do
        out+="$(_rm_previous_bullet "$entry")${nl}"
      done < <(jq -c '.previous_findings[]' <<< "$json")
      out+="${nl}"
    fi

    count=$(jq '.new_issues | length' <<< "$json")
    if [ "$count" -gt 0 ]; then
      out+="### New Issues${nl}"
      while IFS= read -r entry; do
        out+="$(_rm_issue_bullet "$entry")${nl}"
      done < <(jq -c '.new_issues[]' <<< "$json")
      out+="${nl}"
    fi
  else
    # Fresh review format
    local section heading
    while IFS= read -r section; do
      heading=$(jq -r '.title' <<< "$section")
      count=$(jq '.findings | length' <<< "$section")

      out+="### ${heading}${nl}"
      if [ "$count" -eq 0 ]; then
        out+="No issues found.${nl}${nl}"
        continue
      fi

      while IFS= read -r entry; do
        out+="$(_rm_issue_bullet "$entry")${nl}"
      done < <(jq -c '.findings[]' <<< "$section")
      out+="${nl}"
    done < <(jq -c '.sections[]' <<< "$json")
  fi

  # Follow-ups
  count=$(jq '.followups | length' <<< "$json")
  if [ "$count" -gt 0 ]; then
    local fu_title fu_details
    out+="### Follow-up Issues${nl}"
    while IFS= read -r entry; do
      fu_title=$(jq -r '.title' <<< "$entry")
      fu_details=$(jq -r '.details' <<< "$entry")
      out+="- **${fu_title}**: ${fu_details}${nl}"
    done < <(jq -c '.followups[]' <<< "$json")
    out+="${nl}"
  fi

  # Verdict
  out+="### Verdict${nl}"
  out+="**${VERDICT}** — ${VERDICT_REASON}${nl}"

  printf '%s' "$out"
}
"$IS_RE_REVIEW" = true ]; then echo " · Previous: \`${PREV_REVIEW_SHA:0:7}\`"; fi) · [PRODUCT-TRUTH.md](../docs/PRODUCT-TRUTH.md) · [ARCHITECTURE.md](../docs/ARCHITECTURE.md)*" + +printf '%s' "$COMMENT_BODY" > "${TMPDIR}/comment-body.txt" +jq -Rs '{body: .}' < "${TMPDIR}/comment-body.txt" > "${TMPDIR}/comment.json" + +POST_CODE=$(curl -s -o "${TMPDIR}/post-response.txt" -w "%{http_code}" \ + -X POST \ + -H "Authorization: token ${REVIEW_BOT_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API_BASE}/issues/${PR_NUMBER}/comments" \ + --data-binary @"${TMPDIR}/comment.json") + +if [ "${POST_CODE}" = "201" ]; then + log "POSTED comment to Codeberg (as review_bot)" + + # Submit formal Codeberg review (required for branch protection approval) + REVIEW_EVENT="COMMENT" + case "$VERDICT" in + APPROVE) REVIEW_EVENT="APPROVED" ;; + REQUEST_CHANGES|DISCUSS) REVIEW_EVENT="REQUEST_CHANGES" ;; + esac + + FORMAL_BODY="AI ${REVIEW_TYPE}: **${VERDICT}** — ${VERDICT_REASON}" + jq -n --arg body "$FORMAL_BODY" --arg event "$REVIEW_EVENT" --arg sha "$PR_SHA" \ + '{body: $body, event: $event, commit_id: $sha}' > "${TMPDIR}/formal-review.json" + + REVIEW_CODE=$(curl -s -o "${TMPDIR}/review-response.txt" -w "%{http_code}" \ + -X POST \ + -H "Authorization: token ${REVIEW_BOT_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API_BASE}/pulls/${PR_NUMBER}/reviews" \ + --data-binary @"${TMPDIR}/formal-review.json") + + if [ "${REVIEW_CODE}" = "200" ]; then + log "SUBMITTED formal ${REVIEW_EVENT} review" + else + log "WARNING: formal review failed (HTTP ${REVIEW_CODE}): $(head -c 200 "${TMPDIR}/review-response.txt" 2>/dev/null)" + # Non-fatal — the comment is already posted + fi +else + log "ERROR: Codeberg HTTP ${POST_CODE}: $(head -c 200 "${TMPDIR}/post-response.txt" 2>/dev/null)" + echo "$REVIEW_MD" > "${LOGDIR}/review-pr${PR_NUMBER}-${PR_SHA:0:7}.md" + log "Review saved to ${LOGDIR}/review-pr${PR_NUMBER}-${PR_SHA:0:7}.md" + exit 1 +fi + +# --- Auto-create follow-up issues from 
JSON ---
+# For every entry in .followups[] of the review JSON, open a "tech-debt"
+# issue on the repo unless an open tech-debt issue with the same title is
+# already present, then notify OpenClaw and write the final log line.
+FOLLOWUP_COUNT=$(printf '%s' "$REVIEW_JSON" | jq '.followups | length')
+if [ "${FOLLOWUP_COUNT:-0}" -gt 0 ]; then
+  log "processing ${FOLLOWUP_COUNT} follow-up issues"
+
+  # Look up the id of the tech-debt label; take the first match so several
+  # labels with that name cannot produce a multi-line value.
+  TECH_DEBT_ID=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \
+    "${API_BASE}/labels" | jq -r '[.[] | select(.name=="tech-debt") | .id] | .[0] // empty')
+
+  # Label not found — create it and use the id from the response
+  if [ -z "$TECH_DEBT_ID" ]; then
+    TECH_DEBT_ID=$(curl -sf -X POST \
+      -H "Authorization: token ${CODEBERG_TOKEN}" \
+      -H "Content-Type: application/json" \
+      "${API_BASE}/labels" \
+      -d '{"name":"tech-debt","color":"#6B7280","description":"Pre-existing tech debt flagged by AI review"}' | jq -r '.id // empty')
+  fi
+
+  # The id is spliced into JSON via --argjson below, so only a purely numeric
+  # value is accepted; anything else would make that jq call fail.
+  case "$TECH_DEBT_ID" in
+    '' | *[!0-9]*)
+      log "WARNING: could not resolve tech-debt label id — follow-ups will be created unlabelled"
+      FOLLOWUP_LABELS='[]'
+      ;;
+    *)
+      FOLLOWUP_LABELS="[${TECH_DEBT_ID}]"
+      ;;
+  esac
+
+  CREATED_COUNT=0
+  while IFS= read -r fu; do
+    FU_TITLE=$(printf '%s' "$fu" | jq -r '.title')
+    FU_DETAILS=$(printf '%s' "$fu" | jq -r '.details')
+
+    # Check for duplicate (exact title match among up to 50 open tech-debt issues)
+    EXISTING=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \
+      "${API_BASE}/issues?state=open&labels=tech-debt&limit=50" | \
+      jq -r --arg t "$FU_TITLE" '[.[] | select(.title == $t)] | length')
+
+    if [ "${EXISTING:-0}" -gt 0 ]; then
+      log "skip duplicate follow-up: ${FU_TITLE}"
+      continue
+    fi
+
+    ISSUE_BODY="Flagged by AI reviewer in PR #${PR_NUMBER}.
+
+## Problem
+
+${FU_DETAILS}
+
+---
+*Auto-created from AI review of PR #${PR_NUMBER}*"
+
+    # Body goes through a file + --rawfile so jq handles all JSON escaping
+    printf '%s' "$ISSUE_BODY" > "${TMPDIR}/followup-body.txt"
+    jq -n \
+      --arg title "$FU_TITLE" \
+      --rawfile body "${TMPDIR}/followup-body.txt" \
+      --argjson labels "$FOLLOWUP_LABELS" \
+      '{title: $title, body: $body, labels: $labels}' > "${TMPDIR}/followup-issue.json"
+
+    CREATED=$(curl -sf -X POST \
+      -H "Authorization: token ${CODEBERG_TOKEN}" \
+      -H "Content-Type: application/json" \
+      "${API_BASE}/issues" \
+      --data-binary @"${TMPDIR}/followup-issue.json" | jq -r '.number // empty')
+
+    if [ -n "$CREATED" ]; then
+      log "created follow-up issue #${CREATED}: ${FU_TITLE}"
+      CREATED_COUNT=$((CREATED_COUNT + 1))
+    else
+      # Previously silent: make a failed creation visible in the log
+      log "WARNING: failed to create follow-up issue: ${FU_TITLE}"
+    fi
+  done < <(printf '%s' "$REVIEW_JSON" | jq -c '.followups[]')
+
+  log "created ${CREATED_COUNT} follow-up issues total"
+fi
+
+# --- Notify OpenClaw ---
+# Best-effort: a failing notification must not fail the review run
+openclaw system event \
+  --text "🤖 PR #${PR_NUMBER} ${REVIEW_TYPE}: ${VERDICT} — ${PR_TITLE}" \
+  --mode now 2>/dev/null || true
+
+log "DONE: ${VERDICT} (${ELAPSED}s, re-review: ${IS_RE_REVIEW})"