Remove the head -10 cap from TECH_DEBT_ISSUES so Claude sees all tech-debt issues, not just the first 10. Apply a head -50 guard on the list passed in PROBLEMS to avoid oversized prompts while still feeding far more than the old cap. Update the problem label to drop "max 10 per run" text which contradicted the zero-tech-debt objective. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1156 lines
51 KiB
Bash
Executable file
1156 lines
51 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
# =============================================================================
|
|
# gardener-poll.sh — Issue backlog grooming agent
|
|
#
|
|
# Cron: daily (or 2x/day). Reads open issues, detects problems, invokes
|
|
# claude -p to fix or escalate.
|
|
#
|
|
# Problems detected (bash, zero tokens):
|
|
# - Duplicate titles / overlapping scope
|
|
# - Missing acceptance criteria
|
|
# - Missing dependencies (references other issues but no dep link)
|
|
# - Oversized issues (too many acceptance criteria or change files)
|
|
# - Stale issues (no activity > 14 days, still open)
|
|
# - Closed issues with open dependents still referencing them
|
|
#
|
|
# Actions taken (claude -p):
|
|
# - Close duplicates with cross-reference comment
|
|
# - Add acceptance criteria template
|
|
# - Set dependency labels
|
|
# - Split oversized issues (create sub-issues, close parent)
|
|
# - Escalate decisions to human via openclaw system event
|
|
#
|
|
# Escalation format (compact, decision-ready):
|
|
# 🌱 Issue Gardener — N items need attention
|
|
# 1. #123 "title" — duplicate of #456? (a) close #123 (b) close #456 (c) merge scope
|
|
# 2. #789 "title" — needs decision: (a) backlog (b) wontfix (c) split into X,Y
|
|
# =============================================================================
|
|
set -euo pipefail
|
|
|
|
# Resolve this script's directory and the factory root (its parent).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"

# Load shared environment (with optional project TOML override)
# Usage: gardener-poll.sh [projects/harb.toml]
export PROJECT_TOML="${1:-}"
# shellcheck source=../lib/env.sh
source "$FACTORY_ROOT/lib/env.sh"

LOG_FILE="$SCRIPT_DIR/gardener.log"
LOCK_FILE="/tmp/gardener-poll.lock"
CLAUDE_TIMEOUT=${CLAUDE_TIMEOUT:-3600}

# Append one UTC-timestamped line to the gardener log.
log() {
  printf '[%sZ] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%S)" "$*" >> "$LOG_FILE"
}
|
|
|
|
# ── Lock ──────────────────────────────────────────────────────────────────
# Single-instance guard via PID file. NOTE(review): the check-then-write
# sequence below is not atomic (TOCTOU); two pollers starting simultaneously
# could both pass — likely acceptable at cron cadence, but flock(1) would be
# race-free. Confirm before relying on strict mutual exclusion.
if [ -f "$LOCK_FILE" ]; then
  LOCK_PID=$(cat "$LOCK_FILE" 2>/dev/null || true)
  # kill -0 probes process liveness without delivering a signal.
  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then
    log "poll: gardener running (PID $LOCK_PID)"
    exit 0
  fi
  # Previous holder is dead — reclaim the stale lock file.
  rm -f "$LOCK_FILE"
fi
echo $$ > "$LOCK_FILE"
trap 'rm -f "$LOCK_FILE"' EXIT

log "--- Gardener poll start ---"

# ── Check for escalation replies from Matrix ──────────────────────────────
# The Matrix listener drops human answers into this file; consume it exactly
# once (read then delete) so a reply is never processed twice.
ESCALATION_REPLY=""
if [ -s /tmp/gardener-escalation-reply ]; then
  ESCALATION_REPLY=$(cat /tmp/gardener-escalation-reply)
  rm -f /tmp/gardener-escalation-reply
  log "Got escalation reply: $(echo "$ESCALATION_REPLY" | head -1)"
fi
|
|
|
|
# ── Inject human replies into needs_human dev sessions (backup to supervisor) ─
# Scan per-issue phase files; when a dev session is parked in PHASE:needs_human
# and a pending human reply file exists, paste the reply into that tmux session.
HUMAN_REPLY_FILE="/tmp/dev-escalation-reply"
for _gr_phase_file in /tmp/dev-session-"${PROJECT_NAME}"-*.phase; do
  [ -f "$_gr_phase_file" ] || continue
  _gr_phase=$(head -1 "$_gr_phase_file" 2>/dev/null | tr -d '[:space:]' || true)
  [ "$_gr_phase" = "PHASE:needs_human" ] || continue

  # Derive the issue number from the phase filename:
  # dev-session-<project>-<issue>.phase → <issue>
  _gr_issue=$(basename "$_gr_phase_file" .phase)
  _gr_issue="${_gr_issue#dev-session-${PROJECT_NAME}-}"
  [ -z "$_gr_issue" ] && continue
  _gr_session="dev-${PROJECT_NAME}-${_gr_issue}"

  # Only inject into sessions that actually exist.
  tmux has-session -t "$_gr_session" 2>/dev/null || continue

  # Atomic claim — only take the file once we know a session needs it
  # (rename is atomic on the same filesystem; the supervisor may race us).
  _gr_claimed="/tmp/dev-escalation-reply.gardener.$$"
  [ -s "$HUMAN_REPLY_FILE" ] && mv "$HUMAN_REPLY_FILE" "$_gr_claimed" 2>/dev/null || continue
  _gr_reply=$(cat "$_gr_claimed")

  _gr_inject_msg="Human reply received for issue #${_gr_issue}:

${_gr_reply}

Instructions:
1. Read the human's guidance carefully.
2. Continue your work based on their input.
3. When done, push your changes and write the appropriate phase."

  # Deliver via a tmux paste buffer (safe for multi-line text), then send a
  # bare Enter after a short settle delay to submit the pasted message.
  _gr_tmpfile=$(mktemp /tmp/human-inject-XXXXXX)
  printf '%s' "$_gr_inject_msg" > "$_gr_tmpfile"
  tmux load-buffer -b "human-inject-${_gr_issue}" "$_gr_tmpfile" || true
  tmux paste-buffer -t "$_gr_session" -b "human-inject-${_gr_issue}" || true
  sleep 0.5
  tmux send-keys -t "$_gr_session" "" Enter || true
  tmux delete-buffer -b "human-inject-${_gr_issue}" 2>/dev/null || true
  rm -f "$_gr_tmpfile" "$_gr_claimed"

  # Clear the re-notify marker so the supervisor doesn't ping again.
  rm -f "/tmp/dev-renotify-${PROJECT_NAME}-${_gr_issue}"
  log "${PROJECT_NAME}: #${_gr_issue} human reply injected into session ${_gr_session} (gardener)"
  break # only one reply to deliver
done
|
|
|
|
# ── Fetch all open issues ─────────────────────────────────────────────────
# codeberg_api comes from lib/env.sh; only the 50 most recently updated open
# issues are considered per run.
ISSUES_JSON=$(codeberg_api GET "/issues?state=open&type=issues&limit=50&sort=updated&direction=desc" 2>/dev/null || true)
if [ -z "$ISSUES_JSON" ] || [ "$ISSUES_JSON" = "null" ]; then
  log "Failed to fetch issues"
  exit 1
fi

ISSUE_COUNT=$(echo "$ISSUES_JSON" | jq 'length')
log "Found $ISSUE_COUNT open issues"

if [ "$ISSUE_COUNT" -eq 0 ]; then
  log "No open issues — nothing to groom"
  exit 0
fi

# ── Bash pre-checks (zero tokens) ────────────────────────────────────────

# PROBLEMS accumulates problem lines separated by LITERAL backslash-n; it is
# rendered later with `echo -e`, which expands them into real newlines.
PROBLEMS=""

# 1. Duplicate detection: issues with very similar titles
# O(n²) pairwise comparison over ≤50 titles; each pair spawns several child
# processes, so this is the slowest pre-check — still zero LLM tokens.
TITLES=$(echo "$ISSUES_JSON" | jq -r '.[] | "\(.number)\t\(.title)"')
DUPES=""
while IFS=$'\t' read -r num1 title1; do
  while IFS=$'\t' read -r num2 title2; do
    # Visit each unordered pair exactly once (num1 < num2).
    [ "$num1" -ge "$num2" ] && continue
    # Normalize: lowercase, strip prefixes + series names, collapse whitespace
    # (GNU sed: \| alternation and \s are GNU extensions — not portable BRE.)
    t1=$(echo "$title1" | tr '[:upper:]' '[:lower:]' | sed 's/^feat:\|^fix:\|^refactor://;s/llm seed[^—]*—\s*//;s/push3 evolution[^—]*—\s*//;s/[^a-z0-9 ]//g;s/ */ /g')
    t2=$(echo "$title2" | tr '[:upper:]' '[:lower:]' | sed 's/^feat:\|^fix:\|^refactor://;s/llm seed[^—]*—\s*//;s/push3 evolution[^—]*—\s*//;s/[^a-z0-9 ]//g;s/ */ /g')
    # Count shared words (>60% overlap = suspect)
    WORDS1=$(echo "$t1" | tr ' ' '\n' | sort -u)
    WORDS2=$(echo "$t2" | tr ' ' '\n' | sort -u)
    # comm requires sorted input — guaranteed by sort -u above.
    SHARED=$(comm -12 <(echo "$WORDS1") <(echo "$WORDS2") | wc -l)
    TOTAL1=$(echo "$WORDS1" | wc -l)
    TOTAL2=$(echo "$WORDS2" | wc -l)
    MIN_TOTAL=$(( TOTAL1 < TOTAL2 ? TOTAL1 : TOTAL2 ))
    # Ignore trivially short titles (≤2 distinct words) to cut noise.
    if [ "$MIN_TOTAL" -gt 2 ] && [ "$SHARED" -gt 0 ]; then
      OVERLAP=$(( SHARED * 100 / MIN_TOTAL ))
      if [ "$OVERLAP" -ge 60 ]; then
        DUPES="${DUPES}possible_dupe: #${num1} vs #${num2} (${OVERLAP}% word overlap)\n"
      fi
    fi
  done <<< "$TITLES"
done <<< "$TITLES"
[ -n "$DUPES" ] && PROBLEMS="${PROBLEMS}${DUPES}"
|
|
|
|
# 2. Missing acceptance criteria: flag issues whose body is both short
# (< 100 chars) and contains no markdown checkboxes ("- [ ]" / "- [x]").
while IFS=$'\t' read -r issue_num body_chars has_boxes; do
  [ "$body_chars" -ge 100 ] && continue
  [ "$has_boxes" != "false" ] && continue
  PROBLEMS="${PROBLEMS}thin_issue: #${issue_num} — body < 100 chars, no acceptance criteria\n"
done < <(echo "$ISSUES_JSON" | jq -r '.[] | "\(.number)\t\(.body | length)\t\(.body | test("- \\[[ x]\\]") // false)"')
|
|
|
|
# 3. Stale issues: no update in 14+ days
NOW_EPOCH=$(date +%s)
while IFS=$'\t' read -r num updated_at; do
  # GNU date -d parses the API's ISO-8601 timestamp (BSD date would need -j -f).
  UPDATED_EPOCH=$(date -d "$updated_at" +%s 2>/dev/null || echo "")
  if [ -z "$UPDATED_EPOCH" ]; then
    # Fix: the old fallback of epoch 0 made any unparseable timestamp look
    # ~19,000 days stale — a guaranteed false positive. Skip and warn instead.
    log "WARNING: unparseable updated_at '$updated_at' for #${num} — skipping stale check"
    continue
  fi
  AGE_DAYS=$(( (NOW_EPOCH - UPDATED_EPOCH) / 86400 ))
  if [ "$AGE_DAYS" -ge 14 ]; then
    PROBLEMS="${PROBLEMS}stale: #${num} — no activity for ${AGE_DAYS} days\n"
  fi
done < <(echo "$ISSUES_JSON" | jq -r '.[] | "\(.number)\t\(.updated_at)"')
|
|
|
|
# 4. Issues referencing closed deps
# Fix: the original loop computed REF_STATE but never reported anything —
# the entire detector was dead code. Closed references now feed PROBLEMS.
while IFS=$'\t' read -r num body; do
  REFS=$(echo "$body" | grep -oP '#\d+' | grep -oP '\d+' | sort -u || true)
  for ref in $REFS; do
    [ "$ref" = "$num" ] && continue
    # Look the reference up in the already-fetched open set first (free).
    REF_STATE=$(echo "$ISSUES_JSON" | jq -r --arg n "$ref" '.[] | select(.number == ($n | tonumber)) | .state' 2>/dev/null || true)
    # If ref not in our open set, check if it's closed
    if [ -z "$REF_STATE" ]; then
      REF_STATE=$(codeberg_api GET "/issues/$ref" 2>/dev/null | jq -r '.state // "unknown"' 2>/dev/null || true)
      # Rate limit protection
      sleep 0.5
    fi
    if [ "$REF_STATE" = "closed" ]; then
      PROBLEMS="${PROBLEMS}closed_dep: #${num} references closed issue #${ref}\n"
    fi
  done
# Fix: flatten newlines/carriage returns in bodies so each issue stays one
# TAB-separated record — a multi-line body previously split across reads,
# feeding body fragments into the num field.
done < <(echo "$ISSUES_JSON" | jq -r '.[] | "\(.number)\t\(.body // "" | gsub("\r|\n"; " "))"' | head -20)
|
|
|
|
# 5. Blocker detection: find issues blocking backlog items that aren't themselves backlog
# This is the HIGHEST PRIORITY — a non-backlog blocker starves the entire factory
BACKLOG_ISSUES=$(echo "$ISSUES_JSON" | jq -r '.[] | select(.labels | map(.name) | index("backlog")) | .number')
BLOCKER_NUMS=""
for BNUM in $BACKLOG_ISSUES; do
  BBODY=$(echo "$ISSUES_JSON" | jq -r --arg n "$BNUM" '.[] | select(.number == ($n | tonumber)) | .body // ""')
  # Extract deps from ## Dependencies / ## Depends on / ## Blocked by
  # Line-by-line state machine: IN_SECTION is true from a dependency header
  # until the next markdown header.
  IN_SECTION=false
  while IFS= read -r line; do
    if echo "$line" | grep -qiP '^##?\s*(Dependencies|Depends on|Blocked by)'; then IN_SECTION=true; continue; fi
    if echo "$line" | grep -qP '^##?\s' && [ "$IN_SECTION" = true ]; then IN_SECTION=false; fi
    if [ "$IN_SECTION" = true ]; then
      # Every #NNN reference inside the section is a candidate dependency.
      for dep in $(echo "$line" | grep -oP '#\d+' | grep -oP '\d+'); do
        [ "$dep" = "$BNUM" ] && continue
        # Check if dep is open but NOT backlog-labeled
        DEP_STATE=$(echo "$ISSUES_JSON" | jq -r --arg n "$dep" '.[] | select(.number == ($n | tonumber)) | .state' 2>/dev/null || true)
        DEP_LABELS=$(echo "$ISSUES_JSON" | jq -r --arg n "$dep" '.[] | select(.number == ($n | tonumber)) | [.labels[].name] | join(",")' 2>/dev/null || true)
        # Comma-wrapping both sides makes the label match exact, not substring.
        if [ "$DEP_STATE" = "open" ] && ! echo ",$DEP_LABELS," | grep -q ',backlog,'; then
          BLOCKER_NUMS="${BLOCKER_NUMS} ${dep}"
        fi
      done
    fi
  done <<< "$BBODY"
done
# Deduplicate blockers (and cap at 10 so the prompt section stays digestible)
BLOCKER_NUMS=$(echo "$BLOCKER_NUMS" | tr ' ' '\n' | sort -un | head -10)
if [ -n "$BLOCKER_NUMS" ]; then
  BLOCKER_LIST=""
  for bnum in $BLOCKER_NUMS; do
    BTITLE=$(echo "$ISSUES_JSON" | jq -r --arg n "$bnum" '.[] | select(.number == ($n | tonumber)) | .title' 2>/dev/null || true)
    BLABELS=$(echo "$ISSUES_JSON" | jq -r --arg n "$bnum" '.[] | select(.number == ($n | tonumber)) | [.labels[].name] | join(",")' 2>/dev/null || true)
    BLOCKER_LIST="${BLOCKER_LIST}#${bnum} [${BLABELS:-unlabeled}] ${BTITLE}\n"
  done
  PROBLEMS="${PROBLEMS}PRIORITY_blockers_starving_factory: these issues block backlog items but are NOT labeled backlog — promote them FIRST:\n${BLOCKER_LIST}\n"
fi
|
|
|
|
# 6. Tech-debt issues needing promotion to backlog (secondary to blockers)
# All tech-debt issues are surfaced (no per-run cap); only the list embedded
# in the prompt is guarded with head -50 to bound prompt size — the count
# still reflects the full total.
TECH_DEBT_ISSUES=$(echo "$ISSUES_JSON" | jq -r '.[] | select(.labels | map(.name) | index("tech-debt")) | "#\(.number) \(.title)"')
if [ -n "$TECH_DEBT_ISSUES" ]; then
  TECH_DEBT_COUNT=$(echo "$TECH_DEBT_ISSUES" | wc -l)
  PROBLEMS="${PROBLEMS}tech_debt_promotion: ${TECH_DEBT_COUNT} tech-debt issues need processing (goal: zero tech-debt):\n$(echo "$TECH_DEBT_ISSUES" | head -50)\n"
fi

# Count non-empty lines after expanding the literal \n separators.
PROBLEM_COUNT=$(echo -e "$PROBLEMS" | grep -c '.' || true)
log "Detected $PROBLEM_COUNT potential problems"

if [ "$PROBLEM_COUNT" -eq 0 ]; then
  log "Backlog is clean — nothing to groom"
  exit 0
fi

# ── Invoke claude -p ──────────────────────────────────────────────────────
log "Invoking claude -p for grooming"

# Build issue summary for context (titles + labels + deps)
ISSUE_SUMMARY=$(echo "$ISSUES_JSON" | jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"')

# Build list of issues already staged as dust (so LLM doesn't re-emit them)
DUST_FILE="$SCRIPT_DIR/dust.jsonl"
STAGED_DUST=""
if [ -s "$DUST_FILE" ]; then
  STAGED_DUST=$(jq -r '"#\(.issue) (\(.group))"' "$DUST_FILE" 2>/dev/null | sort -u || true)
fi
|
|
|
|
# ── Grooming prompt ───────────────────────────────────────────────────────
# One large double-quoted string. Escaped \" and \$ survive into the prompt
# text; the embedded $(...) substitutions (problems, staged dust, escalation
# reply) run at assignment time. The tech-debt list inside PROBLEMS is already
# capped at 50 lines above.
PROMPT="You are the issue gardener for ${CODEBERG_REPO}. Your job: keep the backlog clean, well-structured, and actionable.

## Current open issues
$ISSUE_SUMMARY

## Problems detected
$(echo -e "$PROBLEMS")

## Tools available
- Codeberg API: use curl with the CODEBERG_TOKEN env var (already set in your environment)
- Base URL: ${CODEBERG_API}
- Read issue: \`curl -sf -H \"Authorization: token \$CODEBERG_TOKEN\" '${CODEBERG_API}/issues/{number}' | jq '.body'\`
- Relabel: \`curl -sf -H \"Authorization: token \$CODEBERG_TOKEN\" -X PUT -H 'Content-Type: application/json' '${CODEBERG_API}/issues/{number}/labels' -d '{\"labels\":[LABEL_ID]}'\`
- Comment: \`curl -sf -H \"Authorization: token \$CODEBERG_TOKEN\" -X POST -H 'Content-Type: application/json' '${CODEBERG_API}/issues/{number}/comments' -d '{\"body\":\"...\"}'\`
- Close: \`curl -sf -H \"Authorization: token \$CODEBERG_TOKEN\" -X PATCH -H 'Content-Type: application/json' '${CODEBERG_API}/issues/{number}' -d '{\"state\":\"closed\"}'\`
- Edit body: \`curl -sf -H \"Authorization: token \$CODEBERG_TOKEN\" -X PATCH -H 'Content-Type: application/json' '${CODEBERG_API}/issues/{number}' -d '{\"body\":\"new body\"}'\`
- List labels: \`curl -sf -H \"Authorization: token \$CODEBERG_TOKEN\" '${CODEBERG_API}/labels'\` (to find label IDs)
- NEVER echo, log, or include the actual token value in any output — always reference \$CODEBERG_TOKEN
- You're running in the project repo root. Read README.md and any docs/ files before making decisions.

## Primary mission: unblock the factory
Issues prefixed with PRIORITY_blockers_starving_factory are your TOP priority. These are non-backlog issues that block existing backlog items — the dev-agent is completely starved until these are promoted. Process ALL of them before touching regular tech-debt.

## Your objective: zero tech-debt issues

Tech-debt is unprocessed work — it sits outside the factory pipeline
(dev-agent only pulls backlog). Every tech-debt issue is a decision
you haven't made yet:

- Substantial? → promote to backlog (add affected files, acceptance
criteria, dependencies)
- Dust? → bundle into an ore issue
- Duplicate? → close with cross-reference
- Invalid/wontfix? → close with explanation
- Needs human decision? → escalate

Process ALL tech-debt issues every run. The goal is zero tech-debt
when you're done. If you can't reach zero (needs human input,
unclear scope), escalate those specifically and close out everything
else.

Tech-debt is your inbox. An empty inbox is a healthy factory.

## Dust vs Ore — bundle trivial tech-debt
Don't promote trivial tech-debt individually — each costs a full factory cycle (CI + dev-agent + review + merge). If an issue is dust (comment fix, rename, style-only, single-line change, trivial cleanup), output a DUST line instead of promoting:

DUST: {\"issue\": NNN, \"group\": \"<file-or-subsystem>\", \"title\": \"issue title\", \"reason\": \"why it's dust\"}

Group by file or subsystem (e.g. \"gardener\", \"lib/env.sh\", \"dev-poll\"). The script collects dust items into a staging file. When a group accumulates 3+ items, the script bundles them into one backlog issue automatically.

Only promote tech-debt that is substantial: multi-file changes, behavioral fixes, architectural improvements. Dust is any issue where the fix is a single-line edit, a rename, a comment tweak, or a style-only change.
$(if [ -n "$STAGED_DUST" ]; then echo "
These issues are ALREADY staged as dust — do NOT emit DUST lines for them again:
${STAGED_DUST}"; fi)

## Other rules
1. **Duplicates**: If confident (>80% overlap + same scope after reading bodies), close the newer one with a comment referencing the older. If unsure, ESCALATE.
2. **Thin issues** (non-tech-debt): Add acceptance criteria. Read the body first.
3. **Stale issues**: If clearly superseded or no longer relevant, close with explanation. If unclear, ESCALATE.
4. **Oversized issues**: If >5 acceptance criteria touching different files/concerns, ESCALATE with suggested split.
5. **Dependencies**: If an issue references another that must land first, add a \`## Dependencies\n- #NNN\` section if missing.
6. **Sibling issues**: When creating multiple issues from the same source (PR review, code audit), NEVER add bidirectional dependencies between them. Siblings are independent work items, not parent/child. Use \`## Related\n- #NNN (sibling)\` for cross-references between siblings — NOT \`## Dependencies\`. The dev-poll \`get_deps()\` parser only reads \`## Dependencies\` / \`## Depends on\` / \`## Blocked by\` headers, so \`## Related\` is safely ignored. Bidirectional deps create permanent deadlocks that stall the entire factory.

## Escalation format
For anything needing human decision, output EXACTLY this format (one block, all items):
\`\`\`
ESCALATE
1. #NNN \"title\" — reason (a) option1 (b) option2 (c) option3
2. #NNN \"title\" — reason (a) option1 (b) option2
\`\`\`

## Output format (MANDATORY — the script parses these exact prefixes)
- After EVERY action you take, print exactly: ACTION: <description>
- For trivial tech-debt (dust), print exactly: DUST: {\"issue\": NNN, \"group\": \"<subsystem>\", \"title\": \"...\", \"reason\": \"...\"}
- For issues needing human decision, output EXACTLY:
ESCALATE
1. #NNN \"title\" — reason (a) option1 (b) option2
- If truly nothing to do, print: CLEAN

## Important
- You MUST process the tech_debt_promotion items listed above. Read each issue, add acceptance criteria + affected files, then relabel to backlog.
- If an issue is ambiguous or needs a design decision, ESCALATE it — don't skip it silently.
- Every tech-debt issue in the list above should result in either an ACTION (promoted) or an ESCALATE (needs decision). Never skip silently.
$(if [ -n "$ESCALATION_REPLY" ]; then echo "
## Human Response to Previous Escalation
The human replied with shorthand choices keyed to the previous ESCALATE block.
Format: '1a 2c 3b' means question 1→option (a), question 2→option (c), question 3→option (b).

Raw reply:
${ESCALATION_REPLY}

Execute each chosen option NOW via the Codeberg API before processing new items.
If a choice is unclear, re-escalate that single item with a clarifying question."; fi)"
|
|
|
|
# Run claude headless from the project repo root, hard-capped by timeout(1)
# at CLAUDE_TIMEOUT seconds. stderr is discarded; on any non-zero exit
# (timeout, crash) CLAUDE_OUTPUT keeps whatever was produced and the script
# carries on (|| true) so partial output can still be parsed below.
CLAUDE_OUTPUT=$(cd "${PROJECT_REPO_ROOT}" && CODEBERG_TOKEN="$CODEBERG_TOKEN" timeout "$CLAUDE_TIMEOUT" \
  claude -p "$PROMPT" \
  --model sonnet \
  --dangerously-skip-permissions \
  --max-turns 30 \
  2>/dev/null) || true

log "claude finished ($(echo "$CLAUDE_OUTPUT" | wc -c) bytes)"
|
|
|
|
# ── Parse escalations ────────────────────────────────────────────────────
# Primary: extract the fenced block between a bare ESCALATE line and the
# closing ``` fence, dropping both delimiters.
ESCALATION=$(echo "$CLAUDE_OUTPUT" | sed -n '/^ESCALATE$/,/^```$/p' | grep -v '^ESCALATE$\|^```$' || true)
if [ -z "$ESCALATION" ]; then
  # Fallback for unfenced output: keep numbered item lines after ESCALATE.
  # Fix: '\d' is not a BRE class (GNU grep matched a literal 'd'), so this
  # fallback could never match numbered lines — use [0-9] instead.
  ESCALATION=$(echo "$CLAUDE_OUTPUT" | grep -A50 "^ESCALATE" | grep '^[0-9]' || true)
fi

if [ -n "$ESCALATION" ]; then
  ITEM_COUNT=$(echo "$ESCALATION" | grep -c '.' || true)
  log "Escalating $ITEM_COUNT items to human"

  # Send via Matrix (threaded — replies route back via listener)
  matrix_send "gardener" "🌱 Issue Gardener — ${ITEM_COUNT} item(s) need attention

${ESCALATION}

Reply with numbers+letters (e.g. 1a 2c) to decide." 2>/dev/null || true
fi
|
|
|
|
# ── Log actions taken ─────────────────────────────────────────────────────
# Mirror every "ACTION: ..." line from the model output into the log.
ACTIONS=$(echo "$CLAUDE_OUTPUT" | grep "^ACTION:" || true)
if [ -n "$ACTIONS" ]; then
  echo "$ACTIONS" | while read -r line; do
    log "  $line"
  done
fi

# ── Collect dust items ───────────────────────────────────────────────────
# DUST_FILE already set above (before prompt construction)
# Each "DUST: {...}" line from the model is validated and appended to
# dust.jsonl with a timestamp; duplicates (by issue number) are skipped.
DUST_LINES=$(echo "$CLAUDE_OUTPUT" | grep "^DUST: " | sed 's/^DUST: //' || true)
if [ -n "$DUST_LINES" ]; then
  # Build set of issue numbers already in dust.jsonl for dedup
  EXISTING_DUST_ISSUES=""
  if [ -s "$DUST_FILE" ]; then
    EXISTING_DUST_ISSUES=$(jq -r '.issue' "$DUST_FILE" 2>/dev/null | sort -nu || true)
  fi

  DUST_COUNT=0
  while IFS= read -r dust_json; do
    [ -z "$dust_json" ] && continue
    # Validate JSON — must parse and carry both .issue and .group.
    if ! echo "$dust_json" | jq -e '.issue and .group' >/dev/null 2>&1; then
      log "WARNING: invalid dust JSON: $dust_json"
      continue
    fi
    # Deduplicate: skip if this issue is already staged
    # (grep -qx = whole-line exact match against the staged-number list).
    dust_issue_num=$(echo "$dust_json" | jq -r '.issue')
    if echo "$EXISTING_DUST_ISSUES" | grep -qx "$dust_issue_num" 2>/dev/null; then
      log "Skipping duplicate dust entry for issue #${dust_issue_num}"
      continue
    fi
    # Track the number in-memory too, so the same run can't stage it twice.
    EXISTING_DUST_ISSUES="${EXISTING_DUST_ISSUES}
${dust_issue_num}"
    echo "$dust_json" | jq -c '. + {"ts": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' >> "$DUST_FILE"
    DUST_COUNT=$((DUST_COUNT + 1))
  done <<< "$DUST_LINES"
  log "Collected $DUST_COUNT dust item(s) (duplicates skipped)"
fi
|
|
|
|
# ── Expire stale dust entries (30-day TTL) ───────────────────────────────
if [ -s "$DUST_FILE" ]; then
  CUTOFF=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || true)
  if [ -n "$CUTOFF" ]; then
    BEFORE_COUNT=$(wc -l < "$DUST_FILE")
    # String comparison is valid because ts is ISO-8601 (lexicographic order
    # equals chronological order). NOTE(review): an entry lacking .ts compares
    # null >= $c == false and is silently dropped — confirm no legacy entries
    # exist without a ts field.
    if jq -c --arg c "$CUTOFF" 'select(.ts >= $c)' "$DUST_FILE" > "${DUST_FILE}.ttl" 2>/dev/null; then
      mv "${DUST_FILE}.ttl" "$DUST_FILE"
      AFTER_COUNT=$(wc -l < "$DUST_FILE")
      EXPIRED=$((BEFORE_COUNT - AFTER_COUNT))
      [ "$EXPIRED" -gt 0 ] && log "Expired $EXPIRED stale dust entries (>30 days old)"
    else
      rm -f "${DUST_FILE}.ttl"
      log "WARNING: TTL cleanup failed — dust.jsonl left unchanged"
    fi
  fi
fi

# ── Bundle dust groups with 3+ distinct issues ──────────────────────────
if [ -s "$DUST_FILE" ]; then
  # Count distinct issues per group (not raw entries)
  DUST_GROUPS=$(jq -r '[.group, (.issue | tostring)] | join("\t")' "$DUST_FILE" 2>/dev/null \
    | sort -u | cut -f1 | sort | uniq -c | sort -rn || true)
  # NOTE(review): `read -r count group` splits on whitespace, so a group name
  # containing a space would be truncated to its first word — verify group
  # names stay single-token (the prompt suggests file/subsystem names).
  while read -r count group; do
    [ -z "$group" ] && continue
    [ "$count" -lt 3 ] && continue

    log "Bundling dust group '$group' ($count distinct issues)"

    # Collect deduplicated issue references and details for this group
    BUNDLE_ISSUES=$(jq -r --arg g "$group" 'select(.group == $g) | "#\(.issue) \(.title // "untitled") — \(.reason // "dust")"' "$DUST_FILE" | sort -u)
    BUNDLE_ISSUE_NUMS=$(jq -r --arg g "$group" 'select(.group == $g) | .issue' "$DUST_FILE" | sort -nu)
    DISTINCT_COUNT=$(echo "$BUNDLE_ISSUE_NUMS" | grep -c '.' || true)

    bundle_title="fix: bundled dust cleanup — ${group}"
    bundle_body="## Bundled dust cleanup — \`${group}\`

Gardener bundled ${DISTINCT_COUNT} trivial tech-debt items into one issue to save factory cycles.

### Items
$(echo "$BUNDLE_ISSUES" | sed 's/^/- /')

### Instructions
Fix all items above in a single PR. Each is a small change (rename, comment, style fix, single-line edit).

### Affected files
- Files in \`${group}\` subsystem

### Acceptance criteria
- [ ] All listed items resolved
- [ ] ShellCheck passes"

    # Create the bundle issue; new_bundle stays empty on API failure.
    new_bundle=$(curl -sf -X POST \
      -H "Authorization: token ${CODEBERG_TOKEN}" \
      -H "Content-Type: application/json" \
      "${CODEBERG_API}/issues" \
      -d "$(jq -nc --arg t "$bundle_title" --arg b "$bundle_body" \
        '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true

    if [ -n "$new_bundle" ]; then
      log "Created bundle issue #${new_bundle} for dust group '$group' ($DISTINCT_COUNT items)"
      matrix_send "gardener" "📦 Bundled ${DISTINCT_COUNT} dust items (${group}) → #${new_bundle}" 2>/dev/null || true

      # Close source issues with cross-reference
      for src_issue in $BUNDLE_ISSUE_NUMS; do
        curl -sf -X POST \
          -H "Authorization: token ${CODEBERG_TOKEN}" \
          -H "Content-Type: application/json" \
          "${CODEBERG_API}/issues/${src_issue}/comments" \
          -d "$(jq -nc --arg b "Bundled into #${new_bundle} (dust cleanup)" '{"body":$b}')" 2>/dev/null || true
        curl -sf -X PATCH \
          -H "Authorization: token ${CODEBERG_TOKEN}" \
          -H "Content-Type: application/json" \
          "${CODEBERG_API}/issues/${src_issue}" \
          -d '{"state":"closed"}' 2>/dev/null || true
        log "Closed source issue #${src_issue} → bundled into #${new_bundle}"
      done

      # Remove bundled items from dust.jsonl — only if jq succeeds
      if jq -c --arg g "$group" 'select(.group != $g)' "$DUST_FILE" > "${DUST_FILE}.tmp" 2>/dev/null; then
        mv "${DUST_FILE}.tmp" "$DUST_FILE"
      else
        rm -f "${DUST_FILE}.tmp"
        log "WARNING: failed to prune bundled group '$group' from dust.jsonl"
      fi
    fi
  done <<< "$DUST_GROUPS"
fi
|
|
|
|
|
|
# ── Recipe matching engine ────────────────────────────────────────────────
RECIPE_DIR="$SCRIPT_DIR/recipes"

# match_recipe — Find first matching recipe for escalation context
# Args: $1=step_names_json $2=output_file_path $3=pr_info_json
# Stdout: JSON {name, playbook} — "generic" fallback if no match
# Python errors are captured to a temp file and surfaced as a log warning;
# the function always emits valid JSON on stdout.
match_recipe() {
  _mr_stderr=$(mktemp /tmp/recipe-match-err-XXXXXX)
  # Quoted 'PYEOF' delimiter: no shell expansion inside the Python source.
  _mr_result=$(RECIPE_DIR="$RECIPE_DIR" python3 - "$1" "$2" "$3" 2>"$_mr_stderr" <<'PYEOF'
import sys, os, re, json, glob
try:
    import tomllib
except ModuleNotFoundError:
    import tomli as tomllib  # Python < 3.11 fallback (pip install tomli)

recipe_dir = os.environ["RECIPE_DIR"]
recipes = []
for path in sorted(glob.glob(os.path.join(recipe_dir, "*.toml"))):
    with open(path, "rb") as f:
        recipes.append(tomllib.load(f))

# Lower priority number wins (checked first); default priority is 50.
recipes.sort(key=lambda r: r.get("priority", 50))

step_names = json.loads(sys.argv[1])
output_path = sys.argv[2]
pr_info = json.loads(sys.argv[3])

step_output = ""
if os.path.isfile(output_path):
    with open(output_path) as f:
        step_output = f.read()

# All trigger conditions present in a recipe must match (AND semantics).
for recipe in recipes:
    trigger = recipe.get("trigger", {})
    matched = True

    if matched and "step_name" in trigger:
        if not any(re.search(trigger["step_name"], n) for n in step_names):
            matched = False

    if matched and "output" in trigger:
        if not re.search(trigger["output"], step_output):
            matched = False

    if matched and "pr_mergeable" in trigger:
        if pr_info.get("mergeable") != trigger["pr_mergeable"]:
            matched = False

    if matched and "pr_files" in trigger:
        changed = pr_info.get("changed_files", [])
        if not any(re.search(trigger["pr_files"], f) for f in changed):
            matched = False

    if matched and "min_attempts" in trigger:
        if pr_info.get("attempts", 1) < trigger["min_attempts"]:
            matched = False

    if matched and trigger.get("failures_on_unchanged"):
        # Check if errors reference files NOT changed in the PR
        # Patterns: ShellCheck "In file.sh line 5:", generic "file.sh:5:10: error",
        # ESLint/pylint "file.py:10:5: E123", Go "file.go:5:3:"
        error_files = set()
        error_files.update(re.findall(r"(?<=In )\S+(?= line \d+:)", step_output))
        error_files.update(re.findall(r"^(\S+\.\w+):\d+", step_output, re.MULTILINE))
        changed = set(pr_info.get("changed_files", []))
        if not error_files or error_files <= changed:
            matched = False

    if matched:
        print(json.dumps({"name": recipe["name"], "playbook": recipe.get("playbook", [])}))
        sys.exit(0)

print(json.dumps({"name": "generic", "playbook": [{"action": "create-generic-issue"}]}))
PYEOF
) || true
  if [ -s "$_mr_stderr" ]; then
    log "WARNING: match_recipe error: $(head -3 "$_mr_stderr" | tr '\n' ' ')"
  fi
  rm -f "$_mr_stderr"
  # Guarantee well-formed JSON on stdout even if python failed outright.
  if [ -z "$_mr_result" ] || ! echo "$_mr_result" | jq -e '.name' >/dev/null 2>&1; then
    echo '{"name":"generic","playbook":[{"action":"create-generic-issue"}]}'
  else
    echo "$_mr_result"
  fi
}
|
|
|
|
# ── Playbook action functions ────────────────────────────────────────────
# Globals used by playbook functions (set by escalation loop):
# ESC_ISSUE, ESC_PR, ESC_ATTEMPTS, ESC_PIPELINE — escalation context
# _PB_FAILED_STEPS — "pid\tname" per line of failed CI steps
# _PB_LOG_DIR — temp dir with step-{pid}.log files
# _PB_SUB_CREATED — sub-issue counter for current escalation
# _esc_total_created — running total across all escalations

# Create per-file ShellCheck sub-issues from CI output
# Reads:  _PB_FAILED_STEPS, _PB_LOG_DIR, ESC_* globals
# Writes: _PB_SUB_CREATED, _esc_total_created; creates Codeberg issues
playbook_shellcheck_per_file() {
  local step_pid step_name step_log_file step_logs
  while IFS=$'\t' read -r step_pid step_name; do
    [ -z "$step_pid" ] && continue
    # Only steps whose name mentions shellcheck (case-insensitive).
    echo "$step_name" | grep -qi "shellcheck" || continue
    step_log_file="${_PB_LOG_DIR}/step-${step_pid}.log"
    [ -f "$step_log_file" ] || continue
    step_logs=$(cat "$step_log_file")

    # Filenames from ShellCheck's "In <file> line N:" header lines.
    local sc_files
    sc_files=$(echo "$step_logs" | grep -oP '(?<=In )\S+(?= line \d+:)' | sort -u || true)

    local sc_file file_errors sc_codes sub_title sub_body new_issue
    while IFS= read -r sc_file; do
      [ -z "$sc_file" ] && continue
      # grep -F for literal filename match (dots in filenames are regex wildcards)
      file_errors=$(echo "$step_logs" | grep -F -A3 "In ${sc_file} line" | head -30)
      # SC codes only from this file's errors, not the whole step log
      sc_codes=$(echo "$file_errors" | grep -oP 'SC\d+' | sort -u | tr '\n' ' ' | sed 's/ $//' || true)

      sub_title="fix: ShellCheck errors in ${sc_file} (from PR #${ESC_PR})"
      sub_body="## ShellCheck CI failure — \`${sc_file}\`

Spawned by gardener from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)).

### Errors
\`\`\`
${file_errors}
\`\`\`

Fix all ShellCheck errors${sc_codes:+ (${sc_codes})} in \`${sc_file}\` so PR #${ESC_PR} CI passes.

### Context
- Parent issue: #${ESC_ISSUE}
- PR: #${ESC_PR}
- Pipeline: #${ESC_PIPELINE} (step: ${step_name})"

      # Open a backlog sub-issue; new_issue stays empty on API failure.
      new_issue=$(curl -sf -X POST \
        -H "Authorization: token ${CODEBERG_TOKEN}" \
        -H "Content-Type: application/json" \
        "${CODEBERG_API}/issues" \
        -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
          '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true

      if [ -n "$new_issue" ]; then
        log "Created sub-issue #${new_issue}: ShellCheck in ${sc_file} (from #${ESC_ISSUE})"
        _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
        _esc_total_created=$((_esc_total_created + 1))
        matrix_send "gardener" "📋 Created sub-issue #${new_issue}: ShellCheck in ${sc_file} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
      fi
    done <<< "$sc_files"
  done <<< "$_PB_FAILED_STEPS"
}
|
|
|
|
# Create per-file issues from any lint/check CI output (generic — no step name filter)
# Reads:  _PB_FAILED_STEPS, _PB_LOG_DIR, ESC_* globals
# Writes: _PB_SUB_CREATED, _esc_total_created; creates Codeberg issues
playbook_lint_per_file() {
  local step_pid step_name step_log_file step_logs
  while IFS=$'\t' read -r step_pid step_name; do
    [ -z "$step_pid" ] && continue
    step_log_file="${_PB_LOG_DIR}/step-${step_pid}.log"
    [ -f "$step_log_file" ] || continue
    step_logs=$(cat "$step_log_file")

    # Extract unique file paths from lint output (multiple formats):
    # ShellCheck: "In file.sh line 5:"
    # Generic: "file.sh:5:10: error"
    local lint_files
    lint_files=$( {
      echo "$step_logs" | grep -oP '(?<=In )\S+(?= line \d+:)' || true
      echo "$step_logs" | grep -oP '^\S+\.\w+(?=:\d+)' || true
    } | sort -u)

    local lint_file file_errors sc_codes sub_title sub_body new_issue
    while IFS= read -r lint_file; do
      [ -z "$lint_file" ] && continue
      # Extract errors for this file (try both formats)
      file_errors=$(echo "$step_logs" | grep -F -A3 "In ${lint_file} line" 2>/dev/null | head -30 || true)
      if [ -z "$file_errors" ]; then
        file_errors=$(echo "$step_logs" | grep -F "${lint_file}:" | head -30 || true)
      fi
      [ -z "$file_errors" ] && continue
      # Extract SC codes if present (harmless for non-ShellCheck output)
      sc_codes=$(echo "$file_errors" | grep -oP 'SC\d+' | sort -u | tr '\n' ' ' | sed 's/ $//' || true)

      sub_title="fix: lint errors in ${lint_file} (from PR #${ESC_PR})"
      sub_body="## Lint CI failure — \`${lint_file}\`

Spawned by gardener from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)).

### Errors
\`\`\`
${file_errors}
\`\`\`

Fix all errors${sc_codes:+ (${sc_codes})} in \`${lint_file}\` so PR #${ESC_PR} CI passes.

### Context
- Parent issue: #${ESC_ISSUE}
- PR: #${ESC_PR}
- Pipeline: #${ESC_PIPELINE} (step: ${step_name})"

      # Open a backlog sub-issue; new_issue stays empty on API failure.
      new_issue=$(curl -sf -X POST \
        -H "Authorization: token ${CODEBERG_TOKEN}" \
        -H "Content-Type: application/json" \
        "${CODEBERG_API}/issues" \
        -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
          '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true

      if [ -n "$new_issue" ]; then
        log "Created sub-issue #${new_issue}: lint in ${lint_file} (from #${ESC_ISSUE})"
        _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
        _esc_total_created=$((_esc_total_created + 1))
        matrix_send "gardener" "📋 Created sub-issue #${new_issue}: lint in ${lint_file} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
      fi
    done <<< "$lint_files"
  done <<< "$_PB_FAILED_STEPS"
}
|
|
|
|
# Create one combined issue for non-ShellCheck CI failures
#
# Concatenates the tail of every failed non-shellcheck step's log into a single
# "backlog"-labelled Codeberg issue. Returns early (0) when no such step produced
# any output.
# Globals read:  _PB_FAILED_STEPS, _PB_LOG_DIR, ESC_ISSUE, ESC_PR, ESC_ATTEMPTS,
#                ESC_PIPELINE, CODEBERG_TOKEN, CODEBERG_API
# Globals set:   _PB_SUB_CREATED, _esc_total_created
playbook_create_generic_issue() {
  local generic_fail="" step_pid step_name step_log_file step_logs esc_section
  while IFS=$'\t' read -r step_pid step_name; do
    [ -z "$step_pid" ] && continue
    # Skip shellcheck steps (handled by shellcheck-per-file action)
    echo "$step_name" | grep -qi "shellcheck" && continue
    step_log_file="${_PB_LOG_DIR}/step-${step_pid}.log"
    [ -f "$step_log_file" ] || continue
    step_logs=$(cat "$step_log_file")

    # Keep only the last 50 lines per step so the issue body stays bounded.
    esc_section="=== ${step_name} ===
$(echo "$step_logs" | tail -50)"
    if [ -z "$generic_fail" ]; then
      generic_fail="$esc_section"
    else
      generic_fail="${generic_fail}
${esc_section}"
    fi
  done <<< "$_PB_FAILED_STEPS"

  # No non-shellcheck failures with logs — nothing to report.
  [ -z "$generic_fail" ] && return 0

  local sub_title sub_body new_issue
  sub_title="fix: CI failures in PR #${ESC_PR} (from issue #${ESC_ISSUE})"
  sub_body="## CI failure — fix required

Spawned by gardener from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)).

### Failed step output
\`\`\`
${generic_fail}
\`\`\`

### Context
- Parent issue: #${ESC_ISSUE}
- PR: #${ESC_PR}${ESC_PIPELINE:+
- Pipeline: #${ESC_PIPELINE}}"

  # Best-effort issue creation; failure leaves new_issue empty.
  new_issue=$(curl -sf -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H "Content-Type: application/json" \
    "${CODEBERG_API}/issues" \
    -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
      '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true

  if [ -n "$new_issue" ]; then
    log "Created sub-issue #${new_issue}: CI failures for PR #${ESC_PR} (from #${ESC_ISSUE})"
    _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
    _esc_total_created=$((_esc_total_created + 1))
    matrix_send "gardener" "📋 Created sub-issue #${new_issue}: CI failures for PR #${ESC_PR} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
  fi
}
|
|
|
|
# Create issue to make failing CI step non-blocking (chicken-egg-ci)
#
# Files a "backlog"-labelled issue asking for `|| true` to be added to the CI
# step(s) that fail on pre-existing code introduced alongside the step itself.
# Globals read:  _PB_FAILED_STEPS, ESC_ISSUE, ESC_PR, ESC_PIPELINE,
#                CODEBERG_TOKEN, CODEBERG_API
# Globals set:   _PB_SUB_CREATED, _esc_total_created
playbook_make_step_non_blocking() {
  local failing_steps sub_title sub_body new_issue
  # Join failed step names with commas (tr maps \n -> ','; extra set2 chars
  # are ignored by tr), then strip the trailing comma.
  failing_steps=$(echo "$_PB_FAILED_STEPS" | cut -f2 | tr '\n' ', ' | sed 's/,$//' || true)

  sub_title="fix: make CI step non-blocking for pre-existing failures (PR #${ESC_PR})"
  sub_body="## Chicken-egg CI failure

PR #${ESC_PR} (issue #${ESC_ISSUE}) introduces a CI step that fails on pre-existing code.

Failing step(s): ${failing_steps}

### Playbook
1. Add \`|| true\` to the failing step(s) in the Woodpecker config
2. This makes the step advisory (non-blocking) until pre-existing violations are fixed

### Context
- Parent issue: #${ESC_ISSUE}
- PR: #${ESC_PR}${ESC_PIPELINE:+
- Pipeline: #${ESC_PIPELINE}}"

  # Best-effort issue creation; failure leaves new_issue empty.
  new_issue=$(curl -sf -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H "Content-Type: application/json" \
    "${CODEBERG_API}/issues" \
    -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
      '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true

  if [ -n "$new_issue" ]; then
    log "Created #${new_issue}: make step non-blocking (chicken-egg from #${ESC_ISSUE})"
    _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
    _esc_total_created=$((_esc_total_created + 1))
    matrix_send "gardener" "📋 Created #${new_issue}: make CI step non-blocking (chicken-egg, from #${ESC_ISSUE})" 2>/dev/null || true
  fi
}
|
|
|
|
# Create follow-up issue to remove || true bypass (chicken-egg-ci)
#
# Companion to playbook_make_step_non_blocking: once the pre-existing
# violations are fixed, this issue reminds to make the CI step blocking again.
# Globals read:  ESC_ISSUE, ESC_PR, CODEBERG_TOKEN, CODEBERG_API
# Globals set:   _PB_SUB_CREATED, _esc_total_created
playbook_create_followup_remove_bypass() {
  local sub_title sub_body new_issue
  sub_title="fix: remove || true bypass once pre-existing violations are fixed (PR #${ESC_PR})"
  sub_body="## Follow-up: remove CI bypass

After all pre-existing violation issues from PR #${ESC_PR} are resolved, remove the \`|| true\` bypass from the CI step to make it blocking again.

### Depends on
All per-file fix issues created from escalated issue #${ESC_ISSUE}.

### Context
- Parent issue: #${ESC_ISSUE}
- PR: #${ESC_PR}"

  # Best-effort issue creation; failure leaves new_issue empty.
  new_issue=$(curl -sf -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H "Content-Type: application/json" \
    "${CODEBERG_API}/issues" \
    -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
      '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true

  if [ -n "$new_issue" ]; then
    log "Created follow-up #${new_issue}: remove bypass (from #${ESC_ISSUE})"
    _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
    _esc_total_created=$((_esc_total_created + 1))
  fi
}
|
|
|
|
# Rebase PR onto main branch (cascade-rebase)
#
# POSTs to the forge's PR "update" endpoint with rebase style. Success is
# judged by the HTTP status code (2xx), not the response body.
# Globals read:  ESC_PR, ESC_ISSUE, PRIMARY_BRANCH, CODEBERG_TOKEN, CODEBERG_API
# Globals set:   _PB_SUB_CREATED (incremented on success so the caller's
#                fallback investigation issue is suppressed)
playbook_rebase_pr() {
  log "Rebasing PR #${ESC_PR} onto ${PRIMARY_BRANCH}"
  local http_code
  http_code=$(curl -s -o /dev/null -w '%{http_code}' -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H "Content-Type: application/json" \
    "${CODEBERG_API}/pulls/${ESC_PR}/update" \
    -d '{"style":"rebase"}' 2>/dev/null) || true

  if [ "${http_code:-0}" -ge 200 ] && [ "${http_code:-0}" -lt 300 ]; then
    log "Rebase initiated for PR #${ESC_PR} (HTTP ${http_code})"
    _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
    matrix_send "gardener" "🔄 Rebased PR #${ESC_PR} onto ${PRIMARY_BRANCH} (cascade-rebase, from #${ESC_ISSUE})" 2>/dev/null || true
  else
    log "WARNING: rebase API call failed for PR #${ESC_PR} (HTTP ${http_code:-error})"
  fi
}
|
|
|
|
# Re-approve PR if review was dismissed by force-push (cascade-rebase)
#
# Fetches the PR's review list; when at least one APPROVED review is flagged
# as dismissed, posts a fresh approval. All API calls are best-effort.
# Globals read:  ESC_PR, CODEBERG_TOKEN, CODEBERG_API
# Globals set:   _PB_SUB_CREATED (incremented when a re-approval is posted)
playbook_re_approve_if_dismissed() {
  local review_json approved_dismissed
  review_json=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \
    "${CODEBERG_API}/pulls/${ESC_PR}/reviews" 2>/dev/null || true)

  # Nothing to do when the review list is missing or the API returned null.
  if [ -z "$review_json" ] || [ "$review_json" = "null" ]; then
    return 0
  fi

  # Count APPROVED reviews that were subsequently dismissed.
  approved_dismissed=$(jq -r '[.[] | select(.state == "APPROVED" and .dismissed == true)] | length' \
    <<< "$review_json" 2>/dev/null || true)

  if [ "${approved_dismissed:-0}" -gt 0 ]; then
    curl -sf -X POST \
      -H "Authorization: token ${CODEBERG_TOKEN}" \
      -H "Content-Type: application/json" \
      "${CODEBERG_API}/pulls/${ESC_PR}/reviews" \
      -d '{"event":"APPROVED","body":"Re-approved after rebase (cascade-rebase recipe)"}' 2>/dev/null || true
    log "Re-approved PR #${ESC_PR} after rebase"
    _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
  fi
}
|
|
|
|
# Retry merging the PR (cascade-rebase)
#
# POSTs to the merge endpoint with rebase style and branch cleanup. The
# Gitea/Forgejo merge endpoint returns an empty body on success, so — matching
# playbook_rebase_pr — success is detected from the HTTP status code. (The
# previous check `[ -n "$result" ]` misreported empty-body successes as
# failures.)
# Globals read:  ESC_PR, ESC_ISSUE, CODEBERG_TOKEN, CODEBERG_API
# Globals set:   _PB_SUB_CREATED (incremented on success)
playbook_retry_merge() {
  local http_code
  http_code=$(curl -s -o /dev/null -w '%{http_code}' -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H "Content-Type: application/json" \
    "${CODEBERG_API}/pulls/${ESC_PR}/merge" \
    -d '{"Do":"rebase","delete_branch_after_merge":true}' 2>/dev/null) || true

  if [ "${http_code:-0}" -ge 200 ] && [ "${http_code:-0}" -lt 300 ]; then
    log "Merge retry initiated for PR #${ESC_PR}"
    _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
    matrix_send "gardener" "✅ Merge retry for PR #${ESC_PR} (cascade-rebase, from #${ESC_ISSUE})" 2>/dev/null || true
  else
    log "WARNING: merge retry failed for PR #${ESC_PR} (HTTP ${http_code:-error})"
  fi
}
|
|
|
|
# Retrigger CI pipeline (flaky-test)
#
# Restarts the escalated Woodpecker pipeline unless the retry budget is spent.
# Globals read:  ESC_PIPELINE, ESC_ATTEMPTS, ESC_PR, WOODPECKER_TOKEN,
#                WOODPECKER_SERVER, WOODPECKER_REPO_ID
# Globals set:   _PB_SUB_CREATED (incremented on successful retrigger)
playbook_retrigger_ci() {
  if [ -z "$ESC_PIPELINE" ]; then
    return 0
  fi

  # Max 2 retriggers per issue spec
  if [ "${ESC_ATTEMPTS:-1}" -ge 3 ]; then
    log "Max retriggers reached for pipeline #${ESC_PIPELINE} (${ESC_ATTEMPTS} attempts)"
    return 0
  fi

  log "Retriggering CI pipeline #${ESC_PIPELINE} (attempt ${ESC_ATTEMPTS})"
  local status
  status=$(curl -s -o /dev/null -w '%{http_code}' -X POST \
    -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \
    "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${ESC_PIPELINE}" 2>/dev/null) || true

  if [ "${status:-0}" -ge 200 ] && [ "${status:-0}" -lt 300 ]; then
    log "Pipeline #${ESC_PIPELINE} retriggered (HTTP ${status})"
    _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
    matrix_send "gardener" "🔄 Retriggered CI for PR #${ESC_PR} (flaky-test, attempt ${ESC_ATTEMPTS})" 2>/dev/null || true
  else
    log "WARNING: retrigger failed for pipeline #${ESC_PIPELINE} (HTTP ${status:-error})"
  fi
}
|
|
|
|
# Quarantine flaky test and create fix issue (flaky-test)
#
# Only acts once retriggering is exhausted (>= 3 CI attempts): files a
# "backlog"-labelled issue instructing quarantine of the flaky test(s).
# Globals read:  ESC_ATTEMPTS, ESC_ISSUE, ESC_PR, ESC_PIPELINE,
#                _PB_FAILED_STEPS, CODEBERG_TOKEN, CODEBERG_API
# Globals set:   _PB_SUB_CREATED, _esc_total_created
playbook_quarantine_test() {
  # Only quarantine if retriggers exhausted
  if [ "${ESC_ATTEMPTS:-1}" -lt 3 ]; then
    return 0
  fi

  local failing_steps sub_title sub_body new_issue
  # Comma-join failed step names; strip the trailing comma.
  failing_steps=$(echo "$_PB_FAILED_STEPS" | cut -f2 | tr '\n' ', ' | sed 's/,$//' || true)

  sub_title="fix: quarantine flaky test (PR #${ESC_PR}, from #${ESC_ISSUE})"
  sub_body="## Flaky test detected

CI for PR #${ESC_PR} (issue #${ESC_ISSUE}) failed intermittently across ${ESC_ATTEMPTS} attempts.

Failing step(s): ${failing_steps:-unknown}

### Playbook
1. Identify the flaky test(s) from CI output
2. Quarantine (skip/mark pending) the flaky test(s)
3. Create targeted fix for the root cause

### Context
- Parent issue: #${ESC_ISSUE}
- PR: #${ESC_PR}${ESC_PIPELINE:+
- Pipeline: #${ESC_PIPELINE}}"

  # Best-effort issue creation; failure leaves new_issue empty.
  new_issue=$(curl -sf -X POST \
    -H "Authorization: token ${CODEBERG_TOKEN}" \
    -H "Content-Type: application/json" \
    "${CODEBERG_API}/issues" \
    -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
      '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true

  if [ -n "$new_issue" ]; then
    log "Created quarantine issue #${new_issue} for flaky test (from #${ESC_ISSUE})"
    _PB_SUB_CREATED=$((_PB_SUB_CREATED + 1))
    _esc_total_created=$((_esc_total_created + 1))
    matrix_send "gardener" "📋 Created #${new_issue}: quarantine flaky test (from #${ESC_ISSUE})" 2>/dev/null || true
  fi
}
|
|
|
|
# run_playbook — Execute every action listed in a matched recipe's playbook.
# Args:    $1 — recipe JSON object (as produced by match_recipe)
# Outputs: logs a WARNING for any action name it does not recognize.
run_playbook() {
  local spec="$1"
  local name step step_list
  name=$(jq -r '.name' <<< "$spec")
  step_list=$(jq -r '.playbook[].action' <<< "$spec" 2>/dev/null || true)

  while IFS= read -r step; do
    [ -n "$step" ] || continue
    case "$step" in
      shellcheck-per-file)           playbook_shellcheck_per_file ;;
      lint-per-file)                 playbook_lint_per_file ;;
      create-generic-issue)          playbook_create_generic_issue ;;
      make-step-non-blocking)        playbook_make_step_non_blocking ;;
      create-followup-remove-bypass) playbook_create_followup_remove_bypass ;;
      rebase-pr)                     playbook_rebase_pr ;;
      re-approve-if-dismissed)       playbook_re_approve_if_dismissed ;;
      retry-merge)                   playbook_retry_merge ;;
      retrigger-ci)                  playbook_retrigger_ci ;;
      quarantine-test)               playbook_quarantine_test ;;
      label-backlog)                 ;; # no-op: issues are already created with the backlog label
      *) log "WARNING: unknown playbook action '${step}' in recipe '${name}'" ;;
    esac
  done <<< "$step_list"
}
|
|
|
|
# ── Process dev-agent escalations (per-project, recipe-driven) ───────────
|
|
ESCALATION_FILE="${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.jsonl"       # incoming queue (JSONL, appended by dev-poll)
ESCALATION_DONE="${FACTORY_ROOT}/supervisor/escalations-${PROJECT_NAME}.done.jsonl"  # processed entries (audit trail)
|
|
|
|
if [ -s "$ESCALATION_FILE" ]; then
  # Atomically snapshot the file before processing to prevent race with
  # concurrent dev-poll appends: new entries go to a fresh ESCALATION_FILE
  # while we process the snapshot, so nothing is ever silently dropped.
  ESCALATION_SNAP="${ESCALATION_FILE}.processing.$$"
  mv "$ESCALATION_FILE" "$ESCALATION_SNAP"

  ESCALATION_COUNT=$(wc -l < "$ESCALATION_SNAP")
  log "Processing ${ESCALATION_COUNT} escalation(s) for ${PROJECT_NAME}"
  _esc_total_created=0

  # One JSONL entry per escalation; each iteration ends by appending the
  # entry to ESCALATION_DONE so it is never reprocessed.
  while IFS= read -r esc_entry; do
    [ -z "$esc_entry" ] && continue

    ESC_ISSUE=$(echo "$esc_entry" | jq -r '.issue // empty')
    ESC_PR=$(echo "$esc_entry" | jq -r '.pr // empty')
    ESC_ATTEMPTS=$(echo "$esc_entry" | jq -r '.attempts // 3')
    ESC_REASON=$(echo "$esc_entry" | jq -r '.reason // empty')

    # Malformed entries (missing issue or PR) are archived without action.
    if [ -z "$ESC_ISSUE" ] || [ -z "$ESC_PR" ]; then
      echo "$esc_entry" >> "$ESCALATION_DONE"
      continue
    fi

    log "Escalation: issue #${ESC_ISSUE} PR #${ESC_PR} reason=${ESC_REASON} (${ESC_ATTEMPTS} CI attempt(s))"

    # Handle idle_timeout escalations — no CI steps to inspect, just notify
    if [[ "$ESC_REASON" == idle_timeout* ]]; then
      _issue_url="https://codeberg.org/${CODEBERG_REPO}/issues/${ESC_ISSUE}"
      sub_title="chore: investigate idle timeout for issue #${ESC_ISSUE}"
      # NOTE(review): if ESC_PR is exactly "0", the $(... && printf ...)
      # substitutions below exit 1, making the assignment fail under set -e
      # and aborting the whole run — consider an internal `|| true`. Confirm
      # whether dev-poll ever writes pr:0 for idle timeouts.
      sub_body="## Dev-agent idle timeout

The dev-agent session for issue #${ESC_ISSUE} was idle for 2h without a phase update and was killed.$([ "${ESC_PR:-0}" != "0" ] && printf '\n\nPR #%s may still be open.' "$ESC_PR")

### What to check
1. Was the agent stuck waiting for input? Check the issue spec for ambiguity.
2. Was there an infrastructure issue (tmux crash, disk full, etc.)?
3. Re-run the issue by restoring the \`backlog\` label if the spec is clear.

### Context
- Issue: [#${ESC_ISSUE}](${_issue_url})$([ "${ESC_PR:-0}" != "0" ] && printf '\n- PR: #%s' "$ESC_PR")"

      # Best-effort issue creation; failure leaves new_issue empty.
      new_issue=$(curl -sf -X POST \
        -H "Authorization: token ${CODEBERG_TOKEN}" \
        -H "Content-Type: application/json" \
        "${CODEBERG_API}/issues" \
        -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
          '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true

      if [ -n "$new_issue" ]; then
        log "Created idle-timeout sub-issue #${new_issue} for #${ESC_ISSUE}"
        _esc_total_created=$((_esc_total_created + 1))
        matrix_send "gardener" "⏱ Created #${new_issue}: idle timeout on #${ESC_ISSUE}" 2>/dev/null || true
      fi

      echo "$esc_entry" >> "$ESCALATION_DONE"
      continue
    fi

    # Fetch PR metadata (SHA, mergeable status)
    ESC_PR_DATA=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \
      "${CODEBERG_API}/pulls/${ESC_PR}" 2>/dev/null || true)
    ESC_PR_SHA=$(echo "$ESC_PR_DATA" | jq -r '.head.sha // ""' 2>/dev/null || true)
    _PB_PR_MERGEABLE=$(echo "$ESC_PR_DATA" | jq '.mergeable // null' 2>/dev/null || true)

    # Resolve the pipeline number for the PR's head commit via the
    # Woodpecker DB (wpdb — presumably a SQL helper; verify its contract).
    ESC_PIPELINE=""
    if [ -n "$ESC_PR_SHA" ]; then
      # Validate SHA is a 40-char hex string before interpolating into SQL
      if [[ "$ESC_PR_SHA" =~ ^[0-9a-fA-F]{40}$ ]]; then
        ESC_PIPELINE=$(wpdb -c "SELECT number FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND commit='${ESC_PR_SHA}' ORDER BY created DESC LIMIT 1;" 2>/dev/null | xargs || true)
      else
        log "WARNING: ESC_PR_SHA '${ESC_PR_SHA}' is not a valid hex SHA — skipping pipeline lookup"
      fi
    fi

    # Fetch failed CI steps and their logs into temp dir
    _PB_FAILED_STEPS=""
    _PB_LOG_DIR=$(mktemp -d /tmp/recipe-logs-XXXXXX)
    _PB_SUB_CREATED=0
    _PB_LOGS_AVAILABLE=0

    if [ -n "$ESC_PIPELINE" ]; then
      # TSV lines "pid\tname" for every failed step of the pipeline.
      _PB_FAILED_STEPS=$(curl -sf \
        -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \
        "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${ESC_PIPELINE}" 2>/dev/null | \
        jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.pid)\t\(.name)"' 2>/dev/null || true)

      while IFS=$'\t' read -r step_pid step_name; do
        [ -z "$step_pid" ] && continue
        # Guard: step_pid is interpolated into a CLI call and filenames.
        [[ "$step_pid" =~ ^[0-9]+$ ]] || { log "WARNING: invalid step_pid '${step_pid}' — skipping"; continue; }
        # Keep only the last 150 log lines per step to bound disk/prompt size.
        step_logs=$(woodpecker-cli pipeline log show "${CODEBERG_REPO}" "${ESC_PIPELINE}" "${step_pid}" 2>/dev/null | tail -150 || true)
        if [ -n "$step_logs" ]; then
          echo "$step_logs" > "${_PB_LOG_DIR}/step-${step_pid}.log"
          _PB_LOGS_AVAILABLE=1
        fi
      done <<< "$_PB_FAILED_STEPS"
    fi

    # Fetch PR changed files for recipe matching
    _PB_PR_FILES_JSON="[]"
    _PB_PR_FILES=$(curl -sf -H "Authorization: token ${CODEBERG_TOKEN}" \
      "${CODEBERG_API}/pulls/${ESC_PR}/files" 2>/dev/null | jq -r '.[].filename // empty' 2>/dev/null || true)
    if [ -n "$_PB_PR_FILES" ]; then
      # Newline list -> compact JSON array, dropping empty lines.
      _PB_PR_FILES_JSON=$(echo "$_PB_PR_FILES" | jq -Rsc 'split("\n") | map(select(length > 0))')
    fi

    # Build recipe matching context
    _RECIPE_STEP_NAMES=$(echo "$_PB_FAILED_STEPS" | cut -f2 | jq -Rsc 'split("\n") | map(select(length > 0))')
    _RECIPE_OUTPUT_FILE="${_PB_LOG_DIR}/all-output.txt"
    # Concatenate all step logs; touch ensures the file exists when none match.
    cat "${_PB_LOG_DIR}"/step-*.log > "$_RECIPE_OUTPUT_FILE" 2>/dev/null || touch "$_RECIPE_OUTPUT_FILE"
    _RECIPE_PR_INFO=$(jq -nc \
      --argjson m "${_PB_PR_MERGEABLE:-null}" \
      --argjson a "${ESC_ATTEMPTS}" \
      --argjson files "${_PB_PR_FILES_JSON}" \
      '{mergeable:$m, attempts:$a, changed_files:$files}')

    # Match escalation against recipes and execute playbook
    MATCHED_RECIPE=$(match_recipe "$_RECIPE_STEP_NAMES" "$_RECIPE_OUTPUT_FILE" "$_RECIPE_PR_INFO")
    RECIPE_NAME=$(echo "$MATCHED_RECIPE" | jq -r '.name')
    log "Recipe matched: ${RECIPE_NAME} for #${ESC_ISSUE} PR #${ESC_PR}"

    run_playbook "$MATCHED_RECIPE"

    # Fallback: no sub-issues created — create investigation issue
    if [ "$_PB_SUB_CREATED" -eq 0 ]; then
      sub_title="fix: investigate CI failure for PR #${ESC_PR} (from issue #${ESC_ISSUE})"
      if [ "$_PB_LOGS_AVAILABLE" -eq 1 ]; then
        sub_body="## CI failure — investigation required

Spawned by gardener from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)). Recipe '${RECIPE_NAME}' matched but produced no sub-issues.

Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the PR can merge."
      else
        sub_body="## CI failure — investigation required

Spawned by gardener from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)). CI logs were unavailable at escalation time.

Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the PR can merge."
      fi

      # Best-effort issue creation; failure leaves new_issue empty.
      new_issue=$(curl -sf -X POST \
        -H "Authorization: token ${CODEBERG_TOKEN}" \
        -H "Content-Type: application/json" \
        "${CODEBERG_API}/issues" \
        -d "$(jq -nc --arg t "$sub_title" --arg b "$sub_body" \
          '{"title":$t,"body":$b,"labels":["backlog"]}')" 2>/dev/null | jq -r '.number // ""') || true

      if [ -n "$new_issue" ]; then
        log "Created fallback sub-issue #${new_issue} for escalated #${ESC_ISSUE}"
        _esc_total_created=$((_esc_total_created + 1))
        matrix_send "gardener" "📋 Created sub-issue #${new_issue}: investigate CI for PR #${ESC_PR} (from escalated #${ESC_ISSUE})" 2>/dev/null || true
      fi
    fi

    # Cleanup temp files
    rm -rf "$_PB_LOG_DIR"

    # Mark as processed
    echo "$esc_entry" >> "$ESCALATION_DONE"
  done < "$ESCALATION_SNAP"

  rm -f "$ESCALATION_SNAP"
  log "Escalations processed — moved to $(basename "$ESCALATION_DONE")"

  # Report resolution count to supervisor for its fixed() summary
  if [ "${_esc_total_created:-0}" -gt 0 ]; then
    printf '%d %s\n' "$_esc_total_created" "$PROJECT_NAME" \
      >> "${FACTORY_ROOT}/supervisor/gardener-esc-resolved.log"
  fi
fi
|
|
|
|
# End-of-run marker.
log "--- Gardener poll done ---"
|