From 174187f6a64a3e1fa8151805c6b3f172ab48bbce Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 13 Mar 2026 09:17:09 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20issue=20gardener=20=E2=80=94=20daily=20?= =?UTF-8?q?backlog=20grooming=20agent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bash pre-checks (zero tokens): duplicate titles, thin issues, stale issues, missing deps. Then claude -p for analysis and action. Escalates decisions in compact format: 1. #123 "title" — reason (a) opt1 (b) opt2 (c) opt3 Cron: daily 07:00 UTC. Light touch — grooms, doesn't invent work. --- gardener/best-practices.md | 53 +++++++++ gardener/gardener-poll.sh | 214 +++++++++++++++++++++++++++++++++++++ 2 files changed, 267 insertions(+) create mode 100644 gardener/best-practices.md create mode 100755 gardener/gardener-poll.sh diff --git a/gardener/best-practices.md b/gardener/best-practices.md new file mode 100644 index 0000000..d1ac8c1 --- /dev/null +++ b/gardener/best-practices.md @@ -0,0 +1,53 @@ +# Gardener Best Practices + +## What the gardener does +Keeps the issue backlog clean so the dev-agent always has well-structured work. +Runs daily (or 2x/day). Light touch — grooming, not development. + +## Issue quality checklist +A "ready" issue has: +- [ ] Clear title (feat/fix/refactor prefix) +- [ ] Acceptance criteria with checkboxes +- [ ] Affected files listed +- [ ] Dependencies section (if any) +- [ ] No duplicate in the backlog + +## When to close +- **Duplicate**: newer issue closed, comment links to older one +- **Superseded**: explicitly replaced by another issue (link it) +- **Stale + irrelevant**: no activity 14+ days AND no longer makes sense given current state +- **Completed elsewhere**: work was done in another PR without referencing the issue + +## When to escalate (NEVER decide these) +- Issue scope is ambiguous — could be interpreted multiple ways +- Two issues overlap but aren't exact duplicates — need human to pick scope +- Issue contradicts a design decision (check PRODUCT-TRUTH.md, ARCHITECTURE.md) +- Issue is feature request vs bug — classification matters for priority +- Closing would lose important context that isn't captured elsewhere + +## Escalation format +Compact, decision-ready. Human should be able to reply "1a 2c 3b" and be done. + +``` +🌱 Issue Gardener — 3 items need attention + +1. #123 "Push3 gas optimization" — duplicate of #456 "optimizer gas limit"? + (a) close #123 (b) close #456 (c) keep both, different scope +2. #789 "refactor VWAPTracker" — stale 21 days, VWAP was rewritten in #603 + (a) close as superseded (b) reopen with updated scope (c) keep, still relevant +3. #234 "landing page A/B test" — 8 acceptance criteria spanning 4 packages + (a) split into: UI variants, analytics, config, deployment (b) keep as-is +``` + +## What NOT to do +- Don't create new feature issues — gardener grooms, doesn't invent work +- Don't change issue priority/labels beyond adding missing deps +- Don't modify acceptance criteria that are already well-written +- Don't close issues that are actively being worked on (check for open PRs) +- Don't rate-limit yourself — max 10 API calls per run for issue reads, 5 for writes + +## Lessons learned +- Review bot hallucination rate is ~15% — gardener should verify claims about code before acting +- Dev-agent doesn't understand the product — clear acceptance criteria save 2-3 CI cycles +- Feature issues MUST list affected e2e test files +- Issue templates from ISSUE-TEMPLATES.md propagate via triage gate diff --git a/gardener/gardener-poll.sh b/gardener/gardener-poll.sh new file mode 100755 index 0000000..8650aa8 --- /dev/null +++ b/gardener/gardener-poll.sh @@ -0,0 +1,214 @@ +#!/usr/bin/env bash +# ============================================================================= +# gardener-poll.sh — Issue backlog grooming agent +# +# Cron: daily (or 2x/day). Reads open issues, detects problems, invokes +# claude -p to fix or escalate. +# +# Problems detected (bash, zero tokens): +# - Duplicate titles / overlapping scope +# - Missing acceptance criteria +# - Missing dependencies (references other issues but no dep link) +# - Oversized issues (too many acceptance criteria or change files) +# - Stale issues (no activity > 14 days, still open) +# - Closed issues with open dependents still referencing them +# +# Actions taken (claude -p): +# - Close duplicates with cross-reference comment +# - Add acceptance criteria template +# - Set dependency labels +# - Split oversized issues (create sub-issues, close parent) +# - Escalate decisions to human via openclaw system event +# +# Escalation format (compact, decision-ready): +# 🌱 Issue Gardener — N items need attention +# 1. #123 "title" — duplicate of #456? (a) close #123 (b) close #456 (c) merge scope +# 2. #789 "title" — needs decision: (a) backlog (b) wontfix (c) split into X,Y +# ============================================================================= +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" + +# shellcheck source=../lib/env.sh +source "$FACTORY_ROOT/lib/env.sh" + +LOG_FILE="$SCRIPT_DIR/gardener.log" +LOCK_FILE="/tmp/gardener-poll.lock" +CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}" + +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } + +# ── Lock ────────────────────────────────────────────────────────────────── +if [ -f "$LOCK_FILE" ]; then + LOCK_PID=$(cat "$LOCK_FILE" 2>/dev/null || true) + if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then + log "poll: gardener running (PID $LOCK_PID)" + exit 0 + fi + rm -f "$LOCK_FILE" +fi +echo $$ > "$LOCK_FILE" +trap 'rm -f "$LOCK_FILE"' EXIT + +log "--- Gardener poll start ---" + +# ── Fetch all open issues ───────────────────────────────────────────────── +ISSUES_JSON=$(codeberg_api GET "/issues?state=open&type=issues&limit=50&sort=updated&direction=desc" 2>/dev/null || true) +if [ -z "$ISSUES_JSON" ] || [ "$ISSUES_JSON" = "null" ]; then + log "Failed to fetch issues" + exit 1 +fi + +ISSUE_COUNT=$(echo "$ISSUES_JSON" | jq 'length') +log "Found $ISSUE_COUNT open issues" + +if [ "$ISSUE_COUNT" -eq 0 ]; then + log "No open issues — nothing to groom" + exit 0 +fi + +# ── Bash pre-checks (zero tokens) ──────────────────────────────────────── + +PROBLEMS="" + +# 1. Duplicate detection: issues with very similar titles +TITLES=$(echo "$ISSUES_JSON" | jq -r '.[] | "\(.number)\t\(.title)"') +DUPES="" +while IFS=$'\t' read -r num1 title1; do + while IFS=$'\t' read -r num2 title2; do + [ "$num1" -ge "$num2" ] && continue + # Normalize: lowercase, strip common prefixes, collapse whitespace + t1=$(echo "$title1" | tr '[:upper:]' '[:lower:]' | sed 's/^feat:\|^fix:\|^refactor://;s/[^a-z0-9 ]//g;s/ */ /g') + t2=$(echo "$title2" | tr '[:upper:]' '[:lower:]' | sed 's/^feat:\|^fix:\|^refactor://;s/[^a-z0-9 ]//g;s/ */ /g') + # Count shared words (>60% overlap = suspect) + WORDS1=$(echo "$t1" | tr ' ' '\n' | sort -u) + WORDS2=$(echo "$t2" | tr ' ' '\n' | sort -u) + SHARED=$(comm -12 <(echo "$WORDS1") <(echo "$WORDS2") | wc -l) + TOTAL1=$(echo "$WORDS1" | wc -l) + TOTAL2=$(echo "$WORDS2" | wc -l) + MIN_TOTAL=$(( TOTAL1 < TOTAL2 ? TOTAL1 : TOTAL2 )) + if [ "$MIN_TOTAL" -gt 2 ] && [ "$SHARED" -gt 0 ]; then + OVERLAP=$(( SHARED * 100 / MIN_TOTAL )) + if [ "$OVERLAP" -ge 60 ]; then + DUPES="${DUPES}possible_dupe: #${num1} vs #${num2} (${OVERLAP}% word overlap)\n" + fi + fi + done <<< "$TITLES" +done <<< "$TITLES" +[ -n "$DUPES" ] && PROBLEMS="${PROBLEMS}${DUPES}" + +# 2. Missing acceptance criteria: issues with short body and no checkboxes +while IFS=$'\t' read -r num body_len has_checkbox; do + if [ "$body_len" -lt 100 ] && [ "$has_checkbox" = "false" ]; then + PROBLEMS="${PROBLEMS}thin_issue: #${num} — body < 100 chars, no acceptance criteria\n" + fi +done < <(echo "$ISSUES_JSON" | jq -r '.[] | "\(.number)\t\(.body | length)\t\(.body | test("- \\[[ x]\\]") // false)"') + +# 3. Stale issues: no update in 14+ days +NOW_EPOCH=$(date +%s) +while IFS=$'\t' read -r num updated_at; do + UPDATED_EPOCH=$(date -d "$updated_at" +%s 2>/dev/null || echo 0) + AGE_DAYS=$(( (NOW_EPOCH - UPDATED_EPOCH) / 86400 )) + if [ "$AGE_DAYS" -ge 14 ]; then + PROBLEMS="${PROBLEMS}stale: #${num} — no activity for ${AGE_DAYS} days\n" + fi +done < <(echo "$ISSUES_JSON" | jq -r '.[] | "\(.number)\t\(.updated_at)"') + +# 4. Issues referencing closed deps +while IFS=$'\t' read -r num body; do + REFS=$(echo "$body" | grep -oP '#\d+' | grep -oP '\d+' | sort -u || true) + for ref in $REFS; do + [ "$ref" = "$num" ] && continue + REF_STATE=$(echo "$ISSUES_JSON" | jq -r --arg n "$ref" '.[] | select(.number == ($n | tonumber)) | .state' 2>/dev/null || true) + # If ref not in our open set, check if it's closed + if [ -z "$REF_STATE" ]; then + REF_STATE=$(codeberg_api GET "/issues/$ref" 2>/dev/null | jq -r '.state // "unknown"' 2>/dev/null || true) + # Rate limit protection + sleep 0.5 + fi + done +done < <(echo "$ISSUES_JSON" | jq -r '.[] | "\(.number)\t\(.body // "")"' | head -20) + +PROBLEM_COUNT=$(echo -e "$PROBLEMS" | grep -c '.' || true) +log "Detected $PROBLEM_COUNT potential problems" + +if [ "$PROBLEM_COUNT" -eq 0 ]; then + log "Backlog is clean — nothing to groom" + exit 0 +fi + +# ── Invoke claude -p ────────────────────────────────────────────────────── +log "Invoking claude -p for grooming" + +# Build issue summary for context (titles + labels + deps) +ISSUE_SUMMARY=$(echo "$ISSUES_JSON" | jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"') + +PROMPT="You are harb's issue gardener. Your job: keep the backlog clean, well-structured, and actionable. + +## Current open issues +$ISSUE_SUMMARY + +## Problems detected +$(echo -e "$PROBLEMS") + +## Tools available +- Codeberg API via curl (token in CODEBERG_TOKEN env var) +- Base URL: https://codeberg.org/api/v1/repos/johba/harb + +## Rules +1. **Duplicates**: If confident (>80% overlap + same scope after reading bodies), close the newer one with a comment referencing the older. If unsure, ESCALATE. +2. **Thin issues**: Add a standard acceptance criteria section: \`## Acceptance Criteria\n- [ ] ...\`. Read the body first to understand intent. +3. **Stale issues**: If clearly superseded or no longer relevant, close with explanation. If unclear, ESCALATE. +4. **Oversized issues**: If an issue has >5 acceptance criteria touching different files/concerns, propose a split. Don't split automatically — ESCALATE with suggested breakdown. +5. **Dependencies**: If an issue references another that must land first, add a \`## Dependencies\n- #NNN\` section if missing. + +## Escalation format +For anything needing human decision, output EXACTLY this format (one block, all items): +\`\`\` +ESCALATE +1. #NNN \"title\" — reason (a) option1 (b) option2 (c) option3 +2. #NNN \"title\" — reason (a) option1 (b) option2 +\`\`\` + +## Output +- For each action taken, print: ACTION: +- For escalations, use the ESCALATE block above +- If nothing to do after analysis, print: CLEAN" + +CLAUDE_OUTPUT=$(cd /home/debian/harb && timeout "$CLAUDE_TIMEOUT" \ + claude -p "$PROMPT" \ + --model sonnet \ + --dangerously-skip-permissions \ + --max-turns 10 \ + 2>/dev/null) || true + +log "claude finished ($(echo "$CLAUDE_OUTPUT" | wc -c) bytes)" + +# ── Parse escalations ──────────────────────────────────────────────────── +ESCALATION=$(echo "$CLAUDE_OUTPUT" | sed -n '/^ESCALATE$/,/^```$/p' | grep -v '^ESCALATE$\|^```$' || true) +if [ -z "$ESCALATION" ]; then + ESCALATION=$(echo "$CLAUDE_OUTPUT" | grep -A50 "^ESCALATE" | grep '^\d' || true) +fi + +if [ -n "$ESCALATION" ]; then + ITEM_COUNT=$(echo "$ESCALATION" | grep -c '.' || true) + log "Escalating $ITEM_COUNT items to human" + + # Send via openclaw system event + openclaw system event "🌱 Issue Gardener — ${ITEM_COUNT} item(s) need attention + +${ESCALATION} + +Reply with numbers+letters (e.g. 1a 2c) to decide." 2>/dev/null || true +fi + +# ── Log actions taken ───────────────────────────────────────────────────── +ACTIONS=$(echo "$CLAUDE_OUTPUT" | grep "^ACTION:" || true) +if [ -n "$ACTIONS" ]; then + echo "$ACTIONS" | while read -r line; do + log " $line" + done +fi + +log "--- Gardener poll done ---"