From 4c7baff05d1ae71920da71a2922eb93e16774c68 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 21 Mar 2026 10:41:31 +0000 Subject: [PATCH] fix: Two parallel dust-bundling codepaths need consolidation (#368) Remove the orphaned post-session dust accumulator from gardener-agent.sh (no longer reached after #367 moved gardener-poll.sh to action issues). Add a dedicated dust-bundling formula step to run-gardener.toml that handles the full lifecycle: dedup, timestamps, 30-day TTL expiry, and bundling groups with 3+ items into backlog issues. Co-Authored-By: Claude Opus 4.6 (1M context) --- AGENTS.md | 4 +- formulas/groom-backlog.toml | 5 +- formulas/run-gardener.toml | 73 ++++++++++++++++--- gardener/gardener-agent.sh | 136 ++---------------------------------- 4 files changed, 73 insertions(+), 145 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 6b4624b..a0df5aa 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -133,8 +133,8 @@ issue is filed against). **Key files**: - `gardener/gardener-run.sh` — Cron wrapper: lock, memory guard, dedup check, files action issue - `gardener/gardener-poll.sh` — Escalation-reply injection for dev sessions, invokes gardener-agent.sh for grooming -- `gardener/gardener-agent.sh` — Orchestrator: bash pre-analysis, creates tmux session (`gardener-{project}`) with interactive `claude`, monitors phase file, parses result file (ACTION:/DUST:/ESCALATE), handles dust bundling -- `formulas/run-gardener.toml` — Execution spec: preflight, grooming, blocked-review, agents-update, commit-and-pr +- `gardener/gardener-agent.sh` — Orchestrator: bash pre-analysis, creates tmux session (`gardener-{project}`) with interactive `claude`, monitors phase file, parses result file (ACTION:/DUST:/ESCALATE) +- `formulas/run-gardener.toml` — Execution spec: preflight, grooming, dust-bundling, blocked-review, agents-update, commit-and-pr **Environment variables consumed**: - `CODEBERG_TOKEN`, `CODEBERG_REPO`, `CODEBERG_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT` diff --git a/formulas/groom-backlog.toml b/formulas/groom-backlog.toml index d561ec2..4853a6b 100644 --- a/formulas/groom-backlog.toml +++ b/formulas/groom-backlog.toml @@ -77,7 +77,7 @@ PROMOTE (substantial work — multi-file, behavioral, architectural, security): DUST (trivial — single-line edit, rename, comment, style, whitespace): Write: echo 'DUST: {"issue": NNN, "group": "", "title": "...", "reason": "..."}' >> "$RESULT_FILE" Group by file or subsystem: e.g. "gardener", "lib/env.sh", "dev-poll" - Do NOT close the issue — the script auto-bundles groups of 3+ into a backlog issue. + Do NOT close dust issues — the dust-bundling formula step auto-bundles groups of 3+ into a backlog issue. DUPLICATE (>80% overlap after reading both bodies — confirm before closing): Post comment: curl -X POST ... /issues/NNN/comments -d '{"body":"Duplicate of #OLDER"}' @@ -111,7 +111,8 @@ For tier-2 items (trivial, cosmetic, score < 1.0): - Close duplicates with cross-reference comment These do not need promotion — just classification so they leave the tech-debt queue. -The script handles the bundling; emit correct DUST lines for each item. +The dust-bundling formula step handles accumulation, dedup, TTL, and bundling; +emit correct DUST lines for each item. """ needs = ["process-scored"] diff --git a/formulas/run-gardener.toml b/formulas/run-gardener.toml index 95ed054..9513527 100644 --- a/formulas/run-gardener.toml +++ b/formulas/run-gardener.toml @@ -7,7 +7,7 @@ # No memory, no journal. The gardener does mechanical housekeeping # based on current state — it doesn't need to remember past runs. # -# Steps: preflight → grooming → blocked-review → agents-update → commit-and-pr +# Steps: preflight → grooming → dust-bundling → blocked-review → agents-update → commit-and-pr name = "run-gardener" description = "Mechanical housekeeping: grooming, blocked review, docs update" @@ -83,8 +83,8 @@ ACTION (substantial — promote, close duplicate, add acceptance criteria): DUST (trivial — single-line edit, rename, comment, style, whitespace): echo 'DUST: {"issue": NNN, "group": "", "title": "...", "reason": "..."}' >> "$RESULT_FILE" Group by file or subsystem (e.g. "gardener", "lib/env.sh", "dev-poll"). - Do NOT close dust issues — the script auto-bundles groups of 3+ into - one backlog issue. + Do NOT close dust issues — the dust-bundling step auto-bundles groups + of 3+ into one backlog issue. ESCALATE (needs human decision): printf 'ESCALATE\n1. #NNN "title" — reason (a) option1 (b) option2\n' >> "$RESULT_FILE" @@ -106,17 +106,70 @@ Processing order: 2. Process tech-debt issues by score (impact/effort) 3. Classify remaining items as dust or escalate -After processing, dust items are collected into gardener/dust.jsonl. -When a group accumulates 3+ distinct issues, create one bundled backlog -issue, close the source issues with cross-reference comments, and remove -bundled items from the staging file. +Do NOT bundle dust yourself — the dust-bundling step handles accumulation, +dedup, TTL expiry, and bundling into backlog issues. CRITICAL: If this step fails for any reason, log the failure and move on. """ needs = ["preflight"] # ───────────────────────────────────────────────────────────────────── -# Step 3: blocked-review — triage blocked issues +# Step 3: dust-bundling — accumulate, expire, and bundle dust items +# ───────────────────────────────────────────────────────────────────── + +[[steps]] +id = "dust-bundling" +title = "Accumulate dust, expire stale entries, and bundle groups" +description = """ +Process DUST items emitted during grooming. This step maintains the +persistent dust accumulator at $PROJECT_REPO_ROOT/gardener/dust.jsonl. + +IMPORTANT: Use $PROJECT_REPO_ROOT/gardener/dust.jsonl (the main repo +checkout), NOT the worktree copy — the worktree is destroyed after the +session, so changes there would be lost. + +1. Collect DUST JSON lines emitted during grooming (from the result file + or your notes). Each has: {"issue": NNN, "group": "...", "title": "...", "reason": "..."} + +2. Deduplicate: read existing dust.jsonl and skip any issue numbers that + are already staged: + DUST_FILE="$PROJECT_REPO_ROOT/gardener/dust.jsonl" + touch "$DUST_FILE" + EXISTING=$(jq -r '.issue' "$DUST_FILE" 2>/dev/null | sort -nu || true) + For each new dust item, check if its issue number is in EXISTING. + Add new entries with a timestamp: + echo '{"issue":NNN,"group":"...","title":"...","reason":"...","ts":"YYYY-MM-DDTHH:MM:SSZ"}' >> "$DUST_FILE" + +3. Expire stale entries (30-day TTL): + CUTOFF=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M:%SZ) + jq -c --arg c "$CUTOFF" 'select(.ts >= $c)' "$DUST_FILE" > "${DUST_FILE}.tmp" && mv "${DUST_FILE}.tmp" "$DUST_FILE" + +4. Bundle groups with 3+ distinct issues: + a. Count distinct issues per group: + jq -r '[.group, (.issue | tostring)] | join("\\t")' "$DUST_FILE" | sort -u | cut -f1 | sort | uniq -c | sort -rn + b. For each group with count >= 3: + - Collect issue details and distinct issue numbers for the group + - Look up the backlog label ID: + BACKLOG_LABEL_ID=$(curl -sf -H "Authorization: token $CODEBERG_TOKEN" \ + "$CODEBERG_API/labels" | jq -r '.[] | select(.name == "backlog") | .id') + - Create a bundled backlog issue: + curl -sf -X POST -H "Authorization: token $CODEBERG_TOKEN" \ + -H "Content-Type: application/json" "$CODEBERG_API/issues" \ + -d '{"title":"fix: bundled dust cleanup — GROUP","body":"...","labels":[LABEL_ID]}' + - Close each source issue with a cross-reference comment: + curl ... "$CODEBERG_API/issues/NNN/comments" -d '{"body":"Bundled into #NEW"}' + curl ... "$CODEBERG_API/issues/NNN" -d '{"state":"closed"}' + - Remove bundled items from dust.jsonl: + jq -c --arg g "GROUP" 'select(.group != $g)' "$DUST_FILE" > "${DUST_FILE}.tmp" && mv "${DUST_FILE}.tmp" "$DUST_FILE" + +5. If no DUST items were emitted and no groups are ripe, skip this step. + +CRITICAL: If this step fails, log the failure and move on to blocked-review. +""" +needs = ["grooming"] + +# ───────────────────────────────────────────────────────────────────── +# Step 4: blocked-review — triage blocked issues # ───────────────────────────────────────────────────────────────────── [[steps]] @@ -180,10 +233,10 @@ Review all issues labeled 'blocked' and decide their fate. CRITICAL: If this step fails, log the failure and move on. """ -needs = ["grooming"] +needs = ["dust-bundling"] # ───────────────────────────────────────────────────────────────────── -# Step 4: agents-update — AGENTS.md watermark staleness check +# Step 5: agents-update — AGENTS.md watermark staleness check # ───────────────────────────────────────────────────────────────────── [[steps]] diff --git a/gardener/gardener-agent.sh b/gardener/gardener-agent.sh index 868076d..e517cb4 100644 --- a/gardener/gardener-agent.sh +++ b/gardener/gardener-agent.sh @@ -36,7 +36,6 @@ LOG_FILE="$SCRIPT_DIR/gardener.log" SESSION_NAME="gardener-${PROJECT_NAME}" PHASE_FILE="/tmp/gardener-session-${PROJECT_NAME}.phase" RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt" -DUST_FILE="$SCRIPT_DIR/dust.jsonl" SCRATCH_FILE="/tmp/gardener-${PROJECT_NAME}-scratch.md" # shellcheck disable=SC2034 # read by monitor_phase_loop in lib/agent-session.sh @@ -192,11 +191,6 @@ done # ── Build issue context ──────────────────────────────────────────────────── ISSUE_SUMMARY=$(echo "$ISSUES_JSON" | jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"') -STAGED_DUST="" -if [ -s "$DUST_FILE" ]; then - STAGED_DUST=$(jq -r '"#\(.issue) (\(.group))"' "$DUST_FILE" 2>/dev/null | sort -u || true) -fi - # ── Build optional prompt sections ──────────────────────────────────────── CONTEXT_SECTION="" if [ -n "$CONTEXT_BLOCK" ]; then @@ -204,13 +198,6 @@ if [ -n "$CONTEXT_BLOCK" ]; then ${CONTEXT_BLOCK}" fi -STAGED_DUST_SECTION="" -if [ -n "$STAGED_DUST" ]; then - STAGED_DUST_SECTION=" -### Already staged as dust — do NOT re-emit DUST for these -${STAGED_DUST}" -fi - ESCALATION_SECTION="" if [ -n "$ESCALATION_REPLY" ]; then ESCALATION_SECTION=" @@ -241,7 +228,7 @@ ${FORMULA_CONTENT} ${ISSUE_SUMMARY} ### Problems detected -$(echo -e "$PROBLEMS")${STAGED_DUST_SECTION}${ESCALATION_SECTION} +$(echo -e "$PROBLEMS")${ESCALATION_SECTION} ## Codeberg API reference Base URL: ${CODEBERG_API} Auth header: -H \"Authorization: token \$CODEBERG_TOKEN\" @@ -357,124 +344,11 @@ if [ -n "$ACTIONS" ]; then done fi -# ── Collect dust items ──────────────────────────────────────────────────── -# DUST_FILE already set above (before prompt construction) -DUST_LINES=$(echo "$CLAUDE_OUTPUT" | grep "^DUST: " | sed 's/^DUST: //' || true) +# ── Log dust items (bundling handled by run-gardener formula step) ──── +DUST_LINES=$(echo "$CLAUDE_OUTPUT" | grep "^DUST: " || true) if [ -n "$DUST_LINES" ]; then - # Build set of issue numbers already in dust.jsonl for dedup - EXISTING_DUST_ISSUES="" - if [ -s "$DUST_FILE" ]; then - EXISTING_DUST_ISSUES=$(jq -r '.issue' "$DUST_FILE" 2>/dev/null | sort -nu || true) - fi - - DUST_COUNT=0 - while IFS= read -r dust_json; do - [ -z "$dust_json" ] && continue - # Validate JSON - if ! echo "$dust_json" | jq -e '.issue and .group' >/dev/null 2>&1; then - log "WARNING: invalid dust JSON: $dust_json" - continue - fi - # Deduplicate: skip if this issue is already staged - dust_issue_num=$(echo "$dust_json" | jq -r '.issue') - if echo "$EXISTING_DUST_ISSUES" | grep -qx "$dust_issue_num" 2>/dev/null; then - log "Skipping duplicate dust entry for issue #${dust_issue_num}" - continue - fi - EXISTING_DUST_ISSUES="${EXISTING_DUST_ISSUES} -${dust_issue_num}" - echo "$dust_json" | jq -c '. + {"ts": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' >> "$DUST_FILE" - DUST_COUNT=$((DUST_COUNT + 1)) - done <<< "$DUST_LINES" - log "Collected $DUST_COUNT dust item(s) (duplicates skipped)" -fi - -# ── Expire stale dust entries (30-day TTL) ─────────────────────────────── -if [ -s "$DUST_FILE" ]; then - CUTOFF=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || true) - if [ -n "$CUTOFF" ]; then - BEFORE_COUNT=$(wc -l < "$DUST_FILE") - if jq -c --arg c "$CUTOFF" 'select(.ts >= $c)' "$DUST_FILE" > "${DUST_FILE}.ttl" 2>/dev/null; then - mv "${DUST_FILE}.ttl" "$DUST_FILE" - AFTER_COUNT=$(wc -l < "$DUST_FILE") - EXPIRED=$((BEFORE_COUNT - AFTER_COUNT)) - [ "$EXPIRED" -gt 0 ] && log "Expired $EXPIRED stale dust entries (>30 days old)" - else - rm -f "${DUST_FILE}.ttl" - log "WARNING: TTL cleanup failed — dust.jsonl left unchanged" - fi - fi -fi - -# ── Bundle dust groups with 3+ distinct issues ────────────────────────── -if [ -s "$DUST_FILE" ]; then - # Count distinct issues per group (not raw entries) - DUST_GROUPS=$(jq -r '[.group, (.issue | tostring)] | join("\t")' "$DUST_FILE" 2>/dev/null \ - | sort -u | cut -f1 | sort | uniq -c | sort -rn || true) - while read -r count group; do - [ -z "$group" ] && continue - [ "$count" -lt 3 ] && continue - - log "Bundling dust group '$group' ($count distinct issues)" - - # Collect deduplicated issue references and details for this group - BUNDLE_ISSUES=$(jq -r --arg g "$group" 'select(.group == $g) | "#\(.issue) \(.title // "untitled") — \(.reason // "dust")"' "$DUST_FILE" | sort -u) - BUNDLE_ISSUE_NUMS=$(jq -r --arg g "$group" 'select(.group == $g) | .issue' "$DUST_FILE" | sort -nu) - DISTINCT_COUNT=$(echo "$BUNDLE_ISSUE_NUMS" | grep -c '.' || true) - - bundle_title="fix: bundled dust cleanup — ${group}" - bundle_body="## Bundled dust cleanup — \`${group}\` - -Gardener bundled ${DISTINCT_COUNT} trivial tech-debt items into one issue to save factory cycles. - -### Items -$(echo "$BUNDLE_ISSUES" | sed 's/^/- /') - -### Instructions -Fix all items above in a single PR. Each is a small change (rename, comment, style fix, single-line edit). - -### Affected files -- Files in \`${group}\` subsystem - -### Acceptance criteria -- [ ] All listed items resolved -- [ ] ShellCheck passes" - - new_bundle=$(curl -sf -X POST \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${CODEBERG_API}/issues" \ - -d "$(jq -nc --arg t "$bundle_title" --arg b "$bundle_body" \ - --argjson lid "$BACKLOG_LABEL_ID" '{"title":$t,"body":$b,"labels":[$lid]}')" 2>/dev/null | jq -r '.number // ""') || true - - if [ -n "$new_bundle" ]; then - log "Created bundle issue #${new_bundle} for dust group '$group' ($DISTINCT_COUNT items)" - matrix_send "gardener" "📦 Bundled ${DISTINCT_COUNT} dust items (${group}) → #${new_bundle}" 2>/dev/null || true - - # Close source issues with cross-reference - for src_issue in $BUNDLE_ISSUE_NUMS; do - curl -sf -X POST \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${CODEBERG_API}/issues/${src_issue}/comments" \ - -d "$(jq -nc --arg b "Bundled into #${new_bundle} (dust cleanup)" '{"body":$b}')" 2>/dev/null || true - curl -sf -X PATCH \ - -H "Authorization: token ${CODEBERG_TOKEN}" \ - -H "Content-Type: application/json" \ - "${CODEBERG_API}/issues/${src_issue}" \ - -d '{"state":"closed"}' 2>/dev/null || true - log "Closed source issue #${src_issue} → bundled into #${new_bundle}" - done - - # Remove bundled items from dust.jsonl — only if jq succeeds - if jq -c --arg g "$group" 'select(.group != $g)' "$DUST_FILE" > "${DUST_FILE}.tmp" 2>/dev/null; then - mv "${DUST_FILE}.tmp" "$DUST_FILE" - else - rm -f "${DUST_FILE}.tmp" - log "WARNING: failed to prune bundled group '$group' from dust.jsonl" - fi - fi - done <<< "$DUST_GROUPS" + DUST_COUNT=$(echo "$DUST_LINES" | grep -c '.' || true) + log "Dust items reported: $DUST_COUNT (bundling handled by run-gardener formula)" fi # ── Cleanup scratch file on normal exit ──────────────────────────────────