diff --git a/gardener/gardener-poll.sh b/gardener/gardener-poll.sh index ced23ff..234d7b1 100755 --- a/gardener/gardener-poll.sh +++ b/gardener/gardener-poll.sh @@ -196,6 +196,7 @@ Process up to 10 tech-debt issues per run (stay within API rate limits). 3. **Stale issues**: If clearly superseded or no longer relevant, close with explanation. If unclear, ESCALATE. 4. **Oversized issues**: If >5 acceptance criteria touching different files/concerns, ESCALATE with suggested split. 5. **Dependencies**: If an issue references another that must land first, add a \`## Dependencies\n- #NNN\` section if missing. +6. **Sibling issues**: When creating multiple issues from the same source (PR review, code audit), NEVER add bidirectional dependencies between them. Siblings are independent work items, not parent/child. Use \`## Related\n- #NNN (sibling)\` for cross-references between siblings — NOT \`## Dependencies\`. The dev-poll \`get_deps()\` parser only reads \`## Dependencies\` / \`## Depends on\` / \`## Blocked by\` headers, so \`## Related\` is safely ignored. Keep the phrases depends on / blocked by out of Related entries: the supervisor dependency scan matches those phrases on any line, whatever section it is in. Bidirectional deps create permanent deadlocks that stall the entire factory. ## Escalation format For anything needing human decision, output EXACTLY this format (one block, all items): diff --git a/supervisor/PROMPT.md b/supervisor/PROMPT.md index b2b1d02..007ad1a 100644 --- a/supervisor/PROMPT.md +++ b/supervisor/PROMPT.md @@ -7,8 +7,8 @@ You are the supervisor agent for `$CODEBERG_REPO`. You were called because 1. **P0 — Memory crisis:** RAM <500MB or swap >3GB 2. **P1 — Disk pressure:** Disk >80% -3. **P2 — Factory stopped:** Dev-agent dead, CI down, git broken -4. **P3 — Factory degraded:** Derailed PR, stuck pipeline, unreviewed PRs +3. **P2 — Factory stopped:** Dev-agent dead, CI down, git broken, all backlog dep-blocked +4. **P3 — Factory degraded:** Derailed PR, stuck pipeline, unreviewed PRs, circular deps, stale deps 5. 
**P4 — Housekeeping:** Stale processes, log rotation ## What You Can Do @@ -42,6 +42,44 @@ This gives you: - `$FACTORY_ROOT` — path to the disinto repo - `matrix_send ` — send notifications to the Matrix coordination room +## Handling Dependency Alerts + +### Circular dependencies (P3) +When you see "Circular dependency deadlock: #A -> #B -> #A", the backlog is permanently +stuck. Your job: figure out the correct dependency direction and fix the wrong one. + +1. Read both issue bodies: `codeberg_api GET "/issues/A"`, `codeberg_api GET "/issues/B"` +2. Read the referenced source files in `$PROJECT_REPO_ROOT` to understand which change + actually depends on which +3. Edit the issue that has the incorrect dep to remove the `#NNN` reference from its + `## Dependencies` section (replace with `- None` if it was the only dep) +4. If the correct direction is unclear from code, escalate with both issue summaries + +Use the Codeberg API to edit issue bodies: +```bash +# Read current body +BODY=$(codeberg_api GET "/issues/NNN" | jq -r '.body') +# Edit: swap the circular ref for "- None" (right when it was the only dep; if other deps remain, delete that line instead so they are kept) +NEW_BODY=$(echo "$BODY" | sed 's/- #XXX/- None/') +codeberg_api PATCH "/issues/NNN" -d "$(jq -nc --arg b "$NEW_BODY" '{body:$b}')" +``` + +### Stale dependencies (P3) +When you see "Stale dependency: #A blocked by #B (open N days)", the dep may be +obsolete or misprioritized. Investigate: + +1. Check if dep #B is still relevant (read its body, check if the code it targets changed) +2. If the dep is obsolete → remove it from #A's `## Dependencies` section +3. If the dep is still needed → escalate, suggesting to prioritize #B or split #A + +### Dev-agent blocked (P2) +When you see "Dev-agent blocked: last N polls all report 'no ready issues'": + +1. Check if circular deps exist (they'll appear as separate P3 alerts) +2. Check if all backlog issues depend on a single unmerged issue — if so, escalate + to prioritize that blocker +3. 
If no clear blocker, escalate with the list of blocked issues and their deps
+
 ## Escalation
 
 If you can't fix it, escalate via Matrix:
diff --git a/supervisor/supervisor-poll.sh b/supervisor/supervisor-poll.sh
index a2441a1..9f49f18 100755
--- a/supervisor/supervisor-poll.sh
+++ b/supervisor/supervisor-poll.sh
@@ -225,6 +225,23 @@ if [ "${BACKLOG_COUNT:-0}" -gt 0 ] && [ "${IN_PROGRESS:-0}" -eq 0 ]; then
   fi
 fi
 
+# =============================================================================
+# P2c: DEV-AGENT PRODUCTIVITY — all backlog blocked for too long
+# =============================================================================
+status "P2: checking dev-agent productivity"
+
+DEV_LOG_FILE="${FACTORY_ROOT}/dev/dev-agent.log"
+if [ -f "$DEV_LOG_FILE" ]; then
+  # Check if last 6 poll entries all report "no ready issues" (~1 hour at 10min intervals)
+  # "|| true": grep exits 1 on no match, which aborts the poll if set -e and pipefail are on.
+  RECENT_POLLS=$(tail -100 "$DEV_LOG_FILE" | grep "poll:" | tail -6 || true)
+  TOTAL_RECENT=$(echo "$RECENT_POLLS" | grep -c "." || true)
+  BLOCKED_IN_RECENT=$(echo "$RECENT_POLLS" | grep -c "no ready issues" || true)
+  if [ "$TOTAL_RECENT" -ge 6 ] && [ "$BLOCKED_IN_RECENT" -eq "$TOTAL_RECENT" ]; then
+    p2 "Dev-agent blocked: last ${BLOCKED_IN_RECENT} polls all report 'no ready issues' — all backlog issues may be dep-blocked or have circular deps"
+  fi
+fi
+
 # =============================================================================
 # P3: FACTORY DEGRADED — derailed PRs, unreviewed PRs
 # =============================================================================
@@ -273,6 +290,153 @@ for pr in $OPEN_PRS; do
   fi
 done
 
+# =============================================================================
+# P3b: CIRCULAR DEPENDENCIES — deadlock detection
+# =============================================================================
+status "P3: checking for circular dependencies"
+
+BACKLOG_FOR_DEPS=$(codeberg_api GET "/issues?state=open&labels=backlog&type=issues&limit=50" 2>/dev/null || true)
+# Require a non-empty JSON array. An API error object such as {"message": ...}
+# also has a non-zero jq length, but the Python checks below cannot iterate it.
+if [ -n "$BACKLOG_FOR_DEPS" ] && echo "$BACKLOG_FOR_DEPS" | jq -e 'type == "array" and length > 0' >/dev/null 2>&1; then
+
+  # Dependency parser shared by both Python checks below, so they cannot drift
+  # apart. Collects every #NNN under a "Depends on" / "Blocked by" /
+  # "Dependencies" heading, plus every #NNN on any line containing the phrase
+  # "depends on" or "blocked by" (case-insensitive).
+  PARSE_DEPS_PY='
+import re
+
+def parse_deps(body):
+    deps = set()
+    in_section = False
+    for line in (body or "").split("\n"):
+        if re.match(r"^##?\s*(Depends on|Blocked by|Dependencies)", line, re.IGNORECASE):
+            in_section = True
+            continue
+        if in_section and re.match(r"^##?\s", line):
+            in_section = False
+        if in_section:
+            deps.update(int(m) for m in re.findall(r"#(\d+)", line))
+        if re.search(r"(depends on|blocked by)", line, re.IGNORECASE):
+            deps.update(int(m) for m in re.findall(r"#(\d+)", line))
+    return deps
+'
+
+  # Build the issue -> deps graph from the backlog and print each distinct cycle once.
+  # Best-effort: a Python failure yields no output instead of aborting the poll.
+  CYCLES=$(echo "$BACKLOG_FOR_DEPS" | python3 -c "$PARSE_DEPS_PY"'
+import sys, json
+
+issues = json.load(sys.stdin)
+
+graph = {}
+for issue in issues:
+    num = issue["number"]
+    deps = parse_deps(issue.get("body", ""))
+    deps.discard(num)
+    if deps:
+        graph[num] = deps
+
+# Three-colour depth-first search: reaching a GRAY node (still on the current
+# path) closes a cycle. Deps that have no deps of their own are skipped.
+WHITE, GRAY, BLACK = 0, 1, 2
+color = {n: WHITE for n in graph}
+cycles = []
+
+def dfs(u, path):
+    color[u] = GRAY
+    path.append(u)
+    for v in graph.get(u, set()):
+        if v not in color:
+            continue
+        if color[v] == GRAY:
+            cycles.append(path[path.index(v):] + [v])
+        elif color[v] == WHITE:
+            dfs(v, path)
+    path.pop()
+    color[u] = BLACK
+
+for node in list(graph.keys()):
+    if color.get(node) == WHITE:
+        dfs(node, [])
+
+# The same cycle can be reached from several entry points; key on its members.
+seen = set()
+for cycle in cycles:
+    key = tuple(sorted(set(cycle)))
+    if key not in seen:
+        seen.add(key)
+        print(" -> ".join(f"#{n}" for n in cycle))
+' 2>/dev/null || true)
+
+  if [ -n "$CYCLES" ]; then
+    while IFS= read -r cycle; do
+      [ -z "$cycle" ] && continue
+      p3 "Circular dependency deadlock: ${cycle}"
+    done <<< "$CYCLES"
+  fi
+
+  # ===========================================================================
+  # P3c: STALE DEPENDENCIES — blocked by old open issues (>30 days)
+  # ===========================================================================
+  status "P3: checking for stale dependencies"
+
+  # Token and API base are passed through the environment for this one command.
+  # Deps missing from the fetched backlog page are looked up one by one over HTTP.
+  STALE_DEPS=$(echo "$BACKLOG_FOR_DEPS" | CODEBERG_TOKEN="$CODEBERG_TOKEN" CODEBERG_API="$CODEBERG_API" python3 -c "$PARSE_DEPS_PY"'
+import sys, json, os
+from datetime import datetime, timezone
+from urllib.request import Request, urlopen
+
+issues = json.load(sys.stdin)
+token = os.environ.get("CODEBERG_TOKEN", "")
+api = os.environ.get("CODEBERG_API", "")
+issue_map = {i["number"]: i for i in issues}
+now = datetime.now(timezone.utc)
+
+# dep number -> issue JSON, or None when the lookup failed. Failures are cached
+# too, so an unreachable dep costs one 5s timeout per run, not one per referrer.
+checked = {}
+for issue in issues:
+    num = issue["number"]
+    deps = parse_deps(issue.get("body", ""))
+    deps.discard(num)
+    for dep in sorted(deps):
+        if dep not in checked:
+            if dep in issue_map:
+                checked[dep] = issue_map[dep]
+            else:
+                try:
+                    req = Request(f"{api}/issues/{dep}",
+                                  headers={"Authorization": f"token {token}"})
+                    with urlopen(req, timeout=5) as resp:
+                        checked[dep] = json.loads(resp.read())
+                except Exception:
+                    checked[dep] = None
+        dep_data = checked[dep]
+        if not isinstance(dep_data, dict) or dep_data.get("state") != "open":
+            continue
+        created = dep_data.get("created_at", "")
+        try:
+            created_dt = datetime.fromisoformat(created.replace("Z", "+00:00"))
+            age_days = (now - created_dt).days
+        except Exception:
+            # Missing or unparseable timestamp: age unknown, nothing to report.
+            continue
+        if age_days > 30:
+            dep_title = (dep_data.get("title") or "")[:50]
+            print(f"#{num} blocked by #{dep} \"{dep_title}\" (open {age_days} days)")
+' 2>/dev/null || true)
+
+  if [ -n "$STALE_DEPS" ]; then
+    while IFS= read -r stale; do
+      [ -z "$stale" ] && continue
+      p3 "Stale dependency: ${stale}"
+    done <<< "$STALE_DEPS"
+  fi
+fi
+
 # =============================================================================
 # P4: HOUSEKEEPING — stale processes
 # =============================================================================