From 6cf580c0104aa2a66830735e452d753091a60c2e Mon Sep 17 00:00:00 2001
From: johba
Date: Mon, 16 Mar 2026 21:16:49 +0100
Subject: [PATCH] refactor: extract shared dep parser to lib/parse-deps.py (Closes #20)

Single source of truth for dependency parsing, replacing three copies:
- dev-poll.sh get_deps() now calls parse-deps.py
- supervisor P3b/P3c import parse_deps() via importlib

Supports stdin, argument, and --json modes for different callers.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 dev/dev-poll.sh               | 14 ++-------
 lib/parse-deps.py             | 59 +++++++++++++++++++++++++++++++++++
 supervisor/supervisor-poll.sh | 57 ++++++++++-----------------------
 3 files changed, 78 insertions(+), 52 deletions(-)
 create mode 100755 lib/parse-deps.py

diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh
index a3c8fad..91a39c2 100755
--- a/dev/dev-poll.sh
+++ b/dev/dev-poll.sh
@@ -115,18 +115,8 @@ dep_is_merged() {
 # =============================================================================
 get_deps() {
   local issue_body="$1"
-  # Extract #NNN references from "Depends on" / "Blocked by" sections
-  # Capture the header line AND subsequent lines until next ## section
-  {
-    echo "$issue_body" | awk '
-      BEGIN { IGNORECASE=1 }
-      /^##? *(Depends on|Blocked by|Dependencies)/ { capture=1; next }
-      capture && /^##? / { capture=0 }
-      capture { print }
-    ' | grep -oP '#\K[0-9]+' || true
-    # Also check inline deps on same line as keyword
-    echo "$issue_body" | grep -iE '(depends on|blocked by)' | grep -oP '#\K[0-9]+' || true
-  } | sort -un
+  # Shared parser: lib/parse-deps.py (single source of truth)
+  echo "$issue_body" | python3 "${FACTORY_ROOT}/lib/parse-deps.py"
 }
 
 # =============================================================================
diff --git a/lib/parse-deps.py b/lib/parse-deps.py
new file mode 100755
index 0000000..3214688
--- /dev/null
+++ b/lib/parse-deps.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""Extract dependency issue numbers from an issue body.
+
+Usage:
+    echo "$ISSUE_BODY" | python3 lib/parse-deps.py
+    python3 lib/parse-deps.py "$ISSUE_BODY"
+    python3 lib/parse-deps.py --json < issues.json
+
+Modes:
+    stdin/arg: reads a single issue body, prints one dep number per line
+    --json:    reads a JSON array of issues from stdin, prints JSON
+               dep graph: {"issue_num": [dep1, dep2], ...}
+
+Matches the same logic as dev-poll.sh get_deps():
+  - Sections: ## Dependencies / ## Depends on / ## Blocked by
+  - Inline:   "depends on #NNN" / "blocked by #NNN" anywhere
+  - Ignores:  ## Related (safe for sibling cross-references)
+"""
+import json
+import re
+import sys
+
+
+def parse_deps(body):
+    """Return sorted list of unique dependency issue numbers from an issue body.
+
+    Collects every #NNN below a dependency heading (up to the next #/## heading)
+    and on any line mentioning "depends on" / "blocked by", headings included.
+    """
+    deps = set()
+    in_section = False
+    for line in (body or "").split("\n"):
+        if re.match(r"^##?\s*(Depends on|Blocked by|Dependencies)", line, re.IGNORECASE):
+            # No `continue`: "## Depends on #12" must still hit the inline check.
+            in_section = True
+        elif in_section and re.match(r"^##?\s", line):
+            in_section = False
+        elif in_section:
+            deps.update(int(m) for m in re.findall(r"#(\d+)", line))
+        if re.search(r"(depends on|blocked by)", line, re.IGNORECASE):
+            deps.update(int(m) for m in re.findall(r"#(\d+)", line))
+    return sorted(deps)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1 and sys.argv[1] == "--json":
+        issues = json.load(sys.stdin)
+        graph = {}
+        for issue in issues:
+            num = issue["number"]
+            deps = [d for d in parse_deps(issue.get("body", "")) if d != num]
+            if deps:
+                graph[num] = deps
+        json.dump(graph, sys.stdout)
+        print()
+    else:
+        body = sys.argv[1] if len(sys.argv) > 1 else sys.stdin.read()
+        for dep in parse_deps(body):
+            print(dep)
diff --git a/supervisor/supervisor-poll.sh b/supervisor/supervisor-poll.sh
index 9f49f18..8c8f99b 100755
--- a/supervisor/supervisor-poll.sh
+++ b/supervisor/supervisor-poll.sh
@@ -297,33 +297,22 @@ status "P3: checking for circular dependencies"
 BACKLOG_FOR_DEPS=$(codeberg_api GET "/issues?state=open&labels=backlog&type=issues&limit=50" 2>/dev/null || true)
 if [ -n "$BACKLOG_FOR_DEPS" ] && [ "$BACKLOG_FOR_DEPS" != "null" ] && [ "$(echo "$BACKLOG_FOR_DEPS" | jq 'length' 2>/dev/null || echo 0)" -gt 0 ]; then
 
+  PARSE_DEPS="${FACTORY_ROOT}/lib/parse-deps.py"
+
   CYCLES=$(echo "$BACKLOG_FOR_DEPS" | python3 -c '
-import sys, json, re
+import sys, json, importlib.util
+
+spec = importlib.util.spec_from_file_location("parse_deps", sys.argv[1])
+mod = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mod)
 
 issues = json.load(sys.stdin)
-
-def parse_deps(body):
-    deps = set()
-    in_section = False
-    for line in (body or "").split("\n"):
-        if re.match(r"^##?\s*(Depends on|Blocked by|Dependencies)", line, re.IGNORECASE):
-            in_section = True
-            continue
-        if in_section and re.match(r"^##?\s", line):
-            in_section = False
-        if in_section:
-            deps.update(int(m) for m in re.findall(r"#(\d+)", line))
-        if re.search(r"(depends on|blocked by)", line, re.IGNORECASE):
-            deps.update(int(m) for m in re.findall(r"#(\d+)", line))
-    return deps
-
 graph = {}
 for issue in issues:
     num = issue["number"]
-    deps = parse_deps(issue.get("body", ""))
-    deps.discard(num)
+    deps = [d for d in mod.parse_deps(issue.get("body", "")) if d != num]
     if deps:
-        graph[num] = deps
+        graph[num] = set(deps)
 
 WHITE, GRAY, BLACK = 0, 1, 2
 color = {n: WHITE for n in graph}
@@ -352,7 +341,7 @@ for cycle in cycles:
     if key not in seen:
         seen.add(key)
         print(" -> ".join(f"#{n}" for n in cycle))
-' 2>/dev/null || true)
+' "$PARSE_DEPS" 2>/dev/null || true)
 
   if [ -n "$CYCLES" ]; then
     while IFS= read -r cycle; do
@@ -367,36 +356,24 @@ for cycle in cycles:
 
   status "P3: checking for stale dependencies"
   STALE_DEPS=$(echo "$BACKLOG_FOR_DEPS" | CODEBERG_TOKEN="$CODEBERG_TOKEN" CODEBERG_API="$CODEBERG_API" python3 -c '
-import sys, json, re, os
+import sys, json, os, importlib.util
 from datetime import datetime, timezone
 from urllib.request import Request, urlopen
 
+spec = importlib.util.spec_from_file_location("parse_deps", sys.argv[1])
+mod = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mod)
+
 issues = json.load(sys.stdin)
 token = os.environ.get("CODEBERG_TOKEN", "")
 api = os.environ.get("CODEBERG_API", "")
 issue_map = {i["number"]: i for i in issues}
 now = datetime.now(timezone.utc)
 
-def parse_deps(body):
-    deps = set()
-    in_section = False
-    for line in (body or "").split("\n"):
-        if re.match(r"^##?\s*(Depends on|Blocked by|Dependencies)", line, re.IGNORECASE):
-            in_section = True
-            continue
-        if in_section and re.match(r"^##?\s", line):
-            in_section = False
-        if in_section:
-            deps.update(int(m) for m in re.findall(r"#(\d+)", line))
-        if re.search(r"(depends on|blocked by)", line, re.IGNORECASE):
-            deps.update(int(m) for m in re.findall(r"#(\d+)", line))
-    return deps
-
 checked = {}
 for issue in issues:
     num = issue["number"]
-    deps = parse_deps(issue.get("body", ""))
-    deps.discard(num)
+    deps = [d for d in mod.parse_deps(issue.get("body", "")) if d != num]
     for dep in deps:
         if dep in checked:
             dep_data = checked[dep]
@@ -423,7 +400,7 @@ for issue in issues:
                 print(f"#{num} blocked by #{dep} \"{dep_title}\" (open {age_days} days)")
         except Exception:
             pass
-' 2>/dev/null || true)
+' "$PARSE_DEPS" 2>/dev/null || true)
 
   if [ -n "$STALE_DEPS" ]; then
     while IFS= read -r stale; do