2026-03-12 12:44:15 +00:00
#!/usr/bin/env bash
set -euo pipefail

# supervisor-poll.sh — Supervisor agent: bash checks + claude -p for fixes
#
# Two-layer architecture:
#   1. Factory infrastructure (project-agnostic): RAM, disk, swap, docker, stale processes
#   2. Per-project checks (config-driven): CI, PRs, dev-agent, deps — iterated over projects/*.toml
#
# Runs every 10min via cron.
#
# Cron: */10 * * * * /path/to/disinto/supervisor/supervisor-poll.sh
#
# Peek: cat /tmp/supervisor-status
# Log:  tail -f /path/to/disinto/supervisor/supervisor.log

source "$(dirname "$0")/../lib/env.sh"
source "$(dirname "$0")/../lib/ci-helpers.sh"
2026-03-12 12:44:15 +00:00
2026-03-15 18:06:25 +01:00
# Paths derived from FACTORY_ROOT (exported by lib/env.sh).
LOGFILE="${FACTORY_ROOT}/supervisor/supervisor.log"
STATUSFILE="/tmp/supervisor-status"
LOCKFILE="/tmp/supervisor-poll.lock"
# shellcheck disable=SC2034  # PROMPT_FILE/PROJECTS_DIR are consumed by later sections
PROMPT_FILE="${FACTORY_ROOT}/supervisor/PROMPT.md"
PROJECTS_DIR="${FACTORY_ROOT}/projects"
2026-03-12 12:44:15 +00:00
2026-03-17 08:40:56 +00:00
METRICS_FILE="${FACTORY_ROOT}/metrics/supervisor-metrics.jsonl"

# Append a single pre-formatted JSONL line to the metrics file.
# $1 - one JSON object, already serialized.
emit_metric() {
  printf '%s\n' "$1" >> "$METRICS_FILE"
}
2026-03-17 09:31:49 +00:00
# Count all matching items from a paginated Codeberg API endpoint.
# Usage: codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues"
# The endpoint must already contain a query string, since "&limit=…&page=…"
# is appended verbatim.
# Returns total count across all pages (max 20 pages = 1000 items).
codeberg_count_paginated() {
  local endpoint="$1" total=0 page=1 count
  while true; do
    # Any API/jq failure counts as 0 so the caller never crashes under set -e.
    count=$(codeberg_api GET "${endpoint}&limit=50&page=${page}" 2>/dev/null | jq 'length' 2>/dev/null || echo 0)
    total=$((total + ${count:-0}))
    # A short page means we reached the end; hard cap at 20 pages.
    [ "${count:-0}" -lt 50 ] && break
    page=$((page + 1))
    [ "$page" -gt 20 ] && break
  done
  echo "$total"
}
2026-03-17 08:40:56 +00:00
# Drop metric lines older than 30 days from METRICS_FILE (JSONL with a .ts
# field in lexicographically sortable ISO form). No-op if the file is absent.
rotate_metrics() {
  [ -f "$METRICS_FILE" ] || return 0
  local cutoff tmpfile
  cutoff=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M)   # GNU date
  tmpfile="${METRICS_FILE}.tmp"

  # "|| true": a malformed line must not abort the supervisor under set -e.
  jq -c --arg cutoff "$cutoff" 'select(.ts >= $cutoff)' \
    "$METRICS_FILE" > "$tmpfile" 2>/dev/null || true
  # Only replace if jq produced output, or the source is already empty
  if [ -s "$tmpfile" ] || [ ! -s "$METRICS_FILE" ]; then
    mv "$tmpfile" "$METRICS_FILE"
  else
    rm -f "$tmpfile"
  fi
}
2026-03-12 12:44:15 +00:00
# Prevent overlapping runs: exit quietly if a live supervisor holds the lock,
# otherwise clear the stale lockfile and take over.
if [ -f "$LOCKFILE" ]; then
  LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null)
  # Guard against an empty lockfile: kill -0 "" would always be treated as dead.
  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then
    exit 0
  fi
  rm -f "$LOCKFILE"
fi
echo $$ > "$LOCKFILE"
trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT

mkdir -p "$(dirname "$METRICS_FILE")"

rotate_metrics
2026-03-12 12:44:15 +00:00
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Append a UTC-timestamped line to the supervisor log.
flog() {
  printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
}
2026-03-12 12:44:15 +00:00
# Overwrite the one-line status file (peek: cat /tmp/supervisor-status)
# and mirror the message into the log via flog.
status() {
  printf '[%s] supervisor: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE"
  flog "$*"
}
2026-03-14 16:25:33 +01:00
# ── Check for escalation replies from Matrix ──────────────────────────────
# The Matrix bridge drops operator replies into this well-known file;
# consume it (read + delete) so each reply is processed exactly once.
ESCALATION_REPLY=""
if [ -s /tmp/supervisor-escalation-reply ]; then
  ESCALATION_REPLY=$(cat /tmp/supervisor-escalation-reply)
  rm -f /tmp/supervisor-escalation-reply
  flog "Got escalation reply: $(echo "$ESCALATION_REPLY" | head -1)"
fi
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Alerts by priority. Each helper appends one bullet line terminated by a
# literal "\n" (expanded later with printf '%b') and logs immediately.
P0_ALERTS=""
P1_ALERTS=""
P2_ALERTS=""
P3_ALERTS=""
P4_ALERTS=""
p0() { P0_ALERTS="${P0_ALERTS}• [P0] $*\n"; flog "P0: $*"; }
p1() { P1_ALERTS="${P1_ALERTS}• [P1] $*\n"; flog "P1: $*"; }
p2() { P2_ALERTS="${P2_ALERTS}• [P2] $*\n"; flog "P2: $*"; }
p3() { P3_ALERTS="${P3_ALERTS}• [P3] $*\n"; flog "P3: $*"; }
p4() { P4_ALERTS="${P4_ALERTS}• [P4] $*\n"; flog "P4: $*"; }
# Successful auto-fixes, reported alongside alerts.
FIXES=""
fixed() { FIXES="${FIXES}• ✅ $*\n"; flog "FIXED: $*"; }
2026-03-12 12:44:15 +00:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# #############################################################################
# LAYER 1: FACTORY INFRASTRUCTURE
# (project-agnostic, runs once)
# #############################################################################

# =============================================================================
# P0: MEMORY — check first, fix first
# =============================================================================
status "P0: checking memory"

AVAIL_MB=$(free -m | awk '/Mem:/{print $7}')      # "available" column
SWAP_USED_MB=$(free -m | awk '/Swap:/{print $3}')

# Crisis: <500MB available, or heavy swap (>3GB) while available is already low.
if [ "${AVAIL_MB:-9999}" -lt 500 ] || { [ "${SWAP_USED_MB:-0}" -gt 3000 ] && [ "${AVAIL_MB:-9999}" -lt 2000 ]; }; then
  flog "MEMORY CRISIS: avail=${AVAIL_MB}MB swap_used=${SWAP_USED_MB}MB — auto-fixing"
  # Kill stale agent-spawned claude processes (>3h old) — skip interactive sessions
  STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true)
  if [ -n "$STALE_CLAUDES" ]; then
    echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true
    fixed "Killed stale claude processes: ${STALE_CLAUDES}"
  fi
  # Drop filesystem caches — guarded: with set -e -o pipefail an unguarded
  # failing sudo pipeline would abort the whole supervisor run here.
  if sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1; then
    fixed "Dropped filesystem caches"
  fi
  # Re-check after fixes
  AVAIL_MB_AFTER=$(free -m | awk '/Mem:/{print $7}')
  SWAP_AFTER=$(free -m | awk '/Swap:/{print $3}')
  if [ "${AVAIL_MB_AFTER:-0}" -lt 500 ] || [ "${SWAP_AFTER:-0}" -gt 3000 ]; then
    p0 "Memory still critical after auto-fix: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB"
  else
    flog "Memory recovered: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB"
  fi
fi

# P0 is urgent — send immediately before per-project checks can crash the script
if [ -n "$P0_ALERTS" ]; then
  matrix_send "supervisor" "🚨 Supervisor P0 alerts:
$(printf '%b' "$P0_ALERTS")" 2>/dev/null || true
  P0_ALERTS=""  # clear so it is not duplicated in the final consolidated send
fi
2026-03-12 12:44:15 +00:00
# =============================================================================
# P1: DISK
# =============================================================================
status "P1: checking disk"

DISK_PERCENT=$(df -h / | awk 'NR==2{print $5}' | tr -d '%')

if [ "${DISK_PERCENT:-0}" -gt 80 ]; then
  flog "DISK PRESSURE: ${DISK_PERCENT}% — auto-cleaning"
  # Docker cleanup (safe — keeps images)
  sudo docker system prune -f >/dev/null 2>&1 && fixed "Docker prune"
  # Truncate logs >10MB
  for logfile in "${FACTORY_ROOT}"/{dev,review,supervisor}/*.log; do
    if [ -f "$logfile" ]; then
      SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1)
      if [ "${SIZE_KB:-0}" -gt 10240 ]; then
        truncate -s 0 "$logfile"
        fixed "Truncated $(basename "$logfile") (was ${SIZE_KB}KB)"
      fi
    fi
  done
  # Woodpecker log_entries cleanup. "|| true": with set -e -o pipefail an
  # unreachable DB would otherwise abort the whole supervisor run here.
  LOG_ENTRIES_MB=$(wpdb -c "SELECT pg_size_pretty(pg_total_relation_size('log_entries'));" 2>/dev/null | xargs || true)
  if echo "$LOG_ENTRIES_MB" | grep -qP '\d+\s*(GB|MB)'; then
    SIZE_NUM=$(echo "$LOG_ENTRIES_MB" | grep -oP '\d+')
    SIZE_UNIT=$(echo "$LOG_ENTRIES_MB" | grep -oP '(GB|MB)')
    # Trim when the table is GB-sized, or MB-sized above 500MB.
    if [ "$SIZE_UNIT" = "GB" ] || { [ "$SIZE_UNIT" = "MB" ] && [ "$SIZE_NUM" -gt 500 ]; }; then
      # Guarded if: only report "fixed" when the DELETE actually succeeded,
      # and never abort under set -e when it doesn't.
      if wpdb -c "DELETE FROM log_entries WHERE id < (SELECT max(id) - 100000 FROM log_entries);" 2>/dev/null; then
        fixed "Trimmed Woodpecker log_entries (was ${LOG_ENTRIES_MB})"
      fi
    fi
  fi
  DISK_AFTER=$(df -h / | awk 'NR==2{print $5}' | tr -d '%')
  if [ "${DISK_AFTER:-0}" -gt 80 ]; then
    p1 "Disk still ${DISK_AFTER}% after auto-clean"
  else
    flog "Disk recovered: ${DISK_AFTER}%"
  fi
fi

# P1 is urgent — send immediately before per-project checks can crash the script
if [ -n "$P1_ALERTS" ]; then
  matrix_send "supervisor" "⚠️ Supervisor P1 alerts:
$(printf '%b' "$P1_ALERTS")" 2>/dev/null || true
  P1_ALERTS=""  # clear so it is not duplicated in the final consolidated send
fi

# Emit infra metric (best effort — jq/emit failures never abort the supervisor)
_RAM_TOTAL_MB=$(free -m | awk '/Mem:/{print $2}')
_RAM_USED_PCT=$(( ${_RAM_TOTAL_MB:-0} > 0 ? (${_RAM_TOTAL_MB:-0} - ${AVAIL_MB:-0}) * 100 / ${_RAM_TOTAL_MB:-1} : 0 ))
emit_metric "$(jq -nc \
  --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \
  --argjson ram "${_RAM_USED_PCT:-0}" \
  --argjson disk "${DISK_PERCENT:-0}" \
  --argjson swap "${SWAP_USED_MB:-0}" \
  '{ts:$ts,type:"infra",ram_used_pct:$ram,disk_used_pct:$disk,swap_mb:$swap}' 2>/dev/null)" 2>/dev/null || true
2026-03-12 12:44:15 +00:00
# =============================================================================
# P4-INFRA: HOUSEKEEPING — stale processes, log rotation (project-agnostic)
# =============================================================================
status "P4: infra housekeeping"

# Stale agent-spawned claude processes (>3h) — skip interactive sessions
STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true)
if [ -n "$STALE_CLAUDES" ]; then
  echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true
  fixed "Killed stale claude processes: $(echo "$STALE_CLAUDES" | wc -w) procs"
fi

# Rotate logs >5MB. "|| true" on mv: a permissions hiccup must not abort
# the whole run under set -e.
for logfile in "${FACTORY_ROOT}"/{dev,review,supervisor}/*.log; do
  if [ -f "$logfile" ]; then
    SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1)
    if [ "${SIZE_KB:-0}" -gt 5120 ]; then
      mv "$logfile" "${logfile}.old" 2>/dev/null || true
      fixed "Rotated $(basename "$logfile")"
    fi
  fi
done
2026-03-12 12:44:15 +00:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# #############################################################################
# LAYER 2: PER-PROJECT CHECKS
# (iterated over projects/*.toml, config-driven)
# #############################################################################

# Infra retry tracking (shared across projects, created once).
# Files are named "<repo_id>-<pipeline_number>" and hold a retry counter.
_RETRY_DIR="/tmp/supervisor-infra-retries"
mkdir -p "$_RETRY_DIR"
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# Function: run all per-project checks for the currently loaded project config
check_project( ) {
local proj_name = " ${ PROJECT_NAME :- unknown } "
flog " ── checking project: ${ proj_name } ( ${ CODEBERG_REPO } ) ── "
2026-03-12 18:06:08 +00:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# ===========================================================================
# P2: FACTORY STOPPED — CI, dev-agent, git
# ===========================================================================
status " P2: ${ proj_name } : checking pipeline "
# CI stuck
STUCK_CI = $( wpdb -c " SELECT count(*) FROM pipelines WHERE repo_id= ${ WOODPECKER_REPO_ID } AND status='running' AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200; " 2>/dev/null | xargs || true )
[ " ${ STUCK_CI :- 0 } " -gt 0 ] 2>/dev/null && p2 " ${ proj_name } : CI: ${ STUCK_CI } pipeline(s) running >20min "
PENDING_CI = $( wpdb -c " SELECT count(*) FROM pipelines WHERE repo_id= ${ WOODPECKER_REPO_ID } AND status='pending' AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800; " 2>/dev/null | xargs || true )
[ " ${ PENDING_CI :- 0 } " -gt 0 ] && p2 " ${ proj_name } : CI: ${ PENDING_CI } pipeline(s) pending >30min "
2026-03-17 09:31:49 +00:00
# Emit CI metric (last completed pipeline within 24h — skip if project has no recent CI)
_CI_ROW = $( wpdb -A -F ',' -c " SELECT id, COALESCE(ROUND(EXTRACT(EPOCH FROM (to_timestamp(finished) - to_timestamp(started)))/60)::int, 0), status FROM pipelines WHERE repo_id= ${ WOODPECKER_REPO_ID } AND status IN ('success','failure','error') AND finished > 0 AND to_timestamp(finished) > now() - interval '24 hours' ORDER BY id DESC LIMIT 1; " 2>/dev/null | grep -E '^[0-9]' | head -1 || true )
2026-03-17 08:40:56 +00:00
if [ -n " $_CI_ROW " ] ; then
_CI_ID = $( echo " $_CI_ROW " | cut -d',' -f1 | tr -d ' ' )
_CI_DUR = $( echo " $_CI_ROW " | cut -d',' -f2 | tr -d ' ' )
_CI_STAT = $( echo " $_CI_ROW " | cut -d',' -f3 | tr -d ' ' )
emit_metric " $( jq -nc \
--arg ts " $( date -u +%Y-%m-%dT%H:%MZ) " \
--arg proj " $proj_name " \
--argjson pipeline " ${ _CI_ID :- 0 } " \
--argjson duration " ${ _CI_DUR :- 0 } " \
--arg status " ${ _CI_STAT :- unknown } " \
'{ts:$ts,type:"ci",project:$proj,pipeline:$pipeline,duration_min:$duration,status:$status}' 2>/dev/null) " 2>/dev/null || true
fi
2026-03-18 01:08:35 +00:00
# ===========================================================================
# P2e: INFRA FAILURES — auto-retrigger pipelines with infra failures
# ===========================================================================
if [ "${CHECK_INFRA_RETRY:-true}" = "true" ]; then
  status "P2e: ${proj_name}: checking infra failures"

  # Pipelines that failed in the last 6h, newest first (max 5).
  # grep may match nothing — '|| true' keeps 'set -o pipefail' from killing us.
  _failed_nums=$(wpdb -A -c "
    SELECT number FROM pipelines
    WHERE repo_id = ${WOODPECKER_REPO_ID}
      AND status IN ('failure', 'error')
      AND finished > 0
      AND to_timestamp(finished) > now() - interval '6 hours'
    ORDER BY number DESC LIMIT 5;" 2>/dev/null \
    | tr -d ' ' | grep -E '^[0-9]+$' || true)

  # Word-splitting of $_failed_nums is intentional: one pipeline number per word.
  # shellcheck disable=SC2086
  for _pip_num in $_failed_nums; do
    [ -z "$_pip_num" ] && continue

    # Per-pipeline retry counter persisted under $_RETRY_DIR (defined earlier).
    _retry_file="${_RETRY_DIR}/${WOODPECKER_REPO_ID}-${_pip_num}"
    _retries=0
    [ -f "$_retry_file" ] && _retries=$(cat "$_retry_file" 2>/dev/null || echo 0)
    # Guard against a corrupted counter file: anything non-numeric counts as 0.
    [[ "$_retries" =~ ^[0-9]+$ ]] || _retries=0

    # Give up after 2 automatic retries and escalate to a human.
    if [ "$_retries" -ge 2 ]; then
      p2 "${proj_name}: Pipeline #${_pip_num}: infra retries exhausted (2/2), needs manual investigation"
      continue
    fi

    # Classify failure type via shared helper (ci-helpers.sh). On helper error
    # default to "code" so a genuine code failure is never auto-retried.
    _classification=$(classify_pipeline_failure "${WOODPECKER_REPO_ID}" "$_pip_num" 2>/dev/null || echo "code")

    if [[ "$_classification" == infra* ]]; then
      _infra_reason="${_classification#infra}"
      _new_retries=$((_retries + 1))
      if woodpecker_api "/repos/${WOODPECKER_REPO_ID}/pipelines/${_pip_num}" \
        -X POST >/dev/null 2>&1; then
        echo "$_new_retries" > "$_retry_file"
        fixed "${proj_name}: Retriggered pipeline #${_pip_num} (${_infra_reason}, retry ${_new_retries}/2)"
      else
        p2 "${proj_name}: Pipeline #${_pip_num}: infra failure (${_infra_reason}) but retrigger API call failed"
        flog "${proj_name}: Failed to retrigger pipeline #${_pip_num}: API error"
      fi
    fi
  done
  # Clean up stale retry tracking files (>24h)
  find "$_RETRY_DIR" -type f -mmin +1440 -delete 2>/dev/null || true
fi
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# Dev-agent health (only if monitoring enabled)
if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then
  # Lock file holds the PID of the running dev-agent for this project.
  DEV_LOCK="/tmp/dev-agent-${PROJECT_NAME}.lock"

  if [ -f "$DEV_LOCK" ]; then
    DEV_PID=$(cat "$DEV_LOCK" 2>/dev/null)
    # Empty PID (truncated lock) is treated as dead, same as a gone process.
    if [ -z "$DEV_PID" ] || ! kill -0 "$DEV_PID" 2>/dev/null; then
      rm -f "$DEV_LOCK"
      fixed "${proj_name}: Removed stale dev-agent lock (PID ${DEV_PID} dead)"
    else
      # Agent is alive — alert if its status file hasn't changed in >30 min.
      # A missing status file stats as epoch 0 and therefore also alerts.
      DEV_STATUS_AGE=$(stat -c %Y "/tmp/dev-agent-status-${PROJECT_NAME:-default}" 2>/dev/null || echo 0)
      NOW_EPOCH=$(date +%s)
      STATUS_AGE_MIN=$(( (NOW_EPOCH - DEV_STATUS_AGE) / 60 ))
      if [ "$STATUS_AGE_MIN" -gt 30 ]; then
        p2 "${proj_name}: Dev-agent: status unchanged for ${STATUS_AGE_MIN} min"
      fi
    fi
  fi
fi
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# Git repo health: ensure the checkout is on the primary branch and not
# wedged mid-rebase. The cd is part of the guard: the previous
# `cd … || true` fall-through would have run git commands (including
# `rebase --abort`) in whatever directory we happened to be in.
if [ -d "${PROJECT_REPO_ROOT}" ] && cd "${PROJECT_REPO_ROOT}" 2>/dev/null; then
  GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
  # An in-progress rebase leaves one of these state directories behind.
  if [ -d .git/rebase-merge ] || [ -d .git/rebase-apply ]; then
    GIT_REBASE="yes"
  else
    GIT_REBASE="no"
  fi

  if [ "$GIT_REBASE" = "yes" ]; then
    if git rebase --abort 2>/dev/null && git checkout "${PRIMARY_BRANCH}" 2>/dev/null; then
      fixed "${proj_name}: Aborted stale rebase, switched to ${PRIMARY_BRANCH}"
    else
      p2 "${proj_name}: Git: stale rebase, auto-abort failed"
    fi
  fi
  if [ "$GIT_BRANCH" != "${PRIMARY_BRANCH}" ] && [ "$GIT_BRANCH" != "unknown" ]; then
    if git checkout "${PRIMARY_BRANCH}" 2>/dev/null; then
      fixed "${proj_name}: Switched repo from '${GIT_BRANCH}' to ${PRIMARY_BRANCH}"
    else
      p2 "${proj_name}: Git: on '${GIT_BRANCH}' instead of ${PRIMARY_BRANCH}"
    fi
  fi
fi
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# ===========================================================================
# P2b: FACTORY STALLED — backlog exists but no agent running
# ===========================================================================
if [ "${CHECK_PIPELINE_STALL:-true}" = "true" ]; then
  status "P2: ${proj_name}: checking pipeline stall"
  # limit=1 → counts cap at 1; only the zero/non-zero distinction is used.
  BACKLOG_COUNT=$(codeberg_api GET "/issues?state=open&labels=backlog&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0")
  IN_PROGRESS=$(codeberg_api GET "/issues?state=open&labels=in-progress&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0")
  if [ "${BACKLOG_COUNT:-0}" -gt 0 ] && [ "${IN_PROGRESS:-0}" -eq 0 ]; then
    # Use the dev-agent log mtime as "last time an agent did anything".
    DEV_LOG="${FACTORY_ROOT}/dev/dev-agent.log"
    if [ -f "$DEV_LOG" ]; then
      LAST_LOG_EPOCH=$(stat -c %Y "$DEV_LOG" 2>/dev/null || echo 0)
    else
      LAST_LOG_EPOCH=0
    fi
    NOW_EPOCH=$(date +%s)
    IDLE_MIN=$(( (NOW_EPOCH - LAST_LOG_EPOCH) / 60 ))
    if [ "$IDLE_MIN" -gt 20 ]; then
      p2 "${proj_name}: Pipeline stalled: ${BACKLOG_COUNT} backlog issue(s), no agent ran for ${IDLE_MIN} min"
    fi
  fi
fi
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# ===========================================================================
# P2c: DEV-AGENT PRODUCTIVITY — all backlog blocked for too long
# ===========================================================================
if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then
  status "P2: ${proj_name}: checking dev-agent productivity"
  DEV_LOG_FILE="${FACTORY_ROOT}/dev/dev-agent.log"
  if [ -f "$DEV_LOG_FILE" ]; then
    # grep exits 1 on no match; under `set -o pipefail` that would abort the
    # whole script, so the pipeline needs the trailing `|| true`.
    RECENT_POLLS=$(tail -100 "$DEV_LOG_FILE" | grep "poll:" | tail -6 || true)
    TOTAL_RECENT=$(echo "$RECENT_POLLS" | grep -c "." || true)
    BLOCKED_IN_RECENT=$(echo "$RECENT_POLLS" | grep -c "no ready issues" || true)
    # Only alert once we have a full window of 6 polls and every one is blocked.
    if [ "$TOTAL_RECENT" -ge 6 ] && [ "$BLOCKED_IN_RECENT" -eq "$TOTAL_RECENT" ]; then
      p2 "${proj_name}: Dev-agent blocked: last ${BLOCKED_IN_RECENT} polls all report 'no ready issues'"
    fi
  fi
fi
2026-03-16 21:22:53 +01:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# ===========================================================================
# P3: FACTORY DEGRADED — derailed PRs, unreviewed PRs
# ===========================================================================
if [ "${CHECK_PRS:-true}" = "true" ]; then
  status "P3: ${proj_name}: checking PRs"
  OPEN_PRS=$(codeberg_api GET "/pulls?state=open&limit=10" 2>/dev/null | jq -r '.[].number' 2>/dev/null || true)
  for pr in $OPEN_PRS; do
    PR_JSON=$(codeberg_api GET "/pulls/${pr}" 2>/dev/null || true)
    [ -z "$PR_JSON" ] && continue
    PR_SHA=$(echo "$PR_JSON" | jq -r '.head.sha // ""')
    [ -z "$PR_SHA" ] && continue
    # Combined commit status of the PR head; ci_passed comes from ci-helpers.sh.
    CI_STATE=$(codeberg_api GET "/commits/${PR_SHA}/status" 2>/dev/null | jq -r '.state // "unknown"' 2>/dev/null || true)
    MERGEABLE=$(echo "$PR_JSON" | jq -r '.mergeable // true')

    if [ "$MERGEABLE" = "false" ] && ci_passed "$CI_STATE"; then
      # CI is green but the PR cannot merge → needs a rebase.
      p3 "${proj_name}: PR #${pr}: CI pass but merge conflict — needs rebase"
    elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then
      # Red CI that nobody has touched for >30 min.
      UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""')
      if [ -n "$UPDATED" ]; then
        UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0)
        NOW_EPOCH=$(date +%s)
        AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 ))
        [ "$AGE_MIN" -gt 30 ] && p3 "${proj_name}: PR #${pr}: CI=${CI_STATE}, stale ${AGE_MIN} min"
      fi
    elif ci_passed "$CI_STATE"; then
      # Green CI: look for a review marker comment tied to this exact head SHA.
      HAS_REVIEW=$(codeberg_api GET "/issues/${pr}/comments?limit=50" 2>/dev/null | \
        jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains("<!-- reviewed: " + $sha))] | length' 2>/dev/null || echo "0")
      if [ "${HAS_REVIEW:-0}" -eq 0 ]; then
        UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""')
        if [ -n "$UPDATED" ]; then
          UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0)
          NOW_EPOCH=$(date +%s)
          AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 ))
          if [ "$AGE_MIN" -gt 60 ]; then
            p3 "${proj_name}: PR #${pr}: CI passed, no review for ${AGE_MIN} min"
            # Kick off a review in the background rather than waiting here.
            bash "${FACTORY_ROOT}/review/review-pr.sh" "$pr" >> "${FACTORY_ROOT}/review/review.log" 2>&1 &
            fixed "${proj_name}: Auto-triggered review for PR #${pr}"
          fi
        fi
      fi
    fi
  done
fi
# ===========================================================================
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# P3b: CIRCULAR DEPENDENCIES — deadlock detection
2026-03-16 21:06:50 +01:00
# ===========================================================================
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
status " P3: ${ proj_name } : checking for circular dependencies "
BACKLOG_FOR_DEPS = $( codeberg_api GET "/issues?state=open&labels=backlog&type=issues&limit=50" 2>/dev/null || true )
if [ -n " $BACKLOG_FOR_DEPS " ] && [ " $BACKLOG_FOR_DEPS " != "null" ] && [ " $( echo " $BACKLOG_FOR_DEPS " | jq 'length' 2>/dev/null || echo 0) " -gt 0 ] ; then
PARSE_DEPS = " ${ FACTORY_ROOT } /lib/parse-deps.sh "
ISSUE_COUNT = $( echo " $BACKLOG_FOR_DEPS " | jq 'length' )
declare -A DEPS_OF
declare -A BACKLOG_NUMS
for i in $( seq 0 $(( ISSUE_COUNT - 1 )) ) ; do
NUM = $( echo " $BACKLOG_FOR_DEPS " | jq -r " .[ $i ].number " )
BODY = $( echo " $BACKLOG_FOR_DEPS " | jq -r " .[ $i ].body // \"\" " )
ISSUE_DEPS = $( echo " $BODY " | bash " $PARSE_DEPS " | grep -v " ^ ${ NUM } $" || true )
[ -n " $ISSUE_DEPS " ] && DEPS_OF[ $NUM ] = " $ISSUE_DEPS "
BACKLOG_NUMS[ $NUM ] = 1
done
2026-03-16 21:06:50 +01:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# DFS node colours: 0 = unvisited, 1 = on current path (gray), 2 = done.
# Reset per project so no colouring leaks between loop iterations.
declare -A NODE_COLOR=()
for node in "${!BACKLOG_NUMS[@]}"; do NODE_COLOR[$node]=0; done
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
FOUND_CYCLES=""
# Dedup set of already-reported cycles; reset per project.
declare -A SEEN_CYCLES=()

#######################################
# Depth-first cycle detection over the DEPS_OF dependency graph.
# Globals:   DEPS_OF, BACKLOG_NUMS (read), NODE_COLOR, SEEN_CYCLES,
#            FOUND_CYCLES (written)
# Arguments: $1 - current node (issue number)
#            $2 - space-separated path from the DFS root to $1 (inclusive)
# Outputs:   appends "#a -> #b -> #a\n" per newly found cycle to FOUND_CYCLES
#######################################
dfs_detect_cycle() {
  local node="$1" path="$2"
  NODE_COLOR[$node]=1
  # Intentional word-splitting: deps are stored space-separated.
  for dep in ${DEPS_OF[$node]:-}; do
    # Only dependencies that are themselves backlog issues can deadlock us.
    [ -z "${BACKLOG_NUMS[$dep]+x}" ] && continue
    if [ "${NODE_COLOR[$dep]}" = "1" ]; then
      # Back edge → cycle. Canonicalise the member set (sorted) so the same
      # cycle discovered from different entry points is reported once.
      local cycle_key
      cycle_key=$(echo "$path $dep" | tr ' ' '\n' | sort -n | tr '\n' ' ')
      if [ -z "${SEEN_CYCLES[$cycle_key]+x}" ]; then
        SEEN_CYCLES[$cycle_key]=1
        # Render the cycle starting at the first occurrence of $dep in path.
        local in_cycle=0 cycle_str=""
        for p in $path $dep; do
          [ "$p" = "$dep" ] && in_cycle=1
          [ "$in_cycle" = "1" ] && cycle_str="${cycle_str:+$cycle_str -> }#${p}"
        done
        FOUND_CYCLES="${FOUND_CYCLES}${cycle_str}\n"
      fi
    elif [ "${NODE_COLOR[$dep]}" = "0" ]; then
      dfs_detect_cycle "$dep" "$path $dep"
    fi
  done
  NODE_COLOR[$node]=2
}
2026-03-16 21:22:53 +01:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# Start a DFS from every issue that declares dependencies and is still
# unvisited (colour 0); nodes absent from NODE_COLOR default to "done".
for node in "${!DEPS_OF[@]}"; do
  if [ "${NODE_COLOR[$node]:-2}" = "0" ]; then
    dfs_detect_cycle "$node" "$node"
  fi
done
2026-03-16 21:22:53 +01:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
if [ -n "$FOUND_CYCLES" ]; then
  # printf '%b' expands the literal \n separators accumulated above (and is
  # portable, unlike echo -e). Process substitution instead of a pipe keeps
  # the loop in this shell, so any shell-level state p3 records is not lost
  # in a pipeline subshell.
  while IFS= read -r cycle; do
    [ -z "$cycle" ] && continue
    p3 "${proj_name}: Circular dependency deadlock: ${cycle}"
  done < <(printf '%b' "$FOUND_CYCLES")
fi
2026-03-16 21:22:53 +01:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# =========================================================================
# P3c: STALE DEPENDENCIES — blocked by old open issues (>30 days)
# =========================================================================
status " P3: ${ proj_name } : checking for stale dependencies "
2026-03-16 21:22:53 +01:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
NOW_EPOCH = $( date +%s)
declare -A DEP_CACHE
for issue_num in " ${ !DEPS_OF[@] } " ; do
for dep in ${ DEPS_OF [ $issue_num ] } ; do
if [ -n " ${ DEP_CACHE [ $dep ]+x } " ] ; then
DEP_INFO = " ${ DEP_CACHE [ $dep ] } "
else
DEP_JSON = $( codeberg_api GET " /issues/ ${ dep } " 2>/dev/null || true )
[ -z " $DEP_JSON " ] && continue
DEP_STATE = $( echo " $DEP_JSON " | jq -r '.state // "unknown"' )
DEP_CREATED = $( echo " $DEP_JSON " | jq -r '.created_at // ""' )
DEP_TITLE = $( echo " $DEP_JSON " | jq -r '.title // ""' | head -c 50)
DEP_INFO = " ${ DEP_STATE } | ${ DEP_CREATED } | ${ DEP_TITLE } "
DEP_CACHE[ $dep ] = " $DEP_INFO "
fi
2026-03-16 21:22:53 +01:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# Unpack the cached "state|created|title" record; only open deps can block.
DEP_STATE="${DEP_INFO%%|*}"
[ "$DEP_STATE" != "open" ] && continue

DEP_REST="${DEP_INFO#*|}"
DEP_CREATED="${DEP_REST%%|*}"
DEP_TITLE="${DEP_REST#*|}"
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
[ -z " $DEP_CREATED " ] && continue
CREATED_EPOCH = $( date -d " $DEP_CREATED " +%s 2>/dev/null || echo 0)
AGE_DAYS = $(( ( NOW_EPOCH - CREATED_EPOCH) / 86400 ))
if [ " $AGE_DAYS " -gt 30 ] ; then
p3 " ${ proj_name } : Stale dependency: # ${ issue_num } blocked by # ${ dep } \" ${ DEP_TITLE } \" (open ${ AGE_DAYS } days) "
fi
done
done
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
unset DEPS_OF BACKLOG_NUMS NODE_COLOR SEEN_CYCLES DEP_CACHE
2026-03-15 16:05:29 +01:00
fi
2026-03-17 09:31:49 +00:00
# Emit dev metric (paginated to avoid silent cap at 50)
_BACKLOG_COUNT = $( codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues" )
_BLOCKED_COUNT = $( codeberg_count_paginated "/issues?state=open&labels=blocked&type=issues" )
_PR_COUNT = $( codeberg_count_paginated "/pulls?state=open" )
2026-03-17 08:40:56 +00:00
emit_metric " $( jq -nc \
--arg ts " $( date -u +%Y-%m-%dT%H:%MZ) " \
--arg proj " $proj_name " \
--argjson backlog " ${ _BACKLOG_COUNT :- 0 } " \
--argjson blocked " ${ _BLOCKED_COUNT :- 0 } " \
--argjson prs " ${ _PR_COUNT :- 0 } " \
'{ts:$ts,type:"dev",project:$proj,issues_in_backlog:$backlog,issues_blocked:$blocked,pr_open:$prs}' 2>/dev/null) " 2>/dev/null || true
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# ===========================================================================
2026-03-21 19:39:04 +00:00
# P2d: ESCALATE — inject human replies into escalated dev sessions
2026-03-17 22:33:28 +00:00
# ===========================================================================
2026-03-21 19:39:04 +00:00
status " P2: ${ proj_name } : checking escalate sessions "
2026-03-17 22:33:28 +00:00
HUMAN_REPLY_FILE = "/tmp/dev-escalation-reply"
for _nh_phase_file in /tmp/dev-session-" ${ proj_name } " -*.phase; do
[ -f " $_nh_phase_file " ] || continue
_nh_phase = $( head -1 " $_nh_phase_file " 2>/dev/null | tr -d '[:space:]' || true )
2026-03-21 19:39:04 +00:00
[ " $_nh_phase " = "PHASE:escalate" ] || continue
2026-03-17 22:33:28 +00:00
_nh_issue = $( basename " $_nh_phase_file " .phase)
_nh_issue = " ${ _nh_issue #dev-session- ${ proj_name } - } "
[ -z " $_nh_issue " ] && continue
_nh_session = " dev- ${ proj_name } - ${ _nh_issue } "
# Check tmux session is alive
if ! tmux has-session -t " $_nh_session " 2>/dev/null; then
2026-03-21 19:39:04 +00:00
flog " ${ proj_name } : # ${ _nh_issue } phase=escalate but tmux session gone "
2026-03-17 22:33:28 +00:00
continue
fi
2026-03-17 22:40:54 +00:00
# Inject human reply if available (atomic mv to prevent double-injection with gardener)
_nh_claimed = " /tmp/dev-escalation-reply.supervisor. $$ "
if [ -s " $HUMAN_REPLY_FILE " ] && mv " $HUMAN_REPLY_FILE " " $_nh_claimed " 2>/dev/null; then
_nh_reply = $( cat " $_nh_claimed " )
2026-03-17 22:33:28 +00:00
_nh_inject_msg = " Human reply received for issue # ${ _nh_issue } :
${ _nh_reply }
Instructions:
1. Read the human' s guidance carefully.
2. Continue your work based on their input.
2026-03-17 22:40:54 +00:00
3. When done , push your changes and write the appropriate phase."
2026-03-17 22:33:28 +00:00
_nh_tmpfile = $( mktemp /tmp/human-inject-XXXXXX)
printf '%s' " $_nh_inject_msg " > " $_nh_tmpfile "
# All tmux calls guarded: session may die between has-session and here
tmux load-buffer -b " human-inject- ${ _nh_issue } " " $_nh_tmpfile " || true
tmux paste-buffer -t " $_nh_session " -b " human-inject- ${ _nh_issue } " || true
sleep 0.5
tmux send-keys -t " $_nh_session " "" Enter || true
tmux delete-buffer -b " human-inject- ${ _nh_issue } " 2>/dev/null || true
2026-03-17 22:59:05 +00:00
rm -f " $_nh_tmpfile " " $_nh_claimed "
2026-03-17 22:33:28 +00:00
rm -f " /tmp/dev-renotify- ${ proj_name } - ${ _nh_issue } "
flog " ${ proj_name } : # ${ _nh_issue } human reply injected into session ${ _nh_session } "
fixed " ${ proj_name } : Injected human reply into dev session # ${ _nh_issue } "
2026-03-17 22:40:54 +00:00
break # one reply to deliver
2026-03-17 22:33:28 +00:00
else
# No reply yet — check for timeout (re-notify at 6h, alert at 24h)
_nh_mtime = $( stat -c %Y " $_nh_phase_file " 2>/dev/null || echo 0)
_nh_now = $( date +%s)
_nh_age = $(( _nh_now - _nh_mtime ))
if [ " $_nh_age " -gt 86400 ] ; then
2026-03-21 19:39:04 +00:00
p2 " ${ proj_name } : Dev session # ${ _nh_issue } stuck in escalate for >24h "
2026-03-17 22:33:28 +00:00
elif [ " $_nh_age " -gt 21600 ] ; then
_nh_renotify = " /tmp/dev-renotify- ${ proj_name } - ${ _nh_issue } "
if [ ! -f " $_nh_renotify " ] ; then
_nh_age_h = $(( _nh_age / 3600 ))
matrix_send "dev" " ⏰ Reminder: Issue # ${ _nh_issue } still needs human input (waiting ${ _nh_age_h } h) " 2>/dev/null || true
touch " $_nh_renotify "
2026-03-21 19:39:04 +00:00
flog " ${ proj_name } : # ${ _nh_issue } re-notified (escalate for ${ _nh_age_h } h) "
2026-03-17 22:33:28 +00:00
fi
fi
fi
done
# ===========================================================================
2026-03-19 07:51:30 +00:00
# P4-PROJECT: Orphaned tmux sessions — PR/issue closed externally
# ===========================================================================
status " P4: ${ proj_name } : sweeping orphaned dev sessions "
while IFS = read -r _sess; do
[ -z " $_sess " ] && continue
# Extract issue number from dev-{project}-{issue}
_sess_issue = " ${ _sess #dev- " ${ proj_name } " - } "
[ [ " $_sess_issue " = ~ ^[ 0-9] +$ ] ] || continue
# Check Codeberg: is the issue still open?
_issue_state = $( codeberg_api GET " /issues/ ${ _sess_issue } " 2>/dev/null \
| jq -r '.state // "open"' 2>/dev/null || echo "open" )
_should_cleanup = false
_cleanup_reason = ""
if [ " $_issue_state " = "closed" ] ; then
_should_cleanup = true
_cleanup_reason = " issue # ${ _sess_issue } closed externally "
else
2026-03-19 08:11:51 +00:00
# Issue still open — skip cleanup during active-wait phases (no PR yet is normal)
_phase_file = " /tmp/dev-session- ${ proj_name } - ${ _sess_issue } .phase "
_curr_phase = $( head -1 " $_phase_file " 2>/dev/null | tr -d '[:space:]' || true )
case " ${ _curr_phase :- } " in
2026-03-21 19:39:04 +00:00
PHASE:escalate| PHASE:awaiting_ci| PHASE:awaiting_review)
2026-03-19 08:11:51 +00:00
continue # session has legitimate pending work
; ;
esac
# Check if associated PR is open (paginated)
2026-03-19 07:51:30 +00:00
_pr_branch = " fix/issue- ${ _sess_issue } "
2026-03-19 08:11:51 +00:00
_has_open_pr = 0
_pr_page = 1
while true; do
_pr_page_json = $( codeberg_api GET " /pulls?state=open&limit=50&page= ${ _pr_page } " \
2>/dev/null || echo "[]" )
_pr_page_len = $( printf '%s' " $_pr_page_json " | jq 'length' 2>/dev/null || echo 0)
_pr_match = $( printf '%s' " $_pr_page_json " | \
jq --arg b " $_pr_branch " '[.[] | select(.head.ref == $b)] | length' \
2026-03-19 07:51:30 +00:00
2>/dev/null || echo 0)
2026-03-19 08:11:51 +00:00
_has_open_pr = $(( _has_open_pr + ${ _pr_match :- 0 } ))
[ " ${ _has_open_pr :- 0 } " -gt 0 ] && break
[ " ${ _pr_page_len :- 0 } " -lt 50 ] && break
_pr_page = $(( _pr_page + 1 ))
[ " $_pr_page " -gt 20 ] && break
done
2026-03-19 07:51:30 +00:00
2026-03-19 08:11:51 +00:00
if [ " $_has_open_pr " -eq 0 ] ; then
# No open PR — check for a closed/merged PR with this branch (paginated)
_has_closed_pr = 0
_pr_page = 1
while true; do
_pr_page_json = $( codeberg_api GET " /pulls?state=closed&limit=50&page= ${ _pr_page } " \
2>/dev/null || echo "[]" )
_pr_page_len = $( printf '%s' " $_pr_page_json " | jq 'length' 2>/dev/null || echo 0)
_pr_match = $( printf '%s' " $_pr_page_json " | \
jq --arg b " $_pr_branch " '[.[] | select(.head.ref == $b)] | length' \
2>/dev/null || echo 0)
_has_closed_pr = $(( _has_closed_pr + ${ _pr_match :- 0 } ))
[ " ${ _has_closed_pr :- 0 } " -gt 0 ] && break
[ " ${ _pr_page_len :- 0 } " -lt 50 ] && break
_pr_page = $(( _pr_page + 1 ))
[ " $_pr_page " -gt 20 ] && break
done
if [ " $_has_closed_pr " -gt 0 ] ; then
2026-03-19 07:51:30 +00:00
_should_cleanup = true
_cleanup_reason = " PR for issue # ${ _sess_issue } is closed/merged "
else
2026-03-19 08:11:51 +00:00
# No PR at all — clean up if session idle >30min
# On query failure, skip rather than defaulting to epoch 0
if ! _sess_activity = $( tmux display-message -t " $_sess " \
-p '#{session_activity}' 2>/dev/null) ; then
flog " ${ proj_name } : Could not query activity for session ${ _sess } , skipping "
continue
fi
2026-03-19 07:51:30 +00:00
_now_ts = $( date +%s)
2026-03-19 08:11:51 +00:00
_idle_min = $(( ( _now_ts - _sess_activity) / 60 ))
2026-03-19 07:51:30 +00:00
if [ " $_idle_min " -gt 30 ] ; then
_should_cleanup = true
_cleanup_reason = " no PR found, session idle ${ _idle_min } min "
fi
fi
fi
fi
if [ " $_should_cleanup " = true ] ; then
tmux kill-session -t " $_sess " 2>/dev/null || true
_wt = " /tmp/ ${ proj_name } -worktree- ${ _sess_issue } "
if [ -d " $_wt " ] ; then
git -C " $PROJECT_REPO_ROOT " worktree remove --force " $_wt " 2>/dev/null || true
fi
# Remove lock only if its recorded PID is no longer alive
_lock = " /tmp/dev-agent- ${ proj_name } .lock "
if [ -f " $_lock " ] ; then
_lock_pid = $( cat " $_lock " 2>/dev/null || true )
if [ -n " ${ _lock_pid :- } " ] && ! kill -0 " $_lock_pid " 2>/dev/null; then
rm -f " $_lock "
fi
fi
rm -f " /tmp/dev-session- ${ proj_name } - ${ _sess_issue } .phase "
fixed " ${ proj_name } : Cleaned orphaned session ${ _sess } ( ${ _cleanup_reason } ) "
fi
done < <( tmux list-sessions -F '#{session_name}' 2>/dev/null | grep " ^dev- ${ proj_name } - " || true )
# ===========================================================================
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
# P4-PROJECT: Clean stale worktrees for this project
# ===========================================================================
NOW_TS = $( date +%s)
2026-03-20 19:45:21 +00:00
for wt in /tmp/${ PROJECT_NAME } -worktree-* /tmp/${ PROJECT_NAME } -review-* /tmp/${ PROJECT_NAME } -sup-retry-*; do
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
[ -d " $wt " ] || continue
WT_AGE_MIN = $(( ( NOW_TS - $( stat -c %Y " $wt " ) ) / 60 ))
if [ " $WT_AGE_MIN " -gt 120 ] ; then
WT_BASE = $( basename " $wt " )
if ! pgrep -f " $WT_BASE " >/dev/null 2>& 1; then
git -C " $PROJECT_REPO_ROOT " worktree remove --force " $wt " 2>/dev/null && \
fixed " ${ proj_name } : Removed stale worktree: $wt ( ${ WT_AGE_MIN } min old) " || true
fi
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
fi
refactor: split supervisor into infra + per-project, make poll scripts config-driven
Supervisor split (#26):
- Layer 1 (infra): P0 memory, P1 disk, P4 housekeeping — runs once, project-agnostic
- Layer 2 (per-project): P2 CI/dev-agent, P3 PRs/deps — iterates projects/*.toml
- Adding a new project requires only a new TOML file, no code changes
Poll scripts accept project TOML arg (#27):
- dev-poll.sh, review-poll.sh, gardener-poll.sh accept optional project TOML as $1
- env.sh loads PROJECT_TOML if set, overriding .env defaults
- Cron: `dev-poll.sh projects/versi.toml` targets that project
New files:
- lib/load-project.sh: TOML to env var loader (Python tomllib)
- projects/versi.toml: current project config extracted from .env
Backwards compatible: scripts without a TOML arg fall back to .env config.
Closes #26, Closes #27
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 08:57:18 +01:00
done
git -C " $PROJECT_REPO_ROOT " worktree prune 2>/dev/null || true
}
# =============================================================================
# Iterate over all registered projects
# =============================================================================
status "checking projects"
PROJECT_COUNT=0
if [ -d "$PROJECTS_DIR" ]; then
  for project_toml in "${PROJECTS_DIR}"/*.toml; do
    [ -f "$project_toml" ] || continue
    PROJECT_COUNT=$(( PROJECT_COUNT + 1 ))
    # Load project config (overrides CODEBERG_REPO, PROJECT_REPO_ROOT, etc.)
    source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml"
    # One project failing must not abort the loop: log and continue.
    check_project || flog "check_project failed for ${project_toml} (per-project checks incomplete)"
  done
fi
if [ "$PROJECT_COUNT" -eq 0 ]; then
  # Fallback: no project TOML files, use .env config (backwards compatible)
  flog "No projects/*.toml found, using .env defaults"
  check_project || flog "check_project failed with .env defaults (per-project checks incomplete)"
fi
###############################################################################
# RESULT
###############################################################################

# Concatenate alerts gathered by the p0..p4 helpers; non-empty means trouble.
ALL_ALERTS="${P0_ALERTS}${P1_ALERTS}${P2_ALERTS}${P3_ALERTS}${P4_ALERTS}"

if [ -n "$ALL_ALERTS" ]; then
  ALERT_TEXT=$(echo -e "$ALL_ALERTS")

  # Notify Matrix
  matrix_send "supervisor" "⚠️ Supervisor alerts:
${ALERT_TEXT}" 2>/dev/null || true

  flog "Invoking claude -p for alerts"

  # Build the escalation prompt: PROMPT.md (operational knowledge), the live
  # alerts, what bash already auto-fixed, and a system-state snapshot.
  CLAUDE_PROMPT="$(cat "$PROMPT_FILE" 2>/dev/null || echo "You are a supervisor agent. Fix the issue below.")

## Current Alerts

${ALERT_TEXT}

## Auto-fixes already applied by bash

$(echo -e "${FIXES:-None}")

## System State
RAM: $(free -m | awk '/Mem:/{printf "avail=%sMB", $7}') $(free -m | awk '/Swap:/{printf "swap=%sMB", $3}')
Disk: $(df -h / | awk 'NR==2{printf "%s used of %s (%s)", $3, $2, $5}')
Docker: $(sudo docker ps --format '{{.Names}}' 2>/dev/null | wc -l) containers running
Claude procs: $(pgrep -f "claude" 2>/dev/null | wc -l)

$(if [ -n "$ESCALATION_REPLY" ]; then echo "
## Human Response to Previous Escalation
${ESCALATION_REPLY}
Act on this response."; fi)

Fix what you can. Escalate what you can't. Read the relevant best-practices file first."

  # Hard 5-minute cap on the escalation call; never let it hang the cron slot.
  CLAUDE_OUTPUT=$(timeout 300 claude -p --model sonnet --dangerously-skip-permissions \
    "$CLAUDE_PROMPT" 2>&1) || true
  flog "claude output: $(echo "$CLAUDE_OUTPUT" | tail -20)"
  status "claude responded"
else
  [ -n "$FIXES" ] && flog "Housekeeping: $(echo -e "$FIXES")"
  status "all clear"
fi