2026-03-12 12:44:15 +00:00
#!/usr/bin/env bash
# factory-poll.sh — Factory supervisor: bash checks + claude -p for fixes
#
# Runs every 10min via cron. Does all health checks in bash (zero tokens).
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Only invokes claude -p when auto-fix fails or issue is complex.
2026-03-12 12:44:15 +00:00
#
2026-03-15 17:57:12 +01:00
# Cron: */10 * * * * /path/to/disinto/factory/factory-poll.sh
2026-03-12 12:44:15 +00:00
#
# Peek: cat /tmp/factory-status
2026-03-15 17:57:12 +01:00
# Log: tail -f /path/to/disinto/factory/factory.log
2026-03-12 12:44:15 +00:00
# Load shared factory configuration from lib/env.sh (resolved relative to this
# script). FACTORY_ROOT, used just below, is expected to come from there;
# other settings and helpers used later presumably do too — confirm in env.sh.
source "$(dirname "$0")/../lib/env.sh"

# Activity log; flog() appends one timestamped line per call.
LOGFILE="${FACTORY_ROOT}/factory/factory.log"
# Single-line "current step" file; status() overwrites it on every call.
STATUSFILE="/tmp/factory-status"
# PID file used to detect an already-running poll.
LOCKFILE="/tmp/factory-poll.lock"
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Path to the supervisor prompt document. Its consumer is not visible in this
# part of the file — presumably the `claude -p` escalation step; confirm.
PROMPT_FILE="${FACTORY_ROOT}/factory/PROMPT.md"
2026-03-12 12:44:15 +00:00
# Prevent overlapping runs.
# The lock file stores the PID of the run that created it. If that PID is
# still alive this run exits quietly; an empty or dead PID means the lock is
# stale, so it is removed and re-taken.
if [ -f "$LOCKFILE" ]; then
  LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null)
  # kill -0 delivers no signal; it only reports whether the PID can be signalled.
  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then
    exit 0
  fi
  rm -f "$LOCKFILE"
fi
echo $$ > "$LOCKFILE"
# On every exit path: release the lock and clear the transient status file.
# Single quotes defer expansion until the trap fires.
trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# flog MESSAGE...
# Append one timestamped (UTC) line to $LOGFILE.
#   $* - message words; joined with single spaces into one log line.
# Line format: [YYYY-MM-DD HH:MM:SS UTC] MESSAGE
flog() {
  printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
}
2026-03-12 12:44:15 +00:00
# status MESSAGE...
# Publish the current step: overwrite $STATUSFILE with a single timestamped
# (UTC) "factory: MESSAGE" line, then record the same message via flog.
#   $* - message words; joined with single spaces.
status() {
  printf '[%s] factory: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE"
  flog "$*"
}
2026-03-14 16:25:33 +01:00
# ── Check for escalation replies from Matrix ──────────────────────────────
# A non-empty /tmp/factory-escalation-reply is consumed exactly once: its
# content is captured in ESCALATION_REPLY and the file is deleted so the same
# reply is not picked up by the next run. ESCALATION_REPLY stays empty when
# there is no reply. (Its consumer is outside this section.)
ESCALATION_REPLY=""
if [ -s /tmp/factory-escalation-reply ]; then
  ESCALATION_REPLY=$(cat /tmp/factory-escalation-reply)
  rm -f /tmp/factory-escalation-reply
  # Only the first line goes to the log to keep log entries single-line.
  flog "Got escalation reply: $(printf '%s\n' "$ESCALATION_REPLY" | head -1)"
fi
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Alerts by priority.
# One accumulator string per priority level. Every entry is a bullet line
# terminated by a literal backslash-n (two characters, not a newline), so the
# consumer must expand escapes when printing — presumably printf '%b' or
# echo -e; the consumer is outside this section.
P0_ALERTS=""
P1_ALERTS=""
P2_ALERTS=""
P3_ALERTS=""
P4_ALERTS=""

# pN MESSAGE... — append a "[PN]" bullet to the matching accumulator and log it.
p0() { P0_ALERTS="${P0_ALERTS}• [P0] $*\n"; flog "P0: $*"; }
p1() { P1_ALERTS="${P1_ALERTS}• [P1] $*\n"; flog "P1: $*"; }
p2() { P2_ALERTS="${P2_ALERTS}• [P2] $*\n"; flog "P2: $*"; }
p3() { P3_ALERTS="${P3_ALERTS}• [P3] $*\n"; flog "P3: $*"; }
p4() { P4_ALERTS="${P4_ALERTS}• [P4] $*\n"; flog "P4: $*"; }

# Auto-fixes applied during this run, in the same bullet format.
FIXES=""

# fixed MESSAGE... — record a successful automatic fix and log it.
fixed() { FIXES="${FIXES}• ✅ $*\n"; flog "FIXED: $*"; }
2026-03-12 12:44:15 +00:00
# =============================================================================
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# P0: MEMORY — check first, fix first
2026-03-12 12:44:15 +00:00
# =============================================================================
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
status "P0: checking memory"
2026-03-12 12:44:15 +00:00
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Memory snapshot in MiB (free -m): column 7 of the "Mem:" row is "available",
# column 3 of the "Swap:" row is "used".
AVAIL_MB=$(free -m | awk '/Mem:/{print $7}')
SWAP_USED_MB=$(free -m | awk '/Swap:/{print $3}')

# Crisis when available memory is below 500MB, or when swap is heavily used
# (>3000MB) while available memory is also tight (<2000MB). The :- defaults
# make a failed `free` read as "healthy" so a broken probe cannot trigger kills.
if [ "${AVAIL_MB:-9999}" -lt 500 ] || { [ "${SWAP_USED_MB:-0}" -gt 3000 ] && [ "${AVAIL_MB:-9999}" -lt 2000 ]; }; then
  flog "MEMORY CRISIS: avail=${AVAIL_MB}MB swap_used=${SWAP_USED_MB}MB — auto-fixing"

  # Kill stale factory-spawned claude processes (>3h old) — skip interactive sessions
  STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true)
  if [ -n "$STALE_CLAUDES" ]; then
    echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true
    # pgrep prints one PID per line; flatten so the alert stays on one line.
    fixed "Killed stale claude processes: ${STALE_CLAUDES//$'\n'/ }"
  fi

  # Drop filesystem caches; only report the fix when the write succeeded.
  if sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1; then
    fixed "Dropped filesystem caches"
  fi

  # Restart Anvil if it's bloated (>1GB RSS)
  ANVIL_CONTAINER="${ANVIL_CONTAINER:-${PROJECT_NAME}-anvil-1}"
  # MemUsage looks like "1.5GiB / 16GiB"; keep the first token (current usage).
  ANVIL_RSS=$(sudo docker stats "$ANVIL_CONTAINER" --no-stream --format '{{.MemUsage}}' 2>/dev/null | grep -oP '^\S+' | head -1)
  ANVIL_RSS=${ANVIL_RSS:-0}
  # Any value expressed in GiB is at least 1GiB.
  if printf '%s\n' "$ANVIL_RSS" | grep -qP '\dGiB'; then
    sudo docker restart "$ANVIL_CONTAINER" >/dev/null 2>&1 && fixed "Restarted bloated Anvil (${ANVIL_RSS})"
  fi

  # Re-check after fixes, using the same crisis definition as the trigger:
  # high swap alone is not critical once available memory has recovered.
  # Here a failed `free` defaults to 0 available, i.e. "still critical".
  AVAIL_MB_AFTER=$(free -m | awk '/Mem:/{print $7}')
  SWAP_AFTER=$(free -m | awk '/Swap:/{print $3}')
  if [ "${AVAIL_MB_AFTER:-0}" -lt 500 ] || { [ "${SWAP_AFTER:-0}" -gt 3000 ] && [ "${AVAIL_MB_AFTER:-0}" -lt 2000 ]; }; then
    p0 "Memory still critical after auto-fix: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB"
  else
    flog "Memory recovered: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB"
  fi
fi
2026-03-12 12:44:15 +00:00
# =============================================================================
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# P1: DISK
2026-03-12 12:44:15 +00:00
# =============================================================================
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
status "P1: checking disk"
2026-03-12 12:44:15 +00:00
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Root filesystem usage in percent. -P forces the one-line-per-filesystem
# POSIX layout so column 5 is the capacity even with long device names.
DISK_PERCENT=$(df -P / | awk 'NR==2{print $5}' | tr -d '%')

if [ "${DISK_PERCENT:-0}" -gt 80 ]; then
  flog "DISK PRESSURE: ${DISK_PERCENT}% — auto-cleaning"

  # Docker cleanup (safe — keeps images): without -a only dangling images are
  # removed, along with stopped containers, unused networks and build cache.
  sudo docker system prune -f >/dev/null 2>&1 && fixed "Docker prune"

  # Truncate factory logs >10MB
  for logfile in "${FACTORY_ROOT}"/{dev,review,factory}/*.log; do
    # An unmatched glob stays literal; -f filters that out.
    if [ -f "$logfile" ]; then
      SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1)
      if [ "${SIZE_KB:-0}" -gt 10240 ]; then
        truncate -s 0 "$logfile"
        fixed "Truncated $(basename "$logfile") (was ${SIZE_KB}KB)"
      fi
    fi
  done

  # Clean old worktrees: directories not modified for more than 6 hours.
  IDLE_WORKTREES=$(find "/tmp/${PROJECT_NAME}-worktree-"* -maxdepth 0 -mmin +360 2>/dev/null || true)
  if [ -n "$IDLE_WORKTREES" ]; then
    while IFS= read -r wt; do
      [ -n "$wt" ] || continue
      # The directory suffix is the issue number the worktree belongs to.
      ISSUE_NUM=$(basename "$wt")
      ISSUE_NUM=${ISSUE_NUM#"${PROJECT_NAME}-worktree-"}
      # Only remove if dev-agent is not running on it. The trailing group
      # stops issue 12 from matching a dev-agent working on issue 123.
      if ! pgrep -f "dev-agent\.sh ${ISSUE_NUM}([^0-9]|\$)" >/dev/null 2>&1; then
        rm -rf -- "${wt:?}" && fixed "Removed stale worktree: $wt"
      fi
    done <<< "$IDLE_WORKTREES"
    # Prune after the removals so git forgets the worktrees just deleted
    # (as well as any that were already missing).
    git -C "${PROJECT_REPO_ROOT}" worktree prune 2>/dev/null
  fi

  # Woodpecker log_entries cleanup: trim when the table is in the GB range or
  # above 500 MB, keeping the newest 100000 rows by id.
  # NOTE(review): a plain DELETE may not return space to the OS until the
  # table is vacuumed — confirm whether a VACUUM step is wanted here.
  LOG_ENTRIES_MB=$(wpdb -c "SELECT pg_size_pretty(pg_total_relation_size('log_entries'));" 2>/dev/null | xargs)
  # Take the first "<number> MB|GB" token so extra psql output (headers,
  # row counts) cannot contribute stray digits.
  SIZE_MATCH=$(printf '%s\n' "$LOG_ENTRIES_MB" | grep -oP '\d+\s*(GB|MB)' | head -1)
  if [ -n "$SIZE_MATCH" ]; then
    SIZE_NUM=${SIZE_MATCH%%[!0-9]*}
    SIZE_UNIT=${SIZE_MATCH: -2}
    if [ "$SIZE_UNIT" = "GB" ] || { [ "$SIZE_UNIT" = "MB" ] && [ "$SIZE_NUM" -gt 500 ]; }; then
      if wpdb -c "DELETE FROM log_entries WHERE id < (SELECT max(id) - 100000 FROM log_entries);" 2>/dev/null; then
        fixed "Trimmed Woodpecker log_entries (was ${LOG_ENTRIES_MB})"
      fi
    fi
  fi

  # Re-check: escalate as P1 only if cleaning did not bring usage back under 80%.
  DISK_AFTER=$(df -P / | awk 'NR==2{print $5}' | tr -d '%')
  if [ "${DISK_AFTER:-0}" -gt 80 ]; then
    p1 "Disk still ${DISK_AFTER}% after auto-clean"
  else
    flog "Disk recovered: ${DISK_AFTER}%"
  fi
fi
2026-03-12 12:44:15 +00:00
# =============================================================================
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# P2: FACTORY STOPPED — CI, dev-agent, git
2026-03-12 12:44:15 +00:00
# =============================================================================
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
status "P2: checking factory"
# CI stuck: pipelines of this repo that have been 'running' for more than
# 20 minutes (1200s), counted straight from the Woodpecker database.
STUCK_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='running' AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200;" 2>/dev/null | xargs || true)
# 2>/dev/null on the test: non-numeric query output counts as "no alert"
# instead of printing a test error.
if [ "${STUCK_CI:-0}" -gt 0 ] 2>/dev/null; then
  p2 "CI: ${STUCK_CI} pipeline(s) running >20min"
fi

# CI starved: pipelines still 'pending' more than 30 minutes (1800s) after creation.
PENDING_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='pending' AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800;" 2>/dev/null | xargs || true)
if [ "${PENDING_CI:-0}" -gt 0 ] 2>/dev/null; then
  p2 "CI: ${PENDING_CI} pipeline(s) pending >30min"
fi
2026-03-12 12:44:15 +00:00
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Dev-agent health
# /tmp/dev-agent.lock holds the dev-agent PID. A lock whose PID is gone is
# stale and is removed; a live agent is checked for progress through the
# modification time of /tmp/dev-agent-status.
DEV_LOCK="/tmp/dev-agent.lock"
if [ -f "$DEV_LOCK" ]; then
  DEV_PID=$(cat "$DEV_LOCK" 2>/dev/null)
  if ! kill -0 "$DEV_PID" 2>/dev/null; then
    rm -f "$DEV_LOCK"
    fixed "Removed stale dev-agent lock (PID ${DEV_PID} dead)"
  elif [ ! -e /tmp/dev-agent-status ]; then
    # Without this branch the age would be computed from epoch 0 and the
    # alert would claim the status was unchanged for decades.
    p2 "Dev-agent: running (PID ${DEV_PID}) but /tmp/dev-agent-status is missing"
  else
    DEV_STATUS_AGE=$(stat -c %Y /tmp/dev-agent-status 2>/dev/null || echo 0)
    NOW_EPOCH=$(date +%s)
    STATUS_AGE_MIN=$(( (NOW_EPOCH - DEV_STATUS_AGE) / 60 ))
    if [ "$STATUS_AGE_MIN" -gt 30 ]; then
      p2 "Dev-agent: status unchanged for ${STATUS_AGE_MIN}min"
    fi
  fi
fi
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Git repo health
# The main checkout must sit on PRIMARY_BRANCH with no rebase in progress.
# Both conditions are auto-repaired where possible; failures raise a P2 alert.
# The checks run only after successfully entering the repo, so git never
# operates on whatever directory cron happened to start in.
if [ -n "${PROJECT_REPO_ROOT:-}" ] && cd "${PROJECT_REPO_ROOT}" 2>/dev/null; then
  GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
  if [ -d .git/rebase-merge ] || [ -d .git/rebase-apply ]; then
    GIT_REBASE="yes"
  else
    GIT_REBASE="no"
  fi

  if [ "$GIT_REBASE" = "yes" ]; then
    # A leftover rebase is handled on its own: success already lands on
    # PRIMARY_BRANCH and failure is already reported, so the branch check
    # below is skipped to avoid a duplicate fix/alert for the same problem.
    if git rebase --abort 2>/dev/null && git checkout "${PRIMARY_BRANCH}" 2>/dev/null; then
      fixed "Aborted stale rebase, switched to ${PRIMARY_BRANCH}"
    else
      p2 "Git: stale rebase, auto-abort failed"
    fi
  elif [ "$GIT_BRANCH" != "${PRIMARY_BRANCH}" ] && [ "$GIT_BRANCH" != "unknown" ]; then
    if git checkout "${PRIMARY_BRANCH}" 2>/dev/null; then
      fixed "Switched main repo from '${GIT_BRANCH}' to ${PRIMARY_BRANCH}"
    else
      p2 "Git: on '${GIT_BRANCH}' instead of ${PRIMARY_BRANCH}"
    fi
  fi
else
  # Keep both variables defined for any later consumer.
  GIT_BRANCH="unknown"
  GIT_REBASE="no"
  p2 "Git: cannot enter repo '${PROJECT_REPO_ROOT:-}'"
fi
# =============================================================================
2026-03-12 18:06:08 +00:00
# P2b: FACTORY STALLED — backlog exists but no agent running
# =============================================================================
status "P2: checking factory stall"
# Existence probes: limit=1 means each value is 0 or 1, not a full count.
# The jq filter counts only a JSON array, so an API error object is read as 0
# rather than as the number of keys it happens to contain.
BACKLOG_COUNT=$(codeberg_api GET "/issues?state=open&labels=backlog&type=issues&limit=1" 2>/dev/null | jq -r 'if type == "array" then length else 0 end' 2>/dev/null || echo "0")
IN_PROGRESS=$(codeberg_api GET "/issues?state=open&labels=in-progress&type=issues&limit=1" 2>/dev/null | jq -r 'if type == "array" then length else 0 end' 2>/dev/null || echo "0")

if [ "${BACKLOG_COUNT:-0}" -gt 0 ] && [ "${IN_PROGRESS:-0}" -eq 0 ]; then
  # Backlog exists but nothing in progress — check if dev-agent ran recently.
  # The dev-agent log's modification time stands in for "last activity"; a
  # missing log counts as epoch 0, i.e. "never ran".
  DEV_LOG="${FACTORY_ROOT}/dev/dev-agent.log"
  LAST_LOG_EPOCH=$(stat -c %Y "$DEV_LOG" 2>/dev/null || echo 0)
  NOW_EPOCH=$(date +%s)
  IDLE_MIN=$(( (NOW_EPOCH - LAST_LOG_EPOCH) / 60 ))
  if [ "$IDLE_MIN" -gt 20 ]; then
    p2 "Factory stalled: ${BACKLOG_COUNT} backlog issue(s), no agent ran for ${IDLE_MIN}min"
  fi
fi
# =============================================================================
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# P3: FACTORY DEGRADED — derailed PRs, unreviewed PRs
2026-03-12 12:44:15 +00:00
# =============================================================================
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
status "P3: checking PRs"
2026-03-12 12:44:15 +00:00
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Derailed-PR scan (P3). Looks at up to 10 open PRs and raises a P3 alert for:
#   - CI passed but the PR is reported as not mergeable (needs rebase),
#   - CI failed/errored and the PR has not been updated for >30 min,
#   - CI passed, no "<!-- reviewed: <head sha>" comment found, and the PR has
#     not been updated for >60 min (a review run is also started in background).
# API and jq errors are swallowed (2>/dev/null, || true); a PR whose details or
# head SHA cannot be read is skipped.
OPEN_PRS=$(codeberg_api GET "/pulls?state=open&limit=10" 2>/dev/null | jq -r '.[].number' 2>/dev/null || true)
NOW_EPOCH=$(date +%s)

# OPEN_PRS is a whitespace-separated list of PR numbers; splitting is intended.
for pr in $OPEN_PRS; do
  PR_JSON=$(codeberg_api GET "/pulls/${pr}" 2>/dev/null || true)
  [ -z "$PR_JSON" ] && continue

  PR_SHA=$(jq -r '.head.sha // ""' <<<"$PR_JSON")
  [ -z "$PR_SHA" ] && continue

  CI_STATE=$(codeberg_api GET "/commits/${PR_SHA}/status" 2>/dev/null | jq -r '.state // "unknown"' 2>/dev/null || true)

  # NOTE: jq's `//` operator treats `false` like null, so `.mergeable // true`
  # can never produce "false". Compare against false explicitly; a missing or
  # null field still counts as mergeable.
  MERGEABLE=$(jq -r 'if .mergeable == false then "false" else "true" end' <<<"$PR_JSON")

  # Minutes since the PR was last updated; empty when updated_at is absent.
  # An unparsable timestamp falls back to epoch 0 (i.e. a very large age).
  AGE_MIN=""
  UPDATED=$(jq -r '.updated_at // ""' <<<"$PR_JSON")
  if [ -n "$UPDATED" ]; then
    UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0)
    AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 ))
  fi

  # Check for merge conflicts first (approved + CI pass but unmergeable)
  if [ "$MERGEABLE" = "false" ] && [ "$CI_STATE" = "success" ]; then
    p3 "PR #${pr}: CI pass but merge conflict — needs rebase"
  elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then
    if [ -n "$AGE_MIN" ] && [ "$AGE_MIN" -gt 30 ]; then
      p3 "PR #${pr}: CI=${CI_STATE}, stale ${AGE_MIN}min"
    fi
  elif [ "$CI_STATE" = "success" ]; then
    # Check if reviewed at this SHA (looks for the marker in the last 50 comments)
    HAS_REVIEW=$(codeberg_api GET "/issues/${pr}/comments?limit=50" 2>/dev/null | \
      jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains("<!-- reviewed: " + $sha))] | length' 2>/dev/null || echo "0")
    if [ "${HAS_REVIEW:-0}" -eq 0 ] && [ -n "$AGE_MIN" ] && [ "$AGE_MIN" -gt 60 ]; then
      p3 "PR #${pr}: CI passed, no review for ${AGE_MIN}min"
      # Auto-trigger review (detached; output appended to the review log)
      bash "${FACTORY_ROOT}/review/review-pr.sh" "$pr" >> "${FACTORY_ROOT}/review/review.log" 2>&1 &
      fixed "Auto-triggered review for PR #${pr}"
    fi
  fi
done
# =============================================================================
# P4: HOUSEKEEPING — stale processes
# =============================================================================
2026-03-13 06:51:53 +00:00
# Check for dev-agent escalations: a non-empty escalations.jsonl raises a P3
# alert carrying the number of lines in the file.
ESCALATION_FILE="${FACTORY_ROOT}/factory/escalations.jsonl"
if [ -s "$ESCALATION_FILE" ]; then
  # grep -c '' also counts a final line that lacks a trailing newline, which
  # `wc -l` would miss (reporting 0 for a one-line, unterminated file).
  ESCALATION_COUNT=$(grep -c '' "$ESCALATION_FILE")
  p3 "Dev-agent escalated ${ESCALATION_COUNT} issue(s) — see ${ESCALATION_FILE}"
fi
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
status "P4: housekeeping"

# Stale factory-spawned claude processes (>3h, not caught by P0) — skip interactive sessions.
# Matching the full command line on "claude -p" limits this to `claude -p` invocations;
# --older 10800 keeps only processes older than 10800 seconds.
STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true)
if [ -n "$STALE_CLAUDES" ]; then
  # Best effort: a PID may already have exited between pgrep and kill.
  echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true
  fixed "Killed stale claude processes: $(wc -w <<<"$STALE_CLAUDES") procs"
fi
2026-03-15 16:05:29 +01:00
# Clean stale git worktrees (>2h, no active agent).
# Candidates are /tmp/<project>-worktree-* and /tmp/<project>-review-* directories
# whose mtime is more than 120 minutes old and whose name appears in no running
# process's command line.
NOW_TS=$(date +%s)
for wt in "/tmp/${PROJECT_NAME}"-worktree-* "/tmp/${PROJECT_NAME}"-review-*; do
  # Also filters out the literal pattern when a glob matches nothing.
  [ -d "$wt" ] || continue

  # Skip entries that vanish mid-scan instead of feeding an empty value into
  # the arithmetic below.
  WT_MTIME=$(stat -c %Y "$wt" 2>/dev/null) || continue
  WT_AGE_MIN=$(( (NOW_TS - WT_MTIME) / 60 ))
  [ "$WT_AGE_MIN" -gt 120 ] || continue

  # Skip if an agent is still using it
  WT_BASE=$(basename "$wt")
  if pgrep -f -- "$WT_BASE" >/dev/null 2>&1; then
    continue
  fi

  # Only report the fix when git actually removed the worktree.
  if git -C "$PROJECT_REPO_ROOT" worktree remove --force "$wt" 2>/dev/null; then
    fixed "Removed stale worktree: $wt (${WT_AGE_MIN}min old)" || true
  fi
done
git -C "$PROJECT_REPO_ROOT" worktree prune 2>/dev/null || true
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Rotate factory logs if >5MB: every *.log under dev/, review/ and factory/
# whose disk usage exceeds 5120 KB is renamed to <name>.log.old (overwriting
# any previous .old file).
for logfile in "${FACTORY_ROOT}"/{dev,review,factory}/*.log; do
  # Also filters out the literal pattern when a glob matches nothing.
  [ -f "$logfile" ] || continue

  SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1)
  if [ "${SIZE_KB:-0}" -gt 5120 ]; then
    # Report the rotation only if the rename actually succeeded.
    if mv "$logfile" "${logfile}.old" 2>/dev/null; then
      fixed "Rotated $(basename "$logfile")"
    fi
  fi
done
# =============================================================================
# RESULT
# =============================================================================
feat: factory supervisor with priorities, auto-fix, and claude -p escalation
- P0: memory crisis (auto-kill stale claude, drop caches, restart Anvil)
- P1: disk pressure (docker prune, log truncate, worktree cleanup, WP log trim)
- P2: factory stopped (CI stuck, dev-agent dead, git broken — auto-fix where possible)
- P3: factory degraded (derailed PRs, auto-trigger reviews)
- P4: housekeeping (stale processes, log rotation)
Calls claude -p only for P0/P1 issues that auto-fix couldn't resolve.
PROMPT.md contains distilled operational knowledge + self-update mechanism.
2026-03-12 13:00:17 +00:00
# Final step: if any alert was collected, notify Matrix and hand the alerts,
# the fixes already applied, and a system snapshot to `claude -p`; otherwise
# just log housekeeping fixes and report "all clear".
# The `:-` defaults keep this working when an alert bucket was never set.
ALL_ALERTS="${P0_ALERTS:-}${P1_ALERTS:-}${P2_ALERTS:-}${P3_ALERTS:-}${P4_ALERTS:-}"

if [ -n "$ALL_ALERTS" ]; then
  # Alerts are stored with escape sequences; echo -e expands them.
  ALERT_TEXT=$(echo -e "$ALL_ALERTS")

  # Notify Matrix (best effort — a failed notification must not block the fix)
  matrix_send "supervisor" "⚠️ Factory alerts:
${ALERT_TEXT}" 2>/dev/null || true

  flog "Invoking claude -p for alerts"

  # Prompt = PROMPT.md (or a one-line fallback) + alerts + applied fixes +
  # system snapshot + optional human reply to a previous escalation.
  CLAUDE_PROMPT="$(cat "$PROMPT_FILE" 2>/dev/null || echo "You are a factory supervisor. Fix the issue below.")

## Current Alerts
${ALERT_TEXT}

## Auto-fixes already applied by bash
$(echo -e "${FIXES:-None}")

## System State
RAM: $(free -m | awk '/Mem:/{printf "avail=%sMB", $7}') $(free -m | awk '/Swap:/{printf "swap=%sMB", $3}')
Disk: $(df -h / | awk 'NR==2{printf "%s used of %s (%s)", $3, $2, $5}')
Docker: $(sudo docker ps --format '{{.Names}}' 2>/dev/null | wc -l) containers running
Claude procs: $(pgrep -f "claude" 2>/dev/null | wc -l)

$(if [ -n "${ESCALATION_REPLY:-}" ]; then echo "
## Human Response to Previous Escalation
${ESCALATION_REPLY}

Act on this response."; fi)

Fix what you can. Escalate what you can't. Read the relevant best-practices file first."

  # Hard 300s cap; capture the exit status instead of discarding it so a
  # timeout or crash is not reported as a successful response.
  CLAUDE_RC=0
  CLAUDE_OUTPUT=$(timeout 300 claude -p --model sonnet --dangerously-skip-permissions \
    "$CLAUDE_PROMPT" 2>&1) || CLAUDE_RC=$?
  flog "claude output: $(echo "$CLAUDE_OUTPUT" | tail -20)"
  if [ "$CLAUDE_RC" -eq 0 ]; then
    status "claude responded"
  else
    # timeout(1) exits with 124 when the time limit was hit.
    flog "claude -p exited with status ${CLAUDE_RC} (124 = timed out after 300s)"
    status "claude failed (exit ${CLAUDE_RC})"
  fi
else
  if [ -n "${FIXES:-}" ]; then
    flog "Housekeeping: $(echo -e "$FIXES")"
  fi
  status "all clear"
fi