Add OPS repo presence detection in supervisor-run.sh with degraded mode support:

- Detect if OPS_REPO_ROOT is missing and log a WARNING message
- Set the OPS_REPO_DEGRADED=1 flag and configure fallback paths
- Bundle minimal knowledge files as a fallback for degraded mode
- Update the formula to use OPS_KNOWLEDGE_ROOT, OPS_JOURNAL_ROOT, OPS_VAULT_ROOT
- Support a local vault destination and journal fallback when the ops repo is absent

Knowledge files bundled: disk.md, memory.md, ci.md, git.md, dev-agent.md, review-agent.md, forge.md

The supervisor now runs with full functionality when the ops repo is available, or gracefully degrades to local paths when it is absent, making the failure mode explicit rather than silent.
175 lines
7.6 KiB
Bash
Executable file
175 lines
7.6 KiB
Bash
Executable file
#!/usr/bin/env bash
# =============================================================================
# supervisor-run.sh — Cron wrapper: supervisor execution via SDK + formula
#
# Synchronous bash loop using claude -p (one-shot invocation).
# No tmux sessions, no phase files — the bash script IS the state machine.
#
# Flow:
#   1. Guards: cron lock, memory check
#   2. Housekeeping: clean up stale crashed worktrees
#   3. Collect pre-flight metrics (supervisor/preflight.sh)
#   4. Load formula (formulas/run-supervisor.toml)
#   5. Context: AGENTS.md, preflight metrics, structural graph
#   6. agent_run(worktree, prompt) → Claude monitors, may clean up
#
# Before the guards run, the script also checks whether OPS_REPO_ROOT points
# at an existing directory and exports OPS_REPO_DEGRADED plus the
# OPS_KNOWLEDGE_ROOT / OPS_JOURNAL_ROOT / OPS_VAULT_ROOT paths accordingly.
#
# Usage:
#   supervisor-run.sh [projects/disinto.toml]   # project config (default: disinto)
#
# Environment:
#   OPS_REPO_ROOT           optional; unset or not a directory → degraded mode
#   FORGE_SUPERVISOR_TOKEN  optional; replaces FORGE_TOKEN when set and non-empty
#
# Cron: */20 * * * * cd /path/to/dark-factory && bash supervisor/supervisor-run.sh
# =============================================================================
set -euo pipefail

# Absolute directory of this script, and its parent (the factory checkout).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"

# Accept project config from argument; default to disinto
export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}"
# shellcheck source=../lib/env.sh
source "$FACTORY_ROOT/lib/env.sh"
# Use supervisor-bot's own Forgejo identity (#747)
# NOTE(review): under `set -u` this aborts when neither token is set —
# presumably lib/env.sh guarantees FORGE_TOKEN; confirm.
FORGE_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}"
# shellcheck source=../lib/formula-session.sh
source "$FACTORY_ROOT/lib/formula-session.sh"
# shellcheck source=../lib/worktree.sh
source "$FACTORY_ROOT/lib/worktree.sh"
# shellcheck source=../lib/guard.sh
source "$FACTORY_ROOT/lib/guard.sh"
# shellcheck source=../lib/agent-sdk.sh
source "$FACTORY_ROOT/lib/agent-sdk.sh"

# Per-run file locations. DISINTO_LOG_DIR and PROJECT_NAME are not assigned in
# this file — presumably provided by lib/env.sh; confirm.
LOG_FILE="${DISINTO_LOG_DIR}/supervisor/supervisor.log"
# shellcheck disable=SC2034  # consumed by agent-sdk.sh
LOGFILE="$LOG_FILE"
# shellcheck disable=SC2034  # consumed by agent-sdk.sh
SID_FILE="/tmp/supervisor-session-${PROJECT_NAME}.sid"
SCRATCH_FILE="/tmp/supervisor-${PROJECT_NAME}-scratch.md"
WORKTREE="/tmp/${PROJECT_NAME}-supervisor-run"

# Override LOG_AGENT for consistent agent identification
# shellcheck disable=SC2034  # consumed by agent-sdk.sh and env.sh log()
LOG_AGENT="supervisor"

# ── OPS Repo Detection (Issue #544) ──────────────────────────────────────
# Detect whether OPS_REPO_ROOT is usable and publish the result through:
#   OPS_REPO_DEGRADED   1 when OPS_REPO_ROOT is unset/empty or not a directory,
#                       0 otherwise
#   OPS_KNOWLEDGE_ROOT  knowledge directory
#   OPS_JOURNAL_ROOT    supervisor journal directory
#   OPS_VAULT_ROOT      pending vault items directory
# This allows the supervisor to run with fallback knowledge files and
# local journal/vault paths when the ops repo is absent.
#
# NOTE(review): log() is called here before the supervisor-specific override
# declared further down in this file, so this warning goes through whichever
# log() the sourced libs provide and may not land in supervisor.log — confirm
# that is intended.
if [ -z "${OPS_REPO_ROOT:-}" ] || [ ! -d "${OPS_REPO_ROOT}" ]; then
  log "WARNING: OPS_REPO_ROOT not set or directory missing — running in degraded mode (no playbooks, no journal continuity, no vault destination)"
  export OPS_REPO_DEGRADED=1
  # Set fallback paths for degraded mode
  export OPS_KNOWLEDGE_ROOT="${FACTORY_ROOT}/knowledge"
  export OPS_JOURNAL_ROOT="${FACTORY_ROOT}/state/supervisor-journal"
  # NOTE(review): PROJECT_REPO_ROOT is not assigned in this file; under
  # `set -u` an unset value aborts here — presumably lib/env.sh exports it.
  export OPS_VAULT_ROOT="${PROJECT_REPO_ROOT}/vault/pending"
else
  export OPS_REPO_DEGRADED=0
  export OPS_KNOWLEDGE_ROOT="${OPS_REPO_ROOT}/knowledge"
  export OPS_JOURNAL_ROOT="${OPS_REPO_ROOT}/journal/supervisor"
  export OPS_VAULT_ROOT="${OPS_REPO_ROOT}/vault/pending"
fi

# Best-effort in both modes: failing to create the journal/vault directories
# must not abort the run, but the failure is logged rather than discarded.
if ! mkdir -p -- "$OPS_JOURNAL_ROOT" "$OPS_VAULT_ROOT" 2>/dev/null; then
  log "WARNING: could not create ${OPS_JOURNAL_ROOT} and/or ${OPS_VAULT_ROOT}"
fi

# Override log() to append to supervisor-specific log file
#######################################
# Append one timestamped line to the supervisor log.
# Globals:
#   LOG_FILE  (read) destination file; its parent directory is created if
#             it does not exist yet
#   LOG_AGENT (read) agent label; defaults to "supervisor" when unset/empty
# Arguments:
#   $@ - message words, joined into a single line
# Outputs:
#   Appends "[<UTC ISO-8601 timestamp>] <agent>: <message>" to LOG_FILE
#######################################
log() {
  local agent="${LOG_AGENT:-supervisor}"
  local log_dir="${LOG_FILE%/*}"

  # Without the directory the redirect below fails, and under `set -e` the
  # very first log call would abort the run. Skip when LOG_FILE has no
  # directory component (expansion unchanged) or sits directly under "/".
  if [ -n "$log_dir" ] && [ "$log_dir" != "$LOG_FILE" ] && [ ! -d "$log_dir" ]; then
    mkdir -p -- "$log_dir"
  fi

  printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
}

# ── Guards ────────────────────────────────────────────────────────────────
# These helpers are not defined in this file — presumably they come from the
# sourced libs (lib/guard.sh etc.); confirm their exit behaviour there.
check_active supervisor
acquire_cron_lock "/tmp/supervisor-run.lock"
memory_guard 2000  # NOTE(review): unit of the threshold is not visible here — confirm

log "--- Supervisor run start ---"

# ── Resolve forge remote for git operations ─────────────────────────────
resolve_forge_remote

# ── Housekeeping: clean up stale crashed worktrees (>24h) ────────────────
cleanup_stale_crashed_worktrees 24

# ── Resolve agent identity for .profile repo ────────────────────────────
# Best-effort: a failure here must not abort the run.
resolve_agent_identity || true

# ── Collect pre-flight metrics ────────────────────────────────────────────
# Runs supervisor/preflight.sh with stdout and stderr merged into
# PREFLIGHT_OUTPUT. A non-zero exit is recorded in PREFLIGHT_RC and logged,
# but the run continues with whatever output was captured.
log "Running preflight.sh"
PREFLIGHT_OUTPUT=""
PREFLIGHT_RC=0
if PREFLIGHT_OUTPUT=$(bash "$SCRIPT_DIR/preflight.sh" "$PROJECT_TOML" 2>&1); then
  # Here-string instead of `echo … |`: same line count, no echo option parsing.
  log "Preflight collected ($(wc -l <<<"$PREFLIGHT_OUTPUT") lines)"
else
  # In the else branch $? still holds the exit status of the `if` condition.
  PREFLIGHT_RC=$?
  log "WARNING: preflight.sh failed (exit code $PREFLIGHT_RC), continuing with partial data"
  if [ -n "$PREFLIGHT_OUTPUT" ]; then
    log "Preflight error: $(tail -3 <<<"$PREFLIGHT_OUTPUT")"
  fi
fi

# ── Load formula + context ───────────────────────────────────────────────
# Explicit `|| exit 1`: the run cannot proceed without a formula.
load_formula_or_profile "supervisor" "$FACTORY_ROOT/formulas/run-supervisor.toml" || exit 1
build_context_block AGENTS.md

# ── Prepare .profile context (lessons injection) ─────────────────────────
formula_prepare_profile_context

# ── Read scratch file (compaction survival) ───────────────────────────────
SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE")
SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE")

# ── Build prompt ─────────────────────────────────────────────────────────
build_sdk_prompt_footer
export CLAUDE_MODEL="sonnet"

# ── Create worktree (before prompt assembly so trap is set early) ────────
formula_worktree_setup "$WORKTREE"

# Inject OPS repo status into prompt
#######################################
# Build the "OPS Repo Status" prompt section.
# Globals:
#   OPS_REPO_DEGRADED  (read)  "1" selects the degraded-mode wording; any
#                              other value (or unset) selects full mode
#   OPS_REPO_ROOT      (read)  shown in full mode
#   OPS_KNOWLEDGE_ROOT, OPS_JOURNAL_ROOT, OPS_VAULT_ROOT (read)
#   OPS_STATUS         (written) section text with a leading and a trailing
#                              newline
# Unset or empty path variables are rendered as "<unset>", so the function is
# safe under `set -u`.
#######################################
build_ops_status() {
  local mode_line
  if [ "${OPS_REPO_DEGRADED:-0}" = "1" ]; then
    mode_line="**DEGRADED MODE**: OPS repo is not available. Using bundled knowledge files and local journal/vault paths."
  else
    mode_line="**FULL MODE**: OPS repo available at ${OPS_REPO_ROOT:-<unset>}"
  fi

  # Assigned directly (not via command substitution) so the trailing newline
  # of the section is preserved.
  OPS_STATUS="
## OPS Repo Status
${mode_line}
- Knowledge files: ${OPS_KNOWLEDGE_ROOT:-<unset>}
- Journal: ${OPS_JOURNAL_ROOT:-<unset>}
- Vault destination: ${OPS_VAULT_ROOT:-<unset>}
"
}
build_ops_status

# Assemble the one-shot prompt: role preamble, preflight metrics, project
# context (+ lessons), optional scratch context, OPS repo status, priority
# order, the formula itself, scratch instructions and the SDK footer.
# SCRATCH_CONTEXT is followed by a newline only when it is non-empty.
PROMPT="You are the supervisor agent for ${FORGE_REPO}. Work through the formula below.

You have full shell access and --dangerously-skip-permissions.
Fix what you can. File vault items for what you cannot. Do NOT ask permission — act first, report after.

## Pre-flight metrics (collected $(date -u +%H:%M) UTC)
${PREFLIGHT_OUTPUT}

## Project context
${CONTEXT_BLOCK}$(formula_lessons_block)
${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT}
}
${OPS_STATUS}
Priority order: P0 memory > P1 disk > P2 stopped > P3 degraded > P4 housekeeping

${FORMULA_CONTENT}

${SCRATCH_INSTRUCTION}
${PROMPT_FOOTER}"

# ── Run agent ─────────────────────────────────────────────────────────────
agent_run --worktree "$WORKTREE" "$PROMPT"
log "agent_run complete"

# Write journal entry post-session
# Best-effort: a journal failure must not fail the run.
profile_write_journal "supervisor-run" "Supervisor run $(date -u +%Y-%m-%d)" "complete" "" || true

# The scratch file only needs to survive within a run; drop it once done.
rm -f -- "$SCRATCH_FILE"
log "--- Supervisor run done ---"