From 446e6ef0a8f3ff238c7a6300bbd544615aad6b12 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 21 Mar 2026 15:40:07 +0000 Subject: [PATCH] fix: Remove legacy predictor (prediction-poll.sh + prediction-agent.sh + cron entry) (#419) Co-Authored-By: Claude Opus 4.6 (1M context) --- .woodpecker/agent-smoke.sh | 2 - AGENTS.md | 1 - planner/prediction-agent.sh | 277 ------------------------------------ planner/prediction-poll.sh | 69 --------- predictor/AGENTS.md | 6 - 5 files changed, 355 deletions(-) delete mode 100755 planner/prediction-agent.sh delete mode 100755 planner/prediction-poll.sh diff --git a/.woodpecker/agent-smoke.sh b/.woodpecker/agent-smoke.sh index 7aeaa9c..5f9f978 100644 --- a/.woodpecker/agent-smoke.sh +++ b/.woodpecker/agent-smoke.sh @@ -196,8 +196,6 @@ check_script gardener/gardener-run.sh check_script review/review-pr.sh check_script review/review-poll.sh check_script planner/planner-run.sh -check_script planner/prediction-agent.sh -check_script planner/prediction-poll.sh check_script supervisor/supervisor-poll.sh check_script supervisor/update-prompt.sh check_script vault/vault-agent.sh diff --git a/AGENTS.md b/AGENTS.md index 03537b3..1c800fe 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -20,7 +20,6 @@ disinto/ ├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula ├── planner/ planner-run.sh — direct cron executor for run-planner formula │ planner/journal/ — daily raw logs from each planner run -│ prediction-poll.sh, prediction-agent.sh — legacy predictor (superseded by predictor/) ├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper) │ preflight.sh — pre-flight data collection for supervisor formula │ supervisor/journal/ — daily health logs from each run diff --git a/planner/prediction-agent.sh b/planner/prediction-agent.sh deleted file mode 100755 index 528e425..0000000 --- a/planner/prediction-agent.sh +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# prediction-agent.sh — Per-project LLM prediction agent -# -# Reads structured evidence from the project's evidence/ directory plus -# secondary Codeberg signals, then asks Claude to identify patterns and -# file up to 5 prediction/unreviewed issues for the planner to triage. -# -# The predictor is the goblin — it sees patterns and shouts about them. -# The planner is the adult — it triages every prediction before acting. -# The predictor MUST NOT emit feature work directly. -# -# Signal sources: -# evidence/red-team/ — attack results, floor status, vulnerability trends -# evidence/evolution/ — fitness scores, champion improvements -# evidence/user-test/ — persona journey completion, friction points -# evidence/holdout/ — scenario pass rates, quality gate history -# evidence/resources/ — CPU, RAM, disk, container utilization -# evidence/protocol/ — on-chain metrics from Ponder -# -# Secondary: -# Codeberg activity (new issues, merged PRs), system resource snapshot -# -# Usage: prediction-agent.sh [project-toml] -# Called by: prediction-poll.sh -# ============================================================================= -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" - -export PROJECT_TOML="${1:-}" -# shellcheck source=../lib/env.sh -source "$FACTORY_ROOT/lib/env.sh" - -LOG_FILE="$SCRIPT_DIR/prediction.log" -# env.sh already exports CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"; inherit that default -EVIDENCE_DIR="${PROJECT_REPO_ROOT}/evidence" - -log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } - -log "--- prediction-agent start (project: ${PROJECT_NAME}) ---" - -# ── Helpers ─────────────────────────────────────────────────────────────── - -# Find the most recent JSON file in a directory (files named YYYY-MM-DD.json -# sort correctly in alphabetical order). -latest_json() { find "$1" -maxdepth 1 -name '*.json' 2>/dev/null | sort | tail -1; } -prev_json() { find "$1" -maxdepth 1 -name '*.json' 2>/dev/null | sort | tail -2 | head -1; } - -# ── Scan evidence/ directory ────────────────────────────────────────────── -EVIDENCE_SUMMARY="" -for subdir in red-team evolution user-test holdout resources protocol; do - subdir_path="${EVIDENCE_DIR}/${subdir}" - - if [ ! -d "$subdir_path" ]; then - EVIDENCE_SUMMARY="${EVIDENCE_SUMMARY} -=== evidence/${subdir} === -(no data — directory not yet created)" - continue - fi - - latest=$(latest_json "$subdir_path") - if [ -z "$latest" ]; then - EVIDENCE_SUMMARY="${EVIDENCE_SUMMARY} -=== evidence/${subdir} === -(no data — no JSON files found)" - continue - fi - - latest_name=$(basename "$latest") - # Derive age from the date in the filename (YYYY-MM-DD.json) — more reliable - # than mtime, which changes when files are copied or synced. - file_date=$(basename "$latest" .json) - file_ts=$(date -d "$file_date" +%s 2>/dev/null || date -r "$latest" +%s) - now_ts=$(date +%s) - age_hours=$(( (now_ts - file_ts) / 3600 )) - content=$(head -c 3000 "$latest" 2>/dev/null || echo "{}") - - prev=$(prev_json "$subdir_path") - prev_section="" - if [ -n "$prev" ] && [ "$prev" != "$latest" ]; then - prev_name=$(basename "$prev") - prev_content=$(head -c 2000 "$prev" 2>/dev/null || echo "{}") - prev_section=" - previous: ${prev_name} - previous_content: ${prev_content}" - fi - - EVIDENCE_SUMMARY="${EVIDENCE_SUMMARY} -=== evidence/${subdir} === - latest: ${latest_name} (age: ${age_hours}h, path: ${latest}) - content: ${content}${prev_section}" -done - -# ── Secondary signals — Codeberg activity (last 24h) ───────────────────── -SINCE_ISO=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || true) -if [ -z "$SINCE_ISO" ]; then - log "WARN: date -d '24 hours ago' failed (non-GNU date?) — skipping Codeberg activity" -fi -RECENT_ISSUES="" -RECENT_PRS="" -if [ -n "$SINCE_ISO" ]; then - RECENT_ISSUES=$(codeberg_api GET "/issues?state=open&type=issues&limit=20&sort=newest" 2>/dev/null | \ - jq -r --arg since "$SINCE_ISO" \ - '.[] | select(.created_at >= $since) | " #\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' \ - 2>/dev/null || true) - # Use state=closed to capture recently-merged PRs — merged activity is the - # key signal (e.g. new red-team PR merged since last evolution run). - RECENT_PRS=$(codeberg_api GET "/pulls?state=closed&limit=20&sort=newest" 2>/dev/null | \ - jq -r --arg since "$SINCE_ISO" \ - '.[] | select(.merged_at != null and .merged_at >= $since) | " #\(.number) \(.title) (merged \(.merged_at[:10]))"' \ - 2>/dev/null || true) -fi - -# ── Already-open predictions (avoid duplicates) ─────────────────────────── -OPEN_PREDICTIONS=$(codeberg_api GET "/issues?state=open&type=issues&labels=prediction%2Funreviewed&limit=50" 2>/dev/null | \ - jq -r '.[] | " #\(.number) \(.title)"' 2>/dev/null || true) - -# ── System resource snapshot ────────────────────────────────────────────── -AVAIL_MB=$(free -m | awk '/Mem:/{print $7}' 2>/dev/null || echo "unknown") -DISK_PCT=$(df -h / | awk 'NR==2{print $5}' | tr -d '%' 2>/dev/null || echo "unknown") -LOAD_AVG=$(cut -d' ' -f1-3 /proc/loadavg 2>/dev/null || echo "unknown") -ACTIVE_SESSIONS=$(tmux list-sessions 2>/dev/null | \ - grep -cE "^(dev|action|gardener|review)-" || echo "0") - -# ── Build prompt ────────────────────────────────────────────────────────── -PROMPT="You are the prediction agent (goblin) for ${CODEBERG_REPO}. - -Your role: spot patterns in evidence and signal them as prediction issues. -The planner (adult) will triage every prediction before acting. -You MUST NOT emit feature work or implementation issues — only predictions -about evidence state, metric trends, and system conditions. - -## Evidence from evidence/ directory -${EVIDENCE_SUMMARY} - -## System resource snapshot (right now) -Available RAM: ${AVAIL_MB}MB -Disk used: ${DISK_PCT}% -Load avg (1/5/15 min): ${LOAD_AVG} -Active agent sessions (tmux): ${ACTIVE_SESSIONS} - -## Recent Codeberg activity (last 24h) -New issues: -${RECENT_ISSUES:- (none)} - -Recently merged PRs (last 24h): -${RECENT_PRS:- (none)} - -## Already-open predictions (do NOT duplicate these) -${OPEN_PREDICTIONS:- (none)} - -## What to look for - -**Staleness** — Evidence older than its expected refresh interval: -- red-team: stale after 7 days -- evolution: stale after 7 days -- user-test: stale after 14 days -- holdout: stale after 7 days -- resources: stale after 1 day -- protocol: stale after 1 day -- any directory missing entirely: flag as critical gap - -**Regression** — Metrics worse in latest vs previous run: -- Decreased: fitness score, pass rate, conversion, floor price -- Increased: error count, risk score, ETH extracted by attacker -- Only flag if change is meaningful (>5% relative, or clearly significant) - -**Opportunity** — Conditions that make a process worth running now: -- Box is relatively idle (RAM>2000MB, load<2.0, no active agent sessions) - AND evidence is stale — good time to run evolution or red-team -- New attack vectors in red-team since last evolution run → evolution scores stale - -**Risk** — Conditions that suggest deferring expensive work: -- RAM<1500MB or disk>85% or load>3.0 → defer evolution/red-team -- Active dev session in progress on related work - -## Output format - -For each prediction, output a JSON object on its own line (no array wrapper, -no markdown fences): - -{\"title\": \"...\", \"signal_source\": \"...\", \"confidence\": \"high|medium|low\", \"suggested_action\": \"...\", \"body\": \"...\"} - -Fields: -- title: Short declarative statement of what you observed. Not an action. -- signal_source: Which evidence file or signal triggered this - (e.g. \"evidence/evolution/2024-01-15.json\", \"system resources\", - \"evidence/red-team/ missing\") -- confidence: high (clear numerical evidence), medium (trend/pattern), - low (inferred or absent data but important to flag) -- suggested_action: Concrete next step for the planner — - \"run formula X\", \"file issue for Y\", \"escalate to human\", - \"monitor for N days\", \"run process X\" -- body: 2-4 sentences. What changed or is missing, why it matters, - what the planner should consider doing. Be specific: name the file, - metric, and value. - -## Rules -- Max 5 predictions total -- Do NOT predict feature work — only evidence/metric/system observations -- Do NOT duplicate existing open predictions (listed above) -- Do NOT predict things you cannot support with the evidence provided -- Prefer high-confidence predictions; emit low-confidence only when the - signal is important (e.g. missing critical evidence) -- Be specific: name the file, the metric, the value - -If you see no meaningful patterns, output exactly: NO_PREDICTIONS - -Output ONLY the JSON lines (or NO_PREDICTIONS) — no preamble, no markdown." - -# ── Invoke Claude (one-shot) ────────────────────────────────────────────── -log "invoking claude -p for ${PROJECT_NAME} predictions" -CLAUDE_OUTPUT=$(timeout "$CLAUDE_TIMEOUT" claude -p "$PROMPT" \ - --model sonnet \ - 2>/dev/null) || { - EXIT_CODE=$? - log "ERROR: claude exited with code $EXIT_CODE" - exit 1 -} - -log "claude finished ($(printf '%s' "$CLAUDE_OUTPUT" | wc -c) bytes)" - -if printf '%s' "$CLAUDE_OUTPUT" | grep -qxF "NO_PREDICTIONS"; then - log "no predictions — evidence looks healthy for ${PROJECT_NAME}" - log "--- prediction-agent done ---" - exit 0 -fi - -# ── Look up prediction/unreviewed label ─────────────────────────────────── -PREDICTION_LABEL_ID=$(codeberg_api GET "/labels" 2>/dev/null | \ - jq -r '.[] | select(.name == "prediction/unreviewed") | .id' 2>/dev/null || true) -if [ -z "$PREDICTION_LABEL_ID" ]; then - log "WARN: 'prediction/unreviewed' label not found — issues created without label (see #141)" -fi - -# ── Create prediction issues ────────────────────────────────────────────── -CREATED=0 -while IFS= read -r line; do - [ -z "$line" ] && continue - # Skip non-JSON lines - printf '%s' "$line" | jq -e . >/dev/null 2>&1 || continue - - TITLE=$(printf '%s' "$line" | jq -r '.title') - SIGNAL=$(printf '%s' "$line" | jq -r '.signal_source // "unknown"') - CONFIDENCE=$(printf '%s' "$line" | jq -r '.confidence // "medium"') - ACTION=$(printf '%s' "$line" | jq -r '.suggested_action // ""') - BODY_TEXT=$(printf '%s' "$line" | jq -r '.body') - - FULL_BODY="${BODY_TEXT} - ---- -**Signal source:** ${SIGNAL} -**Confidence:** ${CONFIDENCE} -**Suggested action:** ${ACTION}" - - CREATE_PAYLOAD=$(jq -nc --arg t "$TITLE" --arg b "$FULL_BODY" \ - '{title: $t, body: $b}') - - if [ -n "$PREDICTION_LABEL_ID" ]; then - CREATE_PAYLOAD=$(printf '%s' "$CREATE_PAYLOAD" | \ - jq --argjson lid "$PREDICTION_LABEL_ID" '.labels = [$lid]') - fi - - RESULT=$(codeberg_api POST "/issues" -d "$CREATE_PAYLOAD" 2>/dev/null || true) - ISSUE_NUM=$(printf '%s' "$RESULT" | jq -r '.number // "?"' 2>/dev/null || echo "?") - - log "Created prediction #${ISSUE_NUM} [${CONFIDENCE}]: ${TITLE}" - matrix_send "predictor" "🔮 Prediction #${ISSUE_NUM} [${CONFIDENCE}]: ${TITLE} — ${ACTION}" \ - 2>/dev/null || true - - CREATED=$((CREATED + 1)) - [ "$CREATED" -ge 5 ] && break -done <<< "$CLAUDE_OUTPUT" - -log "--- prediction-agent done (created ${CREATED} predictions for ${PROJECT_NAME}) ---" diff --git a/planner/prediction-poll.sh b/planner/prediction-poll.sh deleted file mode 100755 index 18ed401..0000000 --- a/planner/prediction-poll.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# prediction-poll.sh — Cron wrapper for prediction-agent (per-project) -# -# Runs hourly. Guards against concurrent runs and low memory. -# Iterates over all registered projects and runs prediction-agent.sh for each. -# -# Cron: 0 * * * * /path/to/disinto/planner/prediction-poll.sh -# Log: tail -f /path/to/disinto/planner/prediction.log -# ============================================================================= -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" - -# shellcheck source=../lib/env.sh -source "$FACTORY_ROOT/lib/env.sh" - -LOG_FILE="$SCRIPT_DIR/prediction.log" -# Global lock — projects are processed serially. If a single run takes longer -# than the cron interval (1h), the next cron invocation will find the lock held -# and exit silently. That is acceptable: LLM calls are cheap to skip. -LOCK_FILE="/tmp/prediction-poll.lock" -PROJECTS_DIR="$FACTORY_ROOT/projects" - -log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } - -# ── Lock ────────────────────────────────────────────────────────────────── -if [ -f "$LOCK_FILE" ]; then - LOCK_PID=$(cat "$LOCK_FILE" 2>/dev/null || true) - if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then - log "poll: prediction running (PID $LOCK_PID)" - exit 0 - fi - rm -f "$LOCK_FILE" -fi -echo $$ > "$LOCK_FILE" -trap 'rm -f "$LOCK_FILE"' EXIT - -# ── Memory guard ────────────────────────────────────────────────────────── -AVAIL_MB=$(free -m | awk '/Mem:/{print $7}') -if [ "${AVAIL_MB:-0}" -lt 2000 ]; then - log "poll: skipping — only ${AVAIL_MB}MB available (need 2000)" - exit 0 -fi - -log "--- Prediction poll start ---" - -# ── Iterate over projects ───────────────────────────────────────────────── -PROJECT_COUNT=0 -if [ -d "$PROJECTS_DIR" ]; then - for project_toml in "$PROJECTS_DIR"/*.toml; do - [ -f "$project_toml" ] || continue - PROJECT_COUNT=$((PROJECT_COUNT + 1)) - log "starting prediction-agent for $(basename "$project_toml")" - if ! "$SCRIPT_DIR/prediction-agent.sh" "$project_toml"; then - log "prediction-agent exited non-zero for $(basename "$project_toml")" - fi - done -fi - -if [ "$PROJECT_COUNT" -eq 0 ]; then - log "No projects/*.toml found — running prediction-agent with .env defaults" - if ! "$SCRIPT_DIR/prediction-agent.sh"; then - log "prediction-agent exited non-zero" - fi -fi - -log "--- Prediction poll done ---" diff --git a/predictor/AGENTS.md b/predictor/AGENTS.md index 1b81dee..86df19e 100644 --- a/predictor/AGENTS.md +++ b/predictor/AGENTS.md @@ -23,12 +23,6 @@ memory check (skips if available RAM < 2000 MB). collect-signals, analyze-and-predict) with `needs` dependencies. Claude collects signals and files prediction issues in a single interactive session -**Supersedes**: The legacy predictor (`planner/prediction-poll.sh` + -`planner/prediction-agent.sh`) used `claude -p` one-shot, read `evidence/` -JSON, and ran hourly. This formula-based predictor replaces it with direct -CI/issues/logs signal collection and interactive Claude sessions, matching the -planner's tmux+formula pattern. - **Environment variables consumed**: - `CODEBERG_TOKEN`, `CODEBERG_REPO`, `CODEBERG_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT` - `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by predictor-run.sh)