From c06cf81031ba96f43aea3618495a1adebb1d322a Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 19 Mar 2026 10:03:45 +0000 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20feat:=20prediction-poll.sh=20?= =?UTF-8?q?=E2=80=94=20per-project=20LLM=20prediction=20agent=20(#140)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- planner/prediction-agent.sh | 269 ++++++++++++++++++++++++++++++++++++ planner/prediction-poll.sh | 66 +++++++++ 2 files changed, 335 insertions(+) create mode 100755 planner/prediction-agent.sh create mode 100755 planner/prediction-poll.sh diff --git a/planner/prediction-agent.sh b/planner/prediction-agent.sh new file mode 100755 index 0000000..59e89be --- /dev/null +++ b/planner/prediction-agent.sh @@ -0,0 +1,269 @@ +#!/usr/bin/env bash +# ============================================================================= +# prediction-agent.sh — Per-project LLM prediction agent +# +# Reads structured evidence from the project's evidence/ directory plus +# secondary Codeberg signals, then asks Claude to identify patterns and +# file up to 5 prediction/unreviewed issues for the planner to triage. +# +# The predictor is the goblin — it sees patterns and shouts about them. +# The planner is the adult — it triages every prediction before acting. +# The predictor MUST NOT emit feature work directly. 
+# +# Signal sources: +# evidence/red-team/ — attack results, floor status, vulnerability trends +# evidence/evolution/ — fitness scores, champion improvements +# evidence/user-test/ — persona journey completion, friction points +# evidence/holdout/ — scenario pass rates, quality gate history +# evidence/resources/ — CPU, RAM, disk, container utilization +# evidence/protocol/ — on-chain metrics from Ponder +# +# Secondary: +# Codeberg activity (new issues, merged PRs), system resource snapshot +# +# Usage: prediction-agent.sh [project-toml] +# Called by: prediction-poll.sh +# ============================================================================= +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" + +export PROJECT_TOML="${1:-}" +# shellcheck source=../lib/env.sh +source "$FACTORY_ROOT/lib/env.sh" + +LOG_FILE="$SCRIPT_DIR/prediction.log" +CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}" +EVIDENCE_DIR="${PROJECT_REPO_ROOT}/evidence" + +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } + +log "--- prediction-agent start (project: ${PROJECT_NAME}) ---" + +# ── Helpers ─────────────────────────────────────────────────────────────── + +# Find the most recent JSON file in a directory (files named YYYY-MM-DD.json +# sort correctly in alphabetical order). +latest_json() { find "$1" -maxdepth 1 -name '*.json' 2>/dev/null | sort | tail -1; } +prev_json() { find "$1" -maxdepth 1 -name '*.json' 2>/dev/null | sort | tail -2 | head -1; } + +# ── Scan evidence/ directory ────────────────────────────────────────────── +EVIDENCE_SUMMARY="" +for subdir in red-team evolution user-test holdout resources protocol; do + subdir_path="${EVIDENCE_DIR}/${subdir}" + + if [ ! 
-d "$subdir_path" ]; then + EVIDENCE_SUMMARY="${EVIDENCE_SUMMARY} +=== evidence/${subdir} === +(no data — directory not yet created)" + continue + fi + + latest=$(latest_json "$subdir_path") + if [ -z "$latest" ]; then + EVIDENCE_SUMMARY="${EVIDENCE_SUMMARY} +=== evidence/${subdir} === +(no data — no JSON files found)" + continue + fi + + file_ts=$(date -r "$latest" +%s) + now_ts=$(date +%s) + age_hours=$(( (now_ts - file_ts) / 3600 )) + latest_name=$(basename "$latest") + content=$(head -c 3000 "$latest" 2>/dev/null || echo "{}") + + prev=$(prev_json "$subdir_path") + prev_section="" + if [ -n "$prev" ] && [ "$prev" != "$latest" ]; then + prev_name=$(basename "$prev") + prev_content=$(head -c 2000 "$prev" 2>/dev/null || echo "{}") + prev_section=" + previous: ${prev_name} + previous_content: ${prev_content}" + fi + + EVIDENCE_SUMMARY="${EVIDENCE_SUMMARY} +=== evidence/${subdir} === + latest: ${latest_name} (age: ${age_hours}h, path: ${latest}) + content: ${content}${prev_section}" +done + +# ── Secondary signals — Codeberg activity (last 24h) ───────────────────── +SINCE_ISO=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || true) +RECENT_ISSUES="" +RECENT_PRS="" +if [ -n "$SINCE_ISO" ]; then + RECENT_ISSUES=$(codeberg_api GET "/issues?state=open&type=issues&limit=20&sort=newest" 2>/dev/null | \ + jq -r --arg since "$SINCE_ISO" \ + '.[] | select(.created_at >= $since) | " #\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' \ + 2>/dev/null || true) + RECENT_PRS=$(codeberg_api GET "/pulls?state=open&limit=20&sort=newest" 2>/dev/null | \ + jq -r --arg since "$SINCE_ISO" \ + '.[] | select(.created_at >= $since) | " #\(.number) \(.title)"' \ + 2>/dev/null || true) +fi + +# ── Already-open predictions (avoid duplicates) ─────────────────────────── +OPEN_PREDICTIONS=$(codeberg_api GET "/issues?state=open&type=issues&labels=prediction%2Funreviewed&limit=50" 2>/dev/null | \ + jq -r '.[] | " #\(.number) \(.title)"' 2>/dev/null || true) + +# ── System 
resource snapshot ────────────────────────────────────────────── +AVAIL_MB=$(free -m | awk '/Mem:/{print $7}' 2>/dev/null || echo "unknown") +DISK_PCT=$(df -h / | awk 'NR==2{print $5}' | tr -d '%' 2>/dev/null || echo "unknown") +LOAD_AVG=$(cut -d' ' -f1-3 /proc/loadavg 2>/dev/null || echo "unknown") +ACTIVE_SESSIONS=$(tmux list-sessions 2>/dev/null | \ + grep -cE "^(dev|action|gardener|review)-" || true) # grep -c already prints 0 on no match; only mask its exit status 1 + +# ── Build prompt ────────────────────────────────────────────────────────── +PROMPT="You are the prediction agent (goblin) for ${CODEBERG_REPO}. + +Your role: spot patterns in evidence and signal them as prediction issues. +The planner (adult) will triage every prediction before acting. +You MUST NOT emit feature work or implementation issues — only predictions +about evidence state, metric trends, and system conditions. + +## Evidence from evidence/ directory +${EVIDENCE_SUMMARY} + +## System resource snapshot (right now) +Available RAM: ${AVAIL_MB}MB +Disk used: ${DISK_PCT}% +Load avg (1/5/15 min): ${LOAD_AVG} +Active agent sessions (tmux): ${ACTIVE_SESSIONS} + +## Recent Codeberg activity (last 24h) +New issues: +${RECENT_ISSUES:- (none)} + +Open PRs (recently updated): +${RECENT_PRS:- (none)} + +## Already-open predictions (do NOT duplicate these) +${OPEN_PREDICTIONS:- (none)} + +## What to look for + +**Staleness** — Evidence older than its expected refresh interval: +- red-team: stale after 7 days +- evolution: stale after 7 days +- user-test: stale after 14 days +- holdout: stale after 7 days +- resources: stale after 1 day +- protocol: stale after 1 day +- any directory missing entirely: flag as critical gap + +**Regression** — Metrics worse in latest vs previous run: +- Decreased: fitness score, pass rate, conversion, floor price +- Increased: error count, risk score, ETH extracted by attacker +- Only flag if change is meaningful (>5% relative, or clearly significant) + +**Opportunity** — Conditions that make a process worth running now: +- Box is 
relatively idle (RAM>2000MB, load<2.0, no active agent sessions) + AND evidence is stale — good time to run evolution or red-team +- New attack vectors in red-team since last evolution run → evolution scores stale + +**Risk** — Conditions that suggest deferring expensive work: +- RAM<1500MB or disk>85% or load>3.0 → defer evolution/red-team +- Active dev session in progress on related work + +## Output format + +For each prediction, output a JSON object on its own line (no array wrapper, +no markdown fences): + +{\"title\": \"...\", \"signal_source\": \"...\", \"confidence\": \"high|medium|low\", \"suggested_action\": \"...\", \"body\": \"...\"} + +Fields: +- title: Short declarative statement of what you observed. Not an action. +- signal_source: Which evidence file or signal triggered this + (e.g. \"evidence/evolution/2024-01-15.json\", \"system resources\", + \"evidence/red-team/ missing\") +- confidence: high (clear numerical evidence), medium (trend/pattern), + low (inferred or absent data but important to flag) +- suggested_action: Concrete next step for the planner — + \"run formula X\", \"file issue for Y\", \"escalate to human\", + \"monitor for N days\", \"run process X\" +- body: 2-4 sentences. What changed or is missing, why it matters, + what the planner should consider doing. Be specific: name the file, + metric, and value. + +## Rules +- Max 5 predictions total +- Do NOT predict feature work — only evidence/metric/system observations +- Do NOT duplicate existing open predictions (listed above) +- Do NOT predict things you cannot support with the evidence provided +- Prefer high-confidence predictions; emit low-confidence only when the + signal is important (e.g. missing critical evidence) +- Be specific: name the file, the metric, the value + +If you see no meaningful patterns, output exactly: NO_PREDICTIONS + +Output ONLY the JSON lines (or NO_PREDICTIONS) — no preamble, no markdown." 
+ +# ── Invoke Claude (one-shot) ────────────────────────────────────────────── +log "invoking claude -p for ${PROJECT_NAME} predictions" +CLAUDE_OUTPUT=$(timeout "$CLAUDE_TIMEOUT" claude -p "$PROMPT" \ + --model sonnet \ + 2>/dev/null) || { + EXIT_CODE=$? + log "ERROR: claude exited with code $EXIT_CODE" + exit 1 +} + +log "claude finished ($(printf '%s' "$CLAUDE_OUTPUT" | wc -c) bytes)" + +if printf '%s' "$CLAUDE_OUTPUT" | grep -q "NO_PREDICTIONS"; then + log "no predictions — evidence looks healthy for ${PROJECT_NAME}" + log "--- prediction-agent done ---" + exit 0 +fi + +# ── Look up prediction/unreviewed label ─────────────────────────────────── +PREDICTION_LABEL_ID=$(codeberg_api GET "/labels" 2>/dev/null | \ + jq -r '.[] | select(.name == "prediction/unreviewed") | .id' 2>/dev/null || true) +if [ -z "$PREDICTION_LABEL_ID" ]; then + log "WARN: 'prediction/unreviewed' label not found — issues created without label (see #141)" +fi + +# ── Create prediction issues ────────────────────────────────────────────── +CREATED=0 +while IFS= read -r line; do + [ -z "$line" ] && continue + # Skip anything that is not a JSON object with a string title (bare numbers/strings/arrays would make the .title lookups below fail under set -e) + printf '%s' "$line" | jq -e 'type == "object" and (.title | type == "string")' 
>/dev/null 2>&1 || continue + + TITLE=$(printf '%s' "$line" | jq -r '.title') + SIGNAL=$(printf '%s' "$line" | jq -r '.signal_source // "unknown"') + CONFIDENCE=$(printf '%s' "$line" | jq -r '.confidence // "medium"') + ACTION=$(printf '%s' "$line" | jq -r '.suggested_action // ""') + BODY_TEXT=$(printf '%s' "$line" | jq -r '.body') + + FULL_BODY="${BODY_TEXT} + +--- +**Signal source:** ${SIGNAL} +**Confidence:** ${CONFIDENCE} +**Suggested action:** ${ACTION}" + + CREATE_PAYLOAD=$(jq -nc --arg t "$TITLE" --arg b "$FULL_BODY" \ + '{title: $t, body: $b}') + + if [ -n "$PREDICTION_LABEL_ID" ]; then + CREATE_PAYLOAD=$(printf '%s' "$CREATE_PAYLOAD" | \ + jq --argjson lid "$PREDICTION_LABEL_ID" '.labels = [$lid]') + fi + + RESULT=$(codeberg_api POST "/issues" -d "$CREATE_PAYLOAD" 2>/dev/null || true) + ISSUE_NUM=$(printf '%s' "$RESULT" | jq -r '.number // "?"' 2>/dev/null || echo "?") + + log "Created prediction #${ISSUE_NUM} [${CONFIDENCE}]: ${TITLE}" + matrix_send "predictor" "🔮 Prediction #${ISSUE_NUM} [${CONFIDENCE}]: ${TITLE} — ${ACTION}" \ + 2>/dev/null || true + + CREATED=$((CREATED + 1)) + [ "$CREATED" -ge 5 ] && break +done <<< "$CLAUDE_OUTPUT" + +log "--- prediction-agent done (created ${CREATED} predictions for ${PROJECT_NAME}) ---" diff --git a/planner/prediction-poll.sh b/planner/prediction-poll.sh new file mode 100755 index 0000000..06bb316 --- /dev/null +++ b/planner/prediction-poll.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# ============================================================================= +# prediction-poll.sh — Cron wrapper for prediction-agent (per-project) +# +# Runs hourly. Guards against concurrent runs and low memory. +# Iterates over all registered projects and runs prediction-agent.sh for each. 
+# +# Cron: 0 * * * * /path/to/disinto/planner/prediction-poll.sh +# Log: tail -f /path/to/disinto/planner/prediction.log +# ============================================================================= +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" + +# shellcheck source=../lib/env.sh +source "$FACTORY_ROOT/lib/env.sh" + +LOG_FILE="$SCRIPT_DIR/prediction.log" +LOCK_FILE="/tmp/prediction-poll.lock" +PROJECTS_DIR="$FACTORY_ROOT/projects" + +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } + +# ── Lock ────────────────────────────────────────────────────────────────── +if [ -f "$LOCK_FILE" ]; then + LOCK_PID=$(cat "$LOCK_FILE" 2>/dev/null || true) + if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then + log "poll: prediction running (PID $LOCK_PID)" + exit 0 + fi + rm -f "$LOCK_FILE" +fi +echo $$ > "$LOCK_FILE" +trap 'rm -f "$LOCK_FILE"' EXIT + +# ── Memory guard ────────────────────────────────────────────────────────── +AVAIL_MB=$(free -m | awk '/Mem:/{print $7}') +if [ "${AVAIL_MB:-0}" -lt 2000 ]; then + log "poll: skipping — only ${AVAIL_MB}MB available (need 2000)" + exit 0 +fi + +log "--- Prediction poll start ---" + +# ── Iterate over projects ───────────────────────────────────────────────── +PROJECT_COUNT=0 +if [ -d "$PROJECTS_DIR" ]; then + for project_toml in "$PROJECTS_DIR"/*.toml; do + [ -f "$project_toml" ] || continue + PROJECT_COUNT=$((PROJECT_COUNT + 1)) + log "starting prediction-agent for $(basename "$project_toml")" + if ! "$SCRIPT_DIR/prediction-agent.sh" "$project_toml"; then + log "prediction-agent exited non-zero for $(basename "$project_toml")" + fi + done +fi + +if [ "$PROJECT_COUNT" -eq 0 ]; then + log "No projects/*.toml found — running prediction-agent with .env defaults" + if ! 
"$SCRIPT_DIR/prediction-agent.sh"; then + log "prediction-agent exited non-zero" + fi +fi + +log "--- Prediction poll done ---" From d2f788239aa2f7325001a4031d3ecaa95d5f6979 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 19 Mar 2026 10:13:22 +0000 Subject: [PATCH 2/2] fix: address review feedback on prediction agent (#140) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove CLAUDE_TIMEOUT no-op override — inherit factory default (7200s) from env.sh - Use anchored grep -qxF "NO_PREDICTIONS" to avoid false early exits - Fetch closed PRs (state=closed, merged_at filter) instead of open — captures merged activity signals - Parse staleness age from filename date (YYYY-MM-DD.json) instead of file mtime - Log a warning when date -d falls back due to non-GNU date - Add comment explaining global lock serialisation trade-off Co-Authored-By: Claude Sonnet 4.6 --- planner/prediction-agent.sh | 22 +++++++++++++++------- planner/prediction-poll.sh | 3 +++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/planner/prediction-agent.sh b/planner/prediction-agent.sh index 59e89be..528e425 100755 --- a/planner/prediction-agent.sh +++ b/planner/prediction-agent.sh @@ -34,7 +34,7 @@ export PROJECT_TOML="${1:-}" source "$FACTORY_ROOT/lib/env.sh" LOG_FILE="$SCRIPT_DIR/prediction.log" -CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}" +# env.sh already exports CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"; inherit that default EVIDENCE_DIR="${PROJECT_REPO_ROOT}/evidence" log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } @@ -68,10 +68,13 @@ for subdir in red-team evolution user-test holdout resources protocol; do continue fi - file_ts=$(date -r "$latest" +%s) + latest_name=$(basename "$latest") + # Derive age from the date in the filename (YYYY-MM-DD.json) — more reliable + # than mtime, which changes when files are copied or synced. 
+ file_date=$(basename "$latest" .json) + file_ts=$(date -d "$file_date" +%s 2>/dev/null || date -r "$latest" +%s) now_ts=$(date +%s) age_hours=$(( (now_ts - file_ts) / 3600 )) - latest_name=$(basename "$latest") content=$(head -c 3000 "$latest" 2>/dev/null || echo "{}") prev=$(prev_json "$subdir_path") @@ -92,6 +95,9 @@ done # ── Secondary signals — Codeberg activity (last 24h) ───────────────────── SINCE_ISO=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || true) +if [ -z "$SINCE_ISO" ]; then + log "WARN: date -d '24 hours ago' failed (non-GNU date?) — skipping Codeberg activity" +fi RECENT_ISSUES="" RECENT_PRS="" if [ -n "$SINCE_ISO" ]; then @@ -99,9 +105,11 @@ if [ -n "$SINCE_ISO" ]; then jq -r --arg since "$SINCE_ISO" \ '.[] | select(.created_at >= $since) | " #\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' \ 2>/dev/null || true) - RECENT_PRS=$(codeberg_api GET "/pulls?state=open&limit=20&sort=newest" 2>/dev/null | \ + # Use state=closed to capture recently-merged PRs — merged activity is the + # key signal (e.g. new red-team PR merged since last evolution run). 
+ RECENT_PRS=$(codeberg_api GET "/pulls?state=closed&limit=20&sort=newest" 2>/dev/null | \ jq -r --arg since "$SINCE_ISO" \ - '.[] | select(.created_at >= $since) | " #\(.number) \(.title)"' \ + '.[] | select(.merged_at != null and .merged_at >= $since) | " #\(.number) \(.title) (merged \(.merged_at[:10]))"' \ 2>/dev/null || true) fi @@ -137,7 +145,7 @@ Active agent sessions (tmux): ${ACTIVE_SESSIONS} New issues: ${RECENT_ISSUES:- (none)} -Open PRs (recently updated): +Recently merged PRs (last 24h): ${RECENT_PRS:- (none)} ## Already-open predictions (do NOT duplicate these) @@ -214,7 +222,7 @@ CLAUDE_OUTPUT=$(timeout "$CLAUDE_TIMEOUT" claude -p "$PROMPT" \ log "claude finished ($(printf '%s' "$CLAUDE_OUTPUT" | wc -c) bytes)" -if printf '%s' "$CLAUDE_OUTPUT" | grep -q "NO_PREDICTIONS"; then +if printf '%s' "$CLAUDE_OUTPUT" | grep -qxF "NO_PREDICTIONS"; then log "no predictions — evidence looks healthy for ${PROJECT_NAME}" log "--- prediction-agent done ---" exit 0 diff --git a/planner/prediction-poll.sh b/planner/prediction-poll.sh index 06bb316..18ed401 100755 --- a/planner/prediction-poll.sh +++ b/planner/prediction-poll.sh @@ -17,6 +17,9 @@ FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" source "$FACTORY_ROOT/lib/env.sh" LOG_FILE="$SCRIPT_DIR/prediction.log" +# Global lock — projects are processed serially. If a single run takes longer +# than the cron interval (1h), the next cron invocation will find the lock held +# and exit silently. That is acceptable: LLM calls are cheap to skip. LOCK_FILE="/tmp/prediction-poll.lock" PROJECTS_DIR="$FACTORY_ROOT/projects"