From c06cf81031ba96f43aea3618495a1adebb1d322a Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 19 Mar 2026 10:03:45 +0000
Subject: [PATCH 1/2] =?UTF-8?q?fix:=20feat:=20prediction-poll.sh=20?=
 =?UTF-8?q?=E2=80=94=20per-project=20LLM=20prediction=20agent=20(#140)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 planner/prediction-agent.sh | 269 ++++++++++++++++++++++++++++++++++++
 planner/prediction-poll.sh  |  66 +++++++++
 2 files changed, 335 insertions(+)
 create mode 100755 planner/prediction-agent.sh
 create mode 100755 planner/prediction-poll.sh

diff --git a/planner/prediction-agent.sh b/planner/prediction-agent.sh
new file mode 100755
index 0000000..59e89be
--- /dev/null
+++ b/planner/prediction-agent.sh
@@ -0,0 +1,269 @@
+#!/usr/bin/env bash
+# =============================================================================
+# prediction-agent.sh — Per-project LLM prediction agent
+#
+# Reads structured evidence from the project's evidence/ directory plus
+# secondary Codeberg signals, then asks Claude to identify patterns and
+# file up to 5 prediction/unreviewed issues for the planner to triage.
+#
+# The predictor is the goblin — it sees patterns and shouts about them.
+# The planner is the adult — it triages every prediction before acting.
+# The predictor MUST NOT emit feature work directly.
+#
+# Signal sources:
+#   evidence/red-team/    — attack results, floor status, vulnerability trends
+#   evidence/evolution/   — fitness scores, champion improvements
+#   evidence/user-test/   — persona journey completion, friction points
+#   evidence/holdout/     — scenario pass rates, quality gate history
+#   evidence/resources/   — CPU, RAM, disk, container utilization
+#   evidence/protocol/    — on-chain metrics from Ponder
+#
+# Secondary:
+#   Codeberg activity (new issues, merged PRs), system resource snapshot
+#
+# Usage: prediction-agent.sh [project-toml]
+# Called by: prediction-poll.sh
+# =============================================================================
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
+
+export PROJECT_TOML="${1:-}"
+# shellcheck source=../lib/env.sh
+source "$FACTORY_ROOT/lib/env.sh"
+
+LOG_FILE="$SCRIPT_DIR/prediction.log"
+CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}"
+EVIDENCE_DIR="${PROJECT_REPO_ROOT}/evidence"
+
+log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
+
+log "--- prediction-agent start (project: ${PROJECT_NAME}) ---"
+
+# ── Helpers ───────────────────────────────────────────────────────────────
+
+# Find the most recent JSON file in a directory (files named YYYY-MM-DD.json
+# sort correctly in alphabetical order).
+latest_json() { find "$1" -maxdepth 1 -name '*.json' 2>/dev/null | sort | tail -1; }
+prev_json()   { find "$1" -maxdepth 1 -name '*.json' 2>/dev/null | sort | tail -2 | head -1; }
+
+# ── Scan evidence/ directory ──────────────────────────────────────────────
+EVIDENCE_SUMMARY=""
+for subdir in red-team evolution user-test holdout resources protocol; do
+  subdir_path="${EVIDENCE_DIR}/${subdir}"
+
+  if [ ! -d "$subdir_path" ]; then
+    EVIDENCE_SUMMARY="${EVIDENCE_SUMMARY}
+=== evidence/${subdir} ===
+(no data — directory not yet created)"
+    continue
+  fi
+
+  latest=$(latest_json "$subdir_path")
+  if [ -z "$latest" ]; then
+    EVIDENCE_SUMMARY="${EVIDENCE_SUMMARY}
+=== evidence/${subdir} ===
+(no data — no JSON files found)"
+    continue
+  fi
+
+  file_ts=$(date -r "$latest" +%s)
+  now_ts=$(date +%s)
+  age_hours=$(( (now_ts - file_ts) / 3600 ))
+  latest_name=$(basename "$latest")
+  content=$(head -c 3000 "$latest" 2>/dev/null || echo "{}")
+
+  prev=$(prev_json "$subdir_path")
+  prev_section=""
+  if [ -n "$prev" ] && [ "$prev" != "$latest" ]; then
+    prev_name=$(basename "$prev")
+    prev_content=$(head -c 2000 "$prev" 2>/dev/null || echo "{}")
+    prev_section="
+  previous: ${prev_name}
+  previous_content: ${prev_content}"
+  fi
+
+  EVIDENCE_SUMMARY="${EVIDENCE_SUMMARY}
+=== evidence/${subdir} ===
+  latest: ${latest_name} (age: ${age_hours}h, path: ${latest})
+  content: ${content}${prev_section}"
+done
+
+# ── Secondary signals — Codeberg activity (last 24h) ─────────────────────
+SINCE_ISO=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || true)
+RECENT_ISSUES=""
+RECENT_PRS=""
+if [ -n "$SINCE_ISO" ]; then
+  RECENT_ISSUES=$(codeberg_api GET "/issues?state=open&type=issues&limit=20&sort=newest" 2>/dev/null | \
+    jq -r --arg since "$SINCE_ISO" \
+    '.[] | select(.created_at >= $since) | "  #\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' \
+    2>/dev/null || true)
+  RECENT_PRS=$(codeberg_api GET "/pulls?state=open&limit=20&sort=newest" 2>/dev/null | \
+    jq -r --arg since "$SINCE_ISO" \
+    '.[] | select(.created_at >= $since) | "  #\(.number) \(.title)"' \
+    2>/dev/null || true)
+fi
+
+# ── Already-open predictions (avoid duplicates) ───────────────────────────
+OPEN_PREDICTIONS=$(codeberg_api GET "/issues?state=open&type=issues&labels=prediction%2Funreviewed&limit=50" 2>/dev/null | \
+  jq -r '.[] | "  #\(.number) \(.title)"' 2>/dev/null || true)
+
+# ── System resource snapshot ──────────────────────────────────────────────
+AVAIL_MB=$(free -m | awk '/Mem:/{print $7}' 2>/dev/null || echo "unknown")
+DISK_PCT=$(df -h / | awk 'NR==2{print $5}' | tr -d '%' 2>/dev/null || echo "unknown")
+LOAD_AVG=$(cut -d' ' -f1-3 /proc/loadavg 2>/dev/null || echo "unknown")
+ACTIVE_SESSIONS=$(tmux list-sessions 2>/dev/null | \
+  grep -cE "^(dev|action|gardener|review)-" || true)  # grep -c prints 0 itself when nothing matches
+
+# ── Build prompt ──────────────────────────────────────────────────────────
+PROMPT="You are the prediction agent (goblin) for ${CODEBERG_REPO}.
+
+Your role: spot patterns in evidence and signal them as prediction issues.
+The planner (adult) will triage every prediction before acting.
+You MUST NOT emit feature work or implementation issues — only predictions
+about evidence state, metric trends, and system conditions.
+
+## Evidence from evidence/ directory
+${EVIDENCE_SUMMARY}
+
+## System resource snapshot (right now)
+Available RAM: ${AVAIL_MB}MB
+Disk used: ${DISK_PCT}%
+Load avg (1/5/15 min): ${LOAD_AVG}
+Active agent sessions (tmux): ${ACTIVE_SESSIONS}
+
+## Recent Codeberg activity (last 24h)
+New issues:
+${RECENT_ISSUES:-  (none)}
+
+Open PRs (recently updated):
+${RECENT_PRS:-  (none)}
+
+## Already-open predictions (do NOT duplicate these)
+${OPEN_PREDICTIONS:-  (none)}
+
+## What to look for
+
+**Staleness** — Evidence older than its expected refresh interval:
+- red-team: stale after 7 days
+- evolution: stale after 7 days
+- user-test: stale after 14 days
+- holdout: stale after 7 days
+- resources: stale after 1 day
+- protocol: stale after 1 day
+- any directory missing entirely: flag as critical gap
+
+**Regression** — Metrics worse in latest vs previous run:
+- Decreased: fitness score, pass rate, conversion, floor price
+- Increased: error count, risk score, ETH extracted by attacker
+- Only flag if change is meaningful (>5% relative, or clearly significant)
+
+**Opportunity** — Conditions that make a process worth running now:
+- Box is relatively idle (RAM>2000MB, load<2.0, no active agent sessions)
+  AND evidence is stale — good time to run evolution or red-team
+- New attack vectors in red-team since last evolution run → evolution scores stale
+
+**Risk** — Conditions that suggest deferring expensive work:
+- RAM<1500MB or disk>85% or load>3.0 → defer evolution/red-team
+- Active dev session in progress on related work
+
+## Output format
+
+For each prediction, output a JSON object on its own line (no array wrapper,
+no markdown fences):
+
+{\"title\": \"...\", \"signal_source\": \"...\", \"confidence\": \"high|medium|low\", \"suggested_action\": \"...\", \"body\": \"...\"}
+
+Fields:
+- title: Short declarative statement of what you observed. Not an action.
+- signal_source: Which evidence file or signal triggered this
+  (e.g. \"evidence/evolution/2024-01-15.json\", \"system resources\",
+  \"evidence/red-team/ missing\")
+- confidence: high (clear numerical evidence), medium (trend/pattern),
+  low (inferred or absent data but important to flag)
+- suggested_action: Concrete next step for the planner —
+  \"run formula X\", \"file issue for Y\", \"escalate to human\",
+  \"monitor for N days\", \"run process X\"
+- body: 2-4 sentences. What changed or is missing, why it matters,
+  what the planner should consider doing. Be specific: name the file,
+  metric, and value.
+
+## Rules
+- Max 5 predictions total
+- Do NOT predict feature work — only evidence/metric/system observations
+- Do NOT duplicate existing open predictions (listed above)
+- Do NOT predict things you cannot support with the evidence provided
+- Prefer high-confidence predictions; emit low-confidence only when the
+  signal is important (e.g. missing critical evidence)
+- Be specific: name the file, the metric, the value
+
+If you see no meaningful patterns, output exactly: NO_PREDICTIONS
+
+Output ONLY the JSON lines (or NO_PREDICTIONS) — no preamble, no markdown."
+
+# ── Invoke Claude (one-shot) ──────────────────────────────────────────────
+log "invoking claude -p for ${PROJECT_NAME} predictions"
+CLAUDE_OUTPUT=$(timeout "$CLAUDE_TIMEOUT" claude -p "$PROMPT" \
+  --model sonnet \
+  2>/dev/null) || {
+  EXIT_CODE=$?
+  log "ERROR: claude exited with code $EXIT_CODE"
+  exit 1
+}
+
+log "claude finished ($(printf '%s' "$CLAUDE_OUTPUT" | wc -c) bytes)"
+
+if printf '%s' "$CLAUDE_OUTPUT" | grep -q "NO_PREDICTIONS"; then
+  log "no predictions — evidence looks healthy for ${PROJECT_NAME}"
+  log "--- prediction-agent done ---"
+  exit 0
+fi
+
+# ── Look up prediction/unreviewed label ───────────────────────────────────
+PREDICTION_LABEL_ID=$(codeberg_api GET "/labels" 2>/dev/null | \
+  jq -r '.[] | select(.name == "prediction/unreviewed") | .id' 2>/dev/null || true)
+if [ -z "$PREDICTION_LABEL_ID" ]; then
+  log "WARN: 'prediction/unreviewed' label not found — issues created without label (see #141)"
+fi
+
+# ── Create prediction issues ──────────────────────────────────────────────
+CREATED=0
+while IFS= read -r line; do
+  [ -z "$line" ] && continue
+  # Keep only JSON objects with a string title. Bare scalars/arrays are valid
+  # JSON too, but the field lookups below would fail on them and abort (set -e).
+  printf '%s' "$line" | jq -e 'type == "object" and (.title | type == "string")' >/dev/null 2>&1 || continue
+
+  TITLE=$(printf '%s' "$line"  | jq -r '.title')
+  SIGNAL=$(printf '%s' "$line" | jq -r '.signal_source // "unknown"')
+  CONFIDENCE=$(printf '%s' "$line" | jq -r '.confidence // "medium"')
+  ACTION=$(printf '%s' "$line" | jq -r '.suggested_action // ""')
+  BODY_TEXT=$(printf '%s' "$line" | jq -r '.body // ""')
+
+  FULL_BODY="${BODY_TEXT}
+
+---
+**Signal source:** ${SIGNAL}
+**Confidence:** ${CONFIDENCE}
+**Suggested action:** ${ACTION}"
+
+  CREATE_PAYLOAD=$(jq -nc --arg t "$TITLE" --arg b "$FULL_BODY" '{title: $t, body: $b}')
+  if [ -n "$PREDICTION_LABEL_ID" ]; then
+    CREATE_PAYLOAD=$(printf '%s' "$CREATE_PAYLOAD" | \
+      jq --argjson lid "$PREDICTION_LABEL_ID" '.labels = [$lid]')
+  fi
+
+  RESULT=$(codeberg_api POST "/issues" -d "$CREATE_PAYLOAD" 2>/dev/null || true)
+  ISSUE_NUM=$(printf '%s' "$RESULT" | jq -r '.number // empty' 2>/dev/null || true)
+  # No issue number in the response means the POST failed: log it, do not count it.
+  [ -n "$ISSUE_NUM" ] || { log "ERROR: failed to create prediction issue: ${TITLE}"; continue; }
+
+  log "Created prediction #${ISSUE_NUM} [${CONFIDENCE}]: ${TITLE}"
+  matrix_send "predictor" "🔮 Prediction #${ISSUE_NUM} [${CONFIDENCE}]: ${TITLE} — ${ACTION}" 2>/dev/null || true
+
+  CREATED=$((CREATED + 1))
+  [ "$CREATED" -ge 5 ] && break
+done <<< "$CLAUDE_OUTPUT"
+
+log "--- prediction-agent done (created ${CREATED} predictions for ${PROJECT_NAME}) ---"
diff --git a/planner/prediction-poll.sh b/planner/prediction-poll.sh
new file mode 100755
index 0000000..06bb316
--- /dev/null
+++ b/planner/prediction-poll.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# =============================================================================
+# prediction-poll.sh — Cron wrapper for prediction-agent (per-project)
+#
+# Runs hourly. Guards against concurrent runs and low memory.
+# Iterates over all registered projects and runs prediction-agent.sh for each.
+#
+# Cron: 0 * * * * /path/to/disinto/planner/prediction-poll.sh
+# Log:  tail -f /path/to/disinto/planner/prediction.log
+# =============================================================================
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
+
+# shellcheck source=../lib/env.sh
+source "$FACTORY_ROOT/lib/env.sh"
+
+LOG_FILE="$SCRIPT_DIR/prediction.log"
+LOCK_FILE="/tmp/prediction-poll.lock"
+PROJECTS_DIR="$FACTORY_ROOT/projects"
+
+log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
+
+# ── Lock ──────────────────────────────────────────────────────────────────
+if [ -f "$LOCK_FILE" ]; then
+  LOCK_PID=$(cat "$LOCK_FILE" 2>/dev/null || true)
+  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then
+    log "poll: prediction running (PID $LOCK_PID)"
+    exit 0
+  fi
+  rm -f "$LOCK_FILE"
+fi
+echo $$ > "$LOCK_FILE"
+trap 'rm -f "$LOCK_FILE"' EXIT
+
+# ── Memory guard ──────────────────────────────────────────────────────────
+AVAIL_MB=$(free -m | awk '/Mem:/{print $7}')
+if [ "${AVAIL_MB:-0}" -lt 2000 ]; then
+  log "poll: skipping — only ${AVAIL_MB}MB available (need 2000)"
+  exit 0
+fi
+
+log "--- Prediction poll start ---"
+
+# ── Iterate over projects ─────────────────────────────────────────────────
+PROJECT_COUNT=0
+if [ -d "$PROJECTS_DIR" ]; then
+  for project_toml in "$PROJECTS_DIR"/*.toml; do
+    [ -f "$project_toml" ] || continue
+    PROJECT_COUNT=$((PROJECT_COUNT + 1))
+    log "starting prediction-agent for $(basename "$project_toml")"
+    if ! "$SCRIPT_DIR/prediction-agent.sh" "$project_toml"; then
+      log "prediction-agent exited non-zero for $(basename "$project_toml")"
+    fi
+  done
+fi
+
+if [ "$PROJECT_COUNT" -eq 0 ]; then
+  log "No projects/*.toml found — running prediction-agent with .env defaults"
+  if ! "$SCRIPT_DIR/prediction-agent.sh"; then
+    log "prediction-agent exited non-zero"
+  fi
+fi
+
+log "--- Prediction poll done ---"

From d2f788239aa2f7325001a4031d3ecaa95d5f6979 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 19 Mar 2026 10:13:22 +0000
Subject: [PATCH 2/2] fix: address review feedback on prediction agent (#140)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove CLAUDE_TIMEOUT no-op override — inherit factory default (7200s) from env.sh
- Use anchored grep -qxF "NO_PREDICTIONS" to avoid false early exits
- Fetch closed PRs (state=closed, merged_at filter) instead of open — captures merged activity signals
- Parse staleness age from filename date (YYYY-MM-DD.json) instead of file mtime
- Log a warning when date -d falls back due to non-GNU date
- Add comment explaining global lock serialisation trade-off

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 planner/prediction-agent.sh | 22 +++++++++++++++-------
 planner/prediction-poll.sh  |  3 +++
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/planner/prediction-agent.sh b/planner/prediction-agent.sh
index 59e89be..528e425 100755
--- a/planner/prediction-agent.sh
+++ b/planner/prediction-agent.sh
@@ -34,7 +34,7 @@ export PROJECT_TOML="${1:-}"
 source "$FACTORY_ROOT/lib/env.sh"
 
 LOG_FILE="$SCRIPT_DIR/prediction.log"
-CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}"
+# env.sh already exports CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"; inherit that default
 EVIDENCE_DIR="${PROJECT_REPO_ROOT}/evidence"
 
 log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
@@ -68,10 +68,13 @@ for subdir in red-team evolution user-test holdout resources protocol; do
     continue
   fi
 
-  file_ts=$(date -r "$latest" +%s)
+  latest_name=$(basename "$latest")
+  # Derive age from the date in the filename (YYYY-MM-DD.json) — more reliable
+  # than mtime, which changes when files are copied or synced.
+  file_date=$(basename "$latest" .json)
+  file_ts=$(date -d "$file_date" +%s 2>/dev/null || date -r "$latest" +%s)
   now_ts=$(date +%s)
   age_hours=$(( (now_ts - file_ts) / 3600 ))
-  latest_name=$(basename "$latest")
   content=$(head -c 3000 "$latest" 2>/dev/null || echo "{}")
 
   prev=$(prev_json "$subdir_path")
@@ -92,6 +95,9 @@ done
 
 # ── Secondary signals — Codeberg activity (last 24h) ─────────────────────
 SINCE_ISO=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || true)
+if [ -z "$SINCE_ISO" ]; then
+  log "WARN: date -d '24 hours ago' failed (non-GNU date?) — skipping Codeberg activity"
+fi
 RECENT_ISSUES=""
 RECENT_PRS=""
 if [ -n "$SINCE_ISO" ]; then
@@ -99,9 +105,11 @@ if [ -n "$SINCE_ISO" ]; then
     jq -r --arg since "$SINCE_ISO" \
     '.[] | select(.created_at >= $since) | "  #\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' \
     2>/dev/null || true)
-  RECENT_PRS=$(codeberg_api GET "/pulls?state=open&limit=20&sort=newest" 2>/dev/null | \
+  # Use state=closed to capture recently-merged PRs — merged activity is the
+  # key signal (e.g. new red-team PR merged since last evolution run).
+  RECENT_PRS=$(codeberg_api GET "/pulls?state=closed&limit=20&sort=newest" 2>/dev/null | \
     jq -r --arg since "$SINCE_ISO" \
-    '.[] | select(.created_at >= $since) | "  #\(.number) \(.title)"' \
+    '.[] | select(.merged_at != null and .merged_at >= $since) | "  #\(.number) \(.title) (merged \(.merged_at[:10]))"' \
     2>/dev/null || true)
 fi
 
@@ -137,7 +145,7 @@ Active agent sessions (tmux): ${ACTIVE_SESSIONS}
 New issues:
 ${RECENT_ISSUES:-  (none)}
 
-Open PRs (recently updated):
+Recently merged PRs (last 24h):
 ${RECENT_PRS:-  (none)}
 
 ## Already-open predictions (do NOT duplicate these)
@@ -214,7 +222,7 @@ CLAUDE_OUTPUT=$(timeout "$CLAUDE_TIMEOUT" claude -p "$PROMPT" \
 
 log "claude finished ($(printf '%s' "$CLAUDE_OUTPUT" | wc -c) bytes)"
 
-if printf '%s' "$CLAUDE_OUTPUT" | grep -q "NO_PREDICTIONS"; then
+if printf '%s' "$CLAUDE_OUTPUT" | grep -qxF "NO_PREDICTIONS"; then
   log "no predictions — evidence looks healthy for ${PROJECT_NAME}"
   log "--- prediction-agent done ---"
   exit 0
diff --git a/planner/prediction-poll.sh b/planner/prediction-poll.sh
index 06bb316..18ed401 100755
--- a/planner/prediction-poll.sh
+++ b/planner/prediction-poll.sh
@@ -17,6 +17,9 @@ FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
 source "$FACTORY_ROOT/lib/env.sh"
 
 LOG_FILE="$SCRIPT_DIR/prediction.log"
+# Global lock — projects are processed serially. If a single run takes longer
+# than the cron interval (1h), the next cron invocation will find the lock held,
+# log a notice, and exit 0. That is acceptable: LLM calls are cheap to skip.
 LOCK_FILE="/tmp/prediction-poll.lock"
 PROJECTS_DIR="$FACTORY_ROOT/projects"