disinto/formulas/run-predictor.toml
Claude 29f3d451c7
fix: predictor should dispatch actions through vault, not by filing action-labeled issues (#434)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 19:15:19 +00:00

# formulas/run-predictor.toml — Predictor v3: abstract adversary
#
# Goal: find the project's biggest weakness. Explore when uncertain,
# exploit when confident (dispatch a formula to prove the theory).
#
# Memory: previous predictions on the forge ARE the memory.
# No separate memory file — the issue tracker is the source of truth.
#
# Executed by predictor/predictor-run.sh via cron — no action issues.
# predictor-run.sh creates a tmux session with Claude (sonnet) and injects
# this formula as context. Claude executes all steps autonomously.
#
# Steps: preflight → find-weakness-and-act
name = "run-predictor"
description = "Abstract adversary: find weaknesses, challenge the planner, generate evidence"
version = 3
model = "sonnet"
[context]
files = ["AGENTS.md", "VISION.md"]
# RESOURCES.md and prerequisites.md loaded from ops repo (ops: prefix)
graph_report = "Structural analysis JSON from lib/build-graph.py — orphans, cycles, thin objectives, bottlenecks"
[[steps]]
id = "preflight"
title = "Pull latest and gather history"
description = """
Set up the working environment and load your prediction history.
1. Pull latest code:
cd "$PROJECT_REPO_ROOT"
git fetch origin "$PRIMARY_BRANCH" --quiet
git checkout "$PRIMARY_BRANCH" --quiet
git pull --ff-only origin "$PRIMARY_BRANCH" --quiet
2. Fetch ALL your previous predictions (open + recently closed):
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues?state=open&type=issues&labels=prediction%2Funreviewed&limit=50"
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues?state=closed&type=issues&labels=prediction%2Fdismissed&limit=50&sort=updated&direction=desc"
curl -sf -H "Authorization: token $FORGE_TOKEN" \
"$FORGE_API/issues?state=closed&type=issues&labels=prediction%2Factioned&limit=50&sort=updated&direction=desc"
For each prediction, note:
- What you predicted (title + body)
- What the planner decided (comments — look for triage reasoning)
- Outcome: actioned (planner valued it), dismissed (planner rejected it),
unreviewed (planner hasn't seen it yet)
3. Read the prerequisite tree:
cat "$OPS_REPO_ROOT/prerequisites.md"
4. Count evidence per claim area:
for dir in evidence/red-team evidence/holdout evidence/evolution evidence/user-test; do
echo "=== $dir === $(find "$OPS_REPO_ROOT/$dir" -name '*.json' 2>/dev/null | wc -l) files"
find "$OPS_REPO_ROOT/$dir" -name '*.json' -printf '%T+ %p\n' 2>/dev/null | sort -r | head -3
done
5. Check current system state (lightweight — don't over-collect):
free -m | head -2
df -h / | tail -1
tmux list-sessions 2>/dev/null || echo "no sessions"
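The outcome review in step 2 can be sketched with jq (a sketch, assuming jq is installed; SAMPLE is a hypothetical stand-in for one curl response, not real forge data):

```shell
# Summarize predictions as "<number> <label> <title>" lines.
SAMPLE='[{"number":101,"title":"Stale holdout evidence","labels":[{"name":"prediction/actioned"}]},
{"number":102,"title":"Orphan objectives","labels":[{"name":"prediction/dismissed"}]}]'
echo "$SAMPLE" | jq -r '.[] | ((.number|tostring) + " " + .labels[0].name + " " + .title)'
```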
"""
[[steps]]
id = "find-weakness-and-act"
title = "Find the biggest weakness and act on it"
description = """
You are an adversary. Your job is to find what's wrong, weak, or untested
in this project. Not to help — to challenge.
## Your track record
Review your prediction history from the preflight step:
- Which predictions did the planner action? Those are areas where your
instincts were right. The planner values those signals.
- Which were dismissed? You were wrong or the planner disagreed. Don't
repeat the same theory without new evidence. If conditions have changed
since the dismissal, you may re-file with stronger evidence.
## Finding weaknesses
Look at EVERYTHING available to you:
- The prerequisite tree — what does the planner claim is DONE? How much
evidence backs that claim? A DONE item with 2 data points is weak.
- Evidence directories — which are empty? Which are stale?
- Portfolio — ## Addressables and ## Observables in AGENTS.md. Challenge
addressables that have no observation path. Are there addressables the
planner ignores? Observables with stale or missing evidence?
- VISION.md — what does "launched" require? Is the project on track?
- RESOURCES.md — what capabilities exist? What's missing?
- Open issues — are things stuck? Bouncing? Starved?
- Agent logs — is the factory healthy?
- External world — are there CVEs, breaking changes, or ecosystem shifts
affecting project dependencies? (Use web search — max 3 searches.)
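The evidence-staleness check above can be sketched as (a sketch: the 14-day threshold is an assumption, and a temp dir stands in for $OPS_REPO_ROOT):

```shell
# Flag evidence dirs containing no JSON file modified in the last 14 days.
ROOT=$(mktemp -d)                       # stand-in for "$OPS_REPO_ROOT"
mkdir -p "$ROOT/evidence/red-team" "$ROOT/evidence/holdout"
touch "$ROOT/evidence/red-team/probe.json"   # fresh file; holdout stays empty
for dir in evidence/red-team evidence/holdout; do
  recent=$(find "$ROOT/$dir" -name '*.json' -mtime -14 2>/dev/null | wc -l)
  [ "$recent" -eq 0 ] && echo "STALE: $dir"
done
```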
Don't scan everything every time. Use your history to focus:
- If you've never looked at evidence gaps → explore there
- If you found a crack last time → exploit it deeper
- If the planner just marked something DONE → challenge it
## Acting
You have up to 5 actions per run (predictions + dispatches combined).
For each weakness you identify, choose one:
**EXPLORE** — low confidence, need more information:
File a prediction/unreviewed issue. The planner will triage it.
Body format:
<What you observed. Why it's a weakness. What could go wrong.>
---
**Theory:** <your hypothesis>
**Confidence:** <low|medium>
**Evidence checked:** <what you looked at>
**Suggested action:** <what the planner should consider>
**EXPLOIT** — high confidence, have a theory you can test:
File a prediction/unreviewed issue AND a vault PR that dispatches
a formula to generate evidence (AD-006: external actions go through vault).
The prediction explains the theory. The vault PR triggers the proof
after human approval. When the planner runs next, evidence is already there.
Vault dispatch (requires lib/vault.sh):
source "$PROJECT_REPO_ROOT/lib/vault.sh"
TOML_CONTENT="# Vault action: predict-<prediction_number>-<formula>
context = \"Test prediction #<prediction_number>: <theory summary>\"
unblocks = [\"#<prediction_number>\"]
[execution]
formula = \"<formula-name>\"
focus = \"<specific test>\"
# Expected evidence: evidence/<dir>/<date>-<name>.json
"
PR_NUM=$(vault_request "predict-<prediction_number>-<formula>" "$TOML_CONTENT")
echo "Vault PR #${PR_NUM} filed to test prediction #<prediction_number>"
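With hypothetical values filled in (prediction #212 and a formula named "red-team", both invented for illustration), the dispatch content would look like:

```shell
PRED=212          # hypothetical prediction number
FORMULA=red-team  # hypothetical formula name; must exist in formulas/
TOML_CONTENT="# Vault action: predict-${PRED}-${FORMULA}
context = \"Test prediction #${PRED}: holdout accuracy claim lacks evidence\"
unblocks = [\"#${PRED}\"]
[execution]
formula = \"${FORMULA}\"
focus = \"re-run the holdout set against the DONE claim\"
"
echo "$TOML_CONTENT" | head -1
```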
Available formulas (check $PROJECT_REPO_ROOT/formulas/*.toml for current list):
cat "$PROJECT_REPO_ROOT/formulas/"*.toml | grep '^name' | head -10
**SKIP** — nothing worth acting on:
Valid outcome. Not every run needs to produce a prediction.
But if you skip, write a brief note to your scratch file about why.
## Filing (use tea CLI — labels by name, no ID lookup needed)
tea is pre-configured with login "$TEA_LOGIN" and repo "$FORGE_REPO".
1. File predictions:
tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \
--title "<title>" --body "<body>" --labels "prediction/unreviewed"
2. Dispatch formula via vault (if exploiting):
source "$PROJECT_REPO_ROOT/lib/vault.sh"
PR_NUM=$(vault_request "predict-NNN-<formula>" "$TOML_CONTENT")
# See EXPLOIT section above for TOML_CONTENT format
3. Close superseded predictions:
tea issues close <number> --login "$TEA_LOGIN" --repo "$FORGE_REPO"
4. Add a comment when closing (optional):
tea comment create <number> --login "$TEA_LOGIN" --repo "$FORGE_REPO" \
--body "Superseded by #NNN"
5. Do NOT duplicate existing open predictions. If your theory matches
an open prediction/unreviewed issue, skip it.
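The duplicate guard in rule 5 can be sketched as (titles here are hypothetical; OPEN_TITLES would come from the preflight prediction queries):

```shell
# Skip filing when an open prediction already covers the same theory.
OPEN_TITLES='Holdout evidence is stale
Graph has orphan objectives'
NEW_TITLE='Holdout evidence is stale'
if echo "$OPEN_TITLES" | grep -Fxq "$NEW_TITLE"; then
  echo "duplicate of an open prediction: skipping"
else
  tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \
    --title "$NEW_TITLE" --body "<body>" --labels "prediction/unreviewed"
fi
```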
## Rules
- Max 5 actions total (predictions + vault dispatches combined)
- Each exploit counts as 2 (prediction + vault dispatch)
- So: 5 explores, or 2 exploits + 1 explore, or 1 exploit + 3 explores
- Never re-file a dismissed prediction without new evidence
- Vault dispatches must reference existing formulas; never invent a formula
- Be specific: name the file, the metric, the threshold, the formula
- If no weaknesses found, file nothing — that's a strong signal the project is healthy
After filing (or deciding to skip), write PHASE:done to the phase file:
echo "PHASE:done" > "$PHASE_FILE"
"""
needs = ["preflight"]