Fixes #757 ## Changes Separate operations from code into {project}-ops repo pattern. Added OPS_REPO_ROOT infrastructure (env.sh, load-project.sh, formula-session.sh with ensure_ops_repo helper). Updated all 8 agent scripts and 7 formulas to read/write vault items, journals, evidence, prerequisites, RESOURCES.md, and knowledge from the ops repo. Added setup_ops_repo() to disinto init for automatic ops repo creation and seeding. Removed migrated data from code repo (vault data dirs, planner journal/memory/prerequisites, supervisor journal/best-practices, evidence, RESOURCES.md). Updated all documentation. 55 files changed, ShellCheck clean, all 38 phase tests pass. Co-authored-by: openhands <openhands@all-hands.dev> Reviewed-on: https://codeberg.org/johba/disinto/pulls/767 Reviewed-by: Disinto_bot <disinto_bot@noreply.codeberg.org>
This commit is contained in:
parent
a899fd0733
commit
71fe89cdd0
55 changed files with 421 additions and 932 deletions
19
AGENTS.md
19
AGENTS.md
|
|
@ -13,16 +13,14 @@ See `README.md` for the full architecture and `BOOTSTRAP.md` for setup.
|
||||||
## Directory layout
|
## Directory layout
|
||||||
|
|
||||||
```
|
```
|
||||||
disinto/
|
disinto/ (code repo)
|
||||||
├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh — issue implementation
|
├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh — issue implementation
|
||||||
├── review/ review-poll.sh, review-pr.sh — PR review
|
├── review/ review-poll.sh, review-pr.sh — PR review
|
||||||
├── gardener/ gardener-run.sh — direct cron executor for run-gardener formula
|
├── gardener/ gardener-run.sh — direct cron executor for run-gardener formula
|
||||||
├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula
|
├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula
|
||||||
├── planner/ planner-run.sh — direct cron executor for run-planner formula
|
├── planner/ planner-run.sh — direct cron executor for run-planner formula
|
||||||
│ planner/journal/ — daily raw logs from each planner run
|
|
||||||
├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper)
|
├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper)
|
||||||
│ preflight.sh — pre-flight data collection for supervisor formula
|
│ preflight.sh — pre-flight data collection for supervisor formula
|
||||||
│ supervisor/journal/ — daily health logs from each run
|
|
||||||
│ supervisor-poll.sh — legacy bash orchestrator (superseded)
|
│ supervisor-poll.sh — legacy bash orchestrator (superseded)
|
||||||
├── vault/ vault-poll.sh, vault-agent.sh, vault-fire.sh — action gating + procurement
|
├── vault/ vault-poll.sh, vault-agent.sh, vault-fire.sh — action gating + procurement
|
||||||
├── action/ action-poll.sh, action-agent.sh — operational task execution
|
├── action/ action-poll.sh, action-agent.sh — operational task execution
|
||||||
|
|
@ -30,6 +28,21 @@ disinto/
|
||||||
├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored)
|
├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored)
|
||||||
├── formulas/ Issue templates (TOML specs for multi-step agent tasks)
|
├── formulas/ Issue templates (TOML specs for multi-step agent tasks)
|
||||||
└── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md)
|
└── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md)
|
||||||
|
|
||||||
|
disinto-ops/ (ops repo — {project}-ops)
|
||||||
|
├── vault/
|
||||||
|
│ ├── pending/ vault items awaiting approval
|
||||||
|
│ ├── approved/ approved vault items
|
||||||
|
│ ├── fired/ executed vault items
|
||||||
|
│ └── rejected/ rejected vault items
|
||||||
|
├── journal/
|
||||||
|
│ ├── planner/ daily planning logs
|
||||||
|
│ └── supervisor/ operational health logs
|
||||||
|
├── knowledge/ shared agent knowledge + best practices
|
||||||
|
├── evidence/ engagement data, experiment results
|
||||||
|
├── portfolio.md addressables + observables
|
||||||
|
├── prerequisites.md dependency graph
|
||||||
|
└── RESOURCES.md accounts, tokens (refs), infra inventory
|
||||||
```
|
```
|
||||||
|
|
||||||
> **Terminology note:** "Formulas" in this repo are TOML issue templates in `formulas/` that
|
> **Terminology note:** "Formulas" in this repo are TOML issue templates in `formulas/` that
|
||||||
|
|
|
||||||
37
RESOURCES.md
37
RESOURCES.md
|
|
@ -1,37 +0,0 @@
|
||||||
# RESOURCES.md — Factory Capability Inventory
|
|
||||||
|
|
||||||
## harb-staging
|
|
||||||
- type: compute
|
|
||||||
- capability: run disinto agents, serve website, CI server
|
|
||||||
- agents: dev, review, action, gardener, supervisor, planner, predictor
|
|
||||||
- ram: 8GB
|
|
||||||
- note: disinto-only — no other project agents on this box
|
|
||||||
|
|
||||||
## codeberg-johba
|
|
||||||
- type: source-control
|
|
||||||
- capability: host repos, issue tracker, PR workflow, API access
|
|
||||||
- repos: johba/disinto
|
|
||||||
- note: owner account
|
|
||||||
|
|
||||||
## codeberg-disinto-bot
|
|
||||||
- type: source-control
|
|
||||||
- capability: review PRs, merge PRs, push branches
|
|
||||||
- repos: johba/disinto
|
|
||||||
- note: bot account, push+pull permissions, no admin
|
|
||||||
|
|
||||||
## woodpecker-ci
|
|
||||||
- type: ci
|
|
||||||
- capability: run pipelines on PR and push events, docker backend
|
|
||||||
- url: ci.niovi.voyage
|
|
||||||
- note: self-hosted on harb-staging
|
|
||||||
|
|
||||||
## disinto-ai
|
|
||||||
- type: asset
|
|
||||||
- capability: static site, landing page, dashboard
|
|
||||||
- domain: disinto.ai, www.disinto.ai
|
|
||||||
- note: served by Caddy on harb-staging
|
|
||||||
|
|
||||||
## telegram-clawy
|
|
||||||
- type: communication
|
|
||||||
- capability: notify human, collect decisions, relay vault requests
|
|
||||||
- note: OpenClaw bot, human's primary interface
|
|
||||||
128
bin/disinto
128
bin/disinto
|
|
@ -699,6 +699,127 @@ setup_forge() {
|
||||||
echo "Forge: ${forge_url} (ready)"
|
echo "Forge: ${forge_url} (ready)"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Create and seed the {project}-ops repo on Forgejo with initial directory structure.
# The ops repo holds operational data: vault items, journals, evidence, prerequisites.
#
# Arguments:
#   $1 forge_url       base URL of the Forgejo instance
#   $2 ops_slug        "<owner>/<name>" of the ops repo
#   $3 ops_root        local path of the ops repo checkout
#   $4 primary_branch  default branch name (default: main)
# Globals:
#   FORGE_TOKEN  (read) API token; also embedded in the clone URL
#   admin_token  (read, optional) used for create/collaborator calls when the
#                caller has it set; otherwise FORGE_TOKEN is used
# Outputs:
#   progress lines on stdout, warnings on stderr
#
# All remote steps are best-effort: a failed API call, clone or push is
# reported on stderr and local seeding still runs.
setup_ops_repo() {
  local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}"
  local org_name="${ops_slug%%/*}"
  local ops_name="${ops_slug##*/}"
  local api_token="${admin_token:-${FORGE_TOKEN}}"

  echo ""
  echo "── Ops repo setup ─────────────────────────────────────"

  # Check if ops repo already exists on Forgejo
  if curl -sf --max-time 5 \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${forge_url}/api/v1/repos/${ops_slug}" >/dev/null 2>&1; then
    echo "Ops repo: ${ops_slug} (already exists on Forgejo)"
  else
    # Create ops repo under the org; fall back to the token owner's account.
    local created=false
    if curl -sf -X POST \
      -H "Authorization: token ${api_token}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs/${org_name}/repos" \
      -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then
      created=true
    elif curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/user/repos" \
      -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data\"}" >/dev/null 2>&1; then
      created=true
    fi

    # Add all bot users as collaborators (best-effort: a missing bot account
    # must not abort init)
    local bot_user
    for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot action-bot; do
      curl -sf -X PUT \
        -H "Authorization: token ${api_token}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/repos/${ops_slug}/collaborators/${bot_user}" \
        -d '{"permission":"write"}' >/dev/null 2>&1 || true
    done

    # Only claim success when one of the create calls actually succeeded.
    if [ "$created" = true ]; then
      echo "Ops repo: ${ops_slug} created on Forgejo"
    else
      echo "Warning: could not create ops repo ${ops_slug} on Forgejo — continuing with local setup" >&2
    fi
  fi

  # Clone ops repo locally if not present
  if [ ! -d "${ops_root}/.git" ]; then
    # Insert credentials after the first "://" using parameter expansion, so a
    # token containing '|', '&' or '\' cannot corrupt the URL (as it could in a
    # sed replacement string).
    local auth_url="$forge_url"
    case "$forge_url" in
      *://*) auth_url="${forge_url%%://*}://dev-bot:${FORGE_TOKEN}@${forge_url#*://}" ;;
    esac
    local clone_url="${auth_url}/${ops_slug}.git"
    echo "Cloning: ops repo -> ${ops_root}"
    if ! git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then
      echo "Initializing: ops repo at ${ops_root}"
      mkdir -p "$ops_root"
      git -C "$ops_root" init --initial-branch="${primary_branch}" -q \
        || echo "Warning: git init failed in ${ops_root}" >&2
    fi
  else
    echo "Ops repo: ${ops_root} (already exists locally)"
  fi

  # Seed directory structure. Git does not track empty directories, so each
  # empty leaf gets a .gitkeep placeholder; otherwise the layout would exist
  # only on this box and never reach the remote.
  local seeded=false
  local seed_dir
  for seed_dir in \
    vault/pending vault/approved vault/fired vault/rejected \
    journal/planner journal/supervisor \
    knowledge evidence/engagement; do
    mkdir -p "${ops_root}/${seed_dir}"
    if [ -z "$(ls -A "${ops_root}/${seed_dir}" 2>/dev/null)" ]; then
      : > "${ops_root}/${seed_dir}/.gitkeep"
      seeded=true
    fi
  done

  if [ ! -f "${ops_root}/README.md" ]; then
    cat > "${ops_root}/README.md" <<OPSEOF
# ${ops_name}

Operational data for the ${ops_name%-ops} project.

## Structure

\`\`\`
${ops_name}/
├── vault/
│   ├── pending/       # vault items awaiting approval
│   ├── approved/      # approved vault items
│   ├── fired/         # executed vault items
│   └── rejected/      # rejected vault items
├── journal/
│   ├── planner/       # daily planning logs
│   └── supervisor/    # operational health logs
├── knowledge/         # shared agent knowledge and best practices
├── evidence/          # engagement data, experiment results
├── portfolio.md       # addressables + observables
├── prerequisites.md   # dependency graph
└── RESOURCES.md       # accounts, tokens (refs), infra inventory
\`\`\`

## Branch protection

- \`${primary_branch}\`: 2 reviewers required for vault items
- Journal/evidence commits may use lighter rules
OPSEOF
    seeded=true
  fi

  # Create stub files if they don't exist
  [ -f "${ops_root}/portfolio.md" ] || { echo "# Portfolio" > "${ops_root}/portfolio.md"; seeded=true; }
  [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; }
  [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; }

  # Commit and push seed content
  if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then
    git -C "$ops_root" add -A
    if ! git -C "$ops_root" diff --cached --quiet 2>/dev/null; then
      git -C "$ops_root" commit -m "chore: seed ops repo structure" -q
      # Push if remote exists (a locally initialised repo has no origin)
      if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then
        git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null \
          || echo "Warning: could not push seed commit to origin/${primary_branch}" >&2
      fi
    fi
    echo "Seeded: ops repo with initial structure"
  fi
}
|
||||||
|
|
||||||
# Push local clone to the Forgejo remote.
|
# Push local clone to the Forgejo remote.
|
||||||
push_to_forge() {
|
push_to_forge() {
|
||||||
local repo_root="$1" forge_url="$2" repo_slug="$3"
|
local repo_root="$1" forge_url="$2" repo_slug="$3"
|
||||||
|
|
@ -874,8 +995,10 @@ generate_toml() {
|
||||||
|
|
||||||
name = "${name}"
|
name = "${name}"
|
||||||
repo = "${repo}"
|
repo = "${repo}"
|
||||||
|
ops_repo = "${repo}-ops"
|
||||||
forge_url = "${forge_url}"
|
forge_url = "${forge_url}"
|
||||||
repo_root = "${root}"
|
repo_root = "${root}"
|
||||||
|
ops_repo_root = "/home/${USER}/${name}-ops"
|
||||||
primary_branch = "${branch}"
|
primary_branch = "${branch}"
|
||||||
|
|
||||||
[ci]
|
[ci]
|
||||||
|
|
@ -1290,6 +1413,11 @@ p.write_text(text)
|
||||||
fi
|
fi
|
||||||
echo "Branch: ${branch}"
|
echo "Branch: ${branch}"
|
||||||
|
|
||||||
|
# Set up {project}-ops repo (#757)
|
||||||
|
local ops_slug="${forge_repo}-ops"
|
||||||
|
local ops_root="/home/${USER}/${project_name}-ops"
|
||||||
|
setup_ops_repo "$forge_url" "$ops_slug" "$ops_root" "$branch"
|
||||||
|
|
||||||
# Generate project TOML (skip if already exists)
|
# Generate project TOML (skip if already exists)
|
||||||
if [ "$toml_exists" = false ]; then
|
if [ "$toml_exists" = false ]; then
|
||||||
# Prompt for CI ID if interactive and not already set via flag
|
# Prompt for CI ID if interactive and not already set via flag
|
||||||
|
|
|
||||||
|
|
@ -35,14 +35,14 @@ Different domains have different platforms:
|
||||||
Agents won't need to understand each platform. **Processes act as adapters** — they will read a platform's API and write structured evidence to git.
|
Agents won't need to understand each platform. **Processes act as adapters** — they will read a platform's API and write structured evidence to git.
|
||||||
|
|
||||||
```
|
```
|
||||||
[Caddy logs] ──→ collect-engagement process ──→ evidence/engagement/YYYY-MM-DD.json
|
[Caddy logs] ──→ collect-engagement process ──→ {project}-ops/evidence/engagement/YYYY-MM-DD.json
|
||||||
[Google Analytics] ──→ measure-funnel process ──→ evidence/funnel/YYYY-MM-DD.json
|
[Google Analytics] ──→ measure-funnel process ──→ {project}-ops/evidence/funnel/YYYY-MM-DD.json
|
||||||
[Ponder GraphQL] ──→ measure-protocol process ──→ evidence/protocol/YYYY-MM-DD.json
|
[Ponder GraphQL] ──→ measure-protocol process ──→ {project}-ops/evidence/protocol/YYYY-MM-DD.json
|
||||||
[System stats] ──→ measure-resources process ──→ evidence/resources/YYYY-MM-DD.json
|
[System stats] ──→ measure-resources process ──→ {project}-ops/evidence/resources/YYYY-MM-DD.json
|
||||||
[Playwright] ──→ run-user-test process ──→ evidence/user-test/YYYY-MM-DD.json
|
[Playwright] ──→ run-user-test process ──→ {project}-ops/evidence/user-test/YYYY-MM-DD.json
|
||||||
```
|
```
|
||||||
|
|
||||||
The planner will read `evidence/` — not Analytics, not Ponder, not DigitalOcean. Evidence is the normalized interface between the world and decisions.
|
The planner will read `$OPS_REPO_ROOT/evidence/` — not Analytics, not Ponder, not DigitalOcean. Evidence is the normalized interface between the world and decisions.
|
||||||
|
|
||||||
> **Terminology note — "process" vs "formula":** In this document, "process" means a self-contained measurement or mutation pipeline that reads an external platform and writes structured evidence to git. This is distinct from disinto's "formulas" (`formulas/*.toml`), which are TOML issue templates that guide agents through multi-step operational work (see `AGENTS.md` § Directory layout). Processes produce evidence; formulas orchestrate agent tasks.
|
> **Terminology note — "process" vs "formula":** In this document, "process" means a self-contained measurement or mutation pipeline that reads an external platform and writes structured evidence to git. This is distinct from disinto's "formulas" (`formulas/*.toml`), which are TOML issue templates that guide agents through multi-step operational work (see `AGENTS.md` § Directory layout). Processes produce evidence; formulas orchestrate agent tasks.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ The dev-agent is completely starved until they are promoted or resolved.
|
||||||
For each tier-0 issue:
|
For each tier-0 issue:
|
||||||
- Read the full body: curl -sf -H "Authorization: token $FORGE_TOKEN" "$FORGE_API/issues/{number}"
|
- Read the full body: curl -sf -H "Authorization: token $FORGE_TOKEN" "$FORGE_API/issues/{number}"
|
||||||
- If resolvable: promote to backlog — add acceptance criteria, affected files, relabel
|
- If resolvable: promote to backlog — add acceptance criteria, affected files, relabel
|
||||||
- If needs human decision: file a vault procurement item (vault/pending/<id>.md)
|
- If needs human decision: file a vault procurement item ($OPS_REPO_ROOT/vault/pending/<id>.md)
|
||||||
- If invalid / wontfix: close with explanation comment
|
- If invalid / wontfix: close with explanation comment
|
||||||
|
|
||||||
After completing all tier-0, re-fetch to check for new blockers:
|
After completing all tier-0, re-fetch to check for new blockers:
|
||||||
|
|
@ -136,7 +136,7 @@ DUPLICATE (>80% overlap after reading both bodies — confirm before closing):
|
||||||
Write: echo "ACTION: closed #NNN as duplicate of #OLDER" >> "$RESULT_FILE"
|
Write: echo "ACTION: closed #NNN as duplicate of #OLDER" >> "$RESULT_FILE"
|
||||||
|
|
||||||
VAULT (ambiguous scope, architectural question, needs human decision):
|
VAULT (ambiguous scope, architectural question, needs human decision):
|
||||||
File a vault procurement item at $PROJECT_REPO_ROOT/vault/pending/<id>.md:
|
File a vault procurement item at $OPS_REPO_ROOT/vault/pending/<id>.md:
|
||||||
# <What decision or resource is needed>
|
# <What decision or resource is needed>
|
||||||
## What
|
## What
|
||||||
<description>
|
<description>
|
||||||
|
|
@ -144,7 +144,7 @@ VAULT (ambiguous scope, architectural question, needs human decision):
|
||||||
<which issue this unblocks>
|
<which issue this unblocks>
|
||||||
## Unblocks
|
## Unblocks
|
||||||
- #NNN — <title>
|
- #NNN — <title>
|
||||||
Log: echo "VAULT: filed vault/pending/<id>.md for #NNN — <reason>" >> "$RESULT_FILE"
|
Log: echo "VAULT: filed $OPS_REPO_ROOT/vault/pending/<id>.md for #NNN — <reason>" >> "$RESULT_FILE"
|
||||||
|
|
||||||
Dust vs ore rules:
|
Dust vs ore rules:
|
||||||
Dust: comment fix, variable rename, whitespace/formatting, single-line edit, trivial cleanup with no behavior change
|
Dust: comment fix, variable rename, whitespace/formatting, single-line edit, trivial cleanup with no behavior change
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ Do NOT flag:
|
||||||
|
|
||||||
## 4. Vault item quality (conditional)
|
## 4. Vault item quality (conditional)
|
||||||
|
|
||||||
If the PR adds or modifies files in `vault/pending/*.md`, apply these
|
If the PR adds or modifies vault item files (`vault/pending/*.md` in the ops repo), apply these
|
||||||
additional checks. These criteria apply ON TOP of the normal review —
|
additional checks. These criteria apply ON TOP of the normal review —
|
||||||
a vault PR must also pass the standard checklist above.
|
a vault PR must also pass the standard checklist above.
|
||||||
|
|
||||||
|
|
@ -102,9 +102,9 @@ propose a specific action.
|
||||||
|
|
||||||
### Dedup check
|
### Dedup check
|
||||||
|
|
||||||
Check whether `vault/pending/`, `vault/approved/`, or `vault/fired/`
|
Check whether `$OPS_REPO_ROOT/vault/pending/`, `$OPS_REPO_ROOT/vault/approved/`, or `$OPS_REPO_ROOT/vault/fired/`
|
||||||
already contains a similar item (same resource, same ask). List the
|
already contains a similar item (same resource, same ask). List the
|
||||||
vault directories to inspect existing items. If a duplicate or
|
vault directories in the ops repo to inspect existing items. If a duplicate or
|
||||||
near-duplicate exists, REQUEST_CHANGES and reference the existing item.
|
near-duplicate exists, REQUEST_CHANGES and reference the existing item.
|
||||||
|
|
||||||
## 5. External action detection (token separation)
|
## 5. External action detection (token separation)
|
||||||
|
|
@ -112,7 +112,7 @@ near-duplicate exists, REQUEST_CHANGES and reference the existing item.
|
||||||
Agents must NEVER execute external actions directly. Any action that touches
|
Agents must NEVER execute external actions directly. Any action that touches
|
||||||
an external system (publish, deploy, post, push to external registry, API
|
an external system (publish, deploy, post, push to external registry, API
|
||||||
calls to third-party services) MUST go through vault dispatch — i.e., the
|
calls to third-party services) MUST go through vault dispatch — i.e., the
|
||||||
agent files a vault item (`vault/pending/*.json`) and the vault-runner
|
agent files a vault item (`$OPS_REPO_ROOT/vault/pending/*.json`) and the vault-runner
|
||||||
container executes it with injected secrets.
|
container executes it with injected secrets.
|
||||||
|
|
||||||
Scan the diff for these patterns:
|
Scan the diff for these patterns:
|
||||||
|
|
@ -128,7 +128,7 @@ Scan the diff for these patterns:
|
||||||
|
|
||||||
If ANY of these patterns appear in agent code (scripts in `dev/`, `action/`,
|
If ANY of these patterns appear in agent code (scripts in `dev/`, `action/`,
|
||||||
`planner/`, `gardener/`, `supervisor/`, `predictor/`, `review/`, `formulas/`,
|
`planner/`, `gardener/`, `supervisor/`, `predictor/`, `review/`, `formulas/`,
|
||||||
`lib/`) WITHOUT routing through vault dispatch (`vault/pending/`, `vault-fire.sh`,
|
`lib/`) WITHOUT routing through vault dispatch (`$OPS_REPO_ROOT/vault/pending/`, `vault-fire.sh`,
|
||||||
`vault-run-action.sh`), **REQUEST_CHANGES**.
|
`vault-run-action.sh`), **REQUEST_CHANGES**.
|
||||||
|
|
||||||
Explain that external actions must use vault dispatch per AD-006. The agent
|
Explain that external actions must use vault dispatch per AD-006. The agent
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,7 @@ DUST (trivial — single-line edit, rename, comment, style, whitespace):
|
||||||
of 3+ into one backlog issue.
|
of 3+ into one backlog issue.
|
||||||
|
|
||||||
VAULT (needs human decision or external resource):
|
VAULT (needs human decision or external resource):
|
||||||
File a vault procurement item at $PROJECT_REPO_ROOT/vault/pending/<id>.md:
|
File a vault procurement item at $OPS_REPO_ROOT/vault/pending/<id>.md:
|
||||||
# <What decision or resource is needed>
|
# <What decision or resource is needed>
|
||||||
## What
|
## What
|
||||||
<description>
|
<description>
|
||||||
|
|
@ -128,7 +128,7 @@ VAULT (needs human decision or external resource):
|
||||||
<which issue this unblocks>
|
<which issue this unblocks>
|
||||||
## Unblocks
|
## Unblocks
|
||||||
- #NNN — <title>
|
- #NNN — <title>
|
||||||
Log: echo "VAULT: filed vault/pending/<id>.md for #NNN — <reason>" >> "$RESULT_FILE"
|
Log: echo "VAULT: filed $OPS_REPO_ROOT/vault/pending/<id>.md for #NNN — <reason>" >> "$RESULT_FILE"
|
||||||
|
|
||||||
CLEAN (only if truly nothing to do):
|
CLEAN (only if truly nothing to do):
|
||||||
echo 'CLEAN' >> "$RESULT_FILE"
|
echo 'CLEAN' >> "$RESULT_FILE"
|
||||||
|
|
|
||||||
|
|
@ -21,8 +21,9 @@ version = 4
|
||||||
model = "opus"
|
model = "opus"
|
||||||
|
|
||||||
[context]
|
[context]
|
||||||
files = ["VISION.md", "AGENTS.md", "RESOURCES.md", "planner/prerequisite-tree.md"]
|
files = ["VISION.md", "AGENTS.md"]
|
||||||
# Recent planner/journal/*.md files + graph report loaded by planner-run.sh
|
# RESOURCES.md and prerequisites.md loaded from ops repo (ops: prefix)
|
||||||
|
# Recent journal/planner/*.md files + graph report loaded by planner-run.sh
|
||||||
|
|
||||||
[[steps]]
|
[[steps]]
|
||||||
id = "preflight"
|
id = "preflight"
|
||||||
|
|
@ -40,10 +41,10 @@ description = """
|
||||||
HEAD_SHA=$(git rev-parse HEAD)
|
HEAD_SHA=$(git rev-parse HEAD)
|
||||||
echo "$HEAD_SHA" > /tmp/planner-head-sha
|
echo "$HEAD_SHA" > /tmp/planner-head-sha
|
||||||
|
|
||||||
4. Read the planner memory file at: $PROJECT_REPO_ROOT/planner/MEMORY.md
|
4. Read the planner memory file at: $OPS_REPO_ROOT/knowledge/planner-memory.md
|
||||||
If it does not exist, this is the first planning run.
|
If it does not exist, this is the first planning run.
|
||||||
|
|
||||||
5. Read the prerequisite tree at: $PROJECT_REPO_ROOT/planner/prerequisite-tree.md
|
5. Read the prerequisite tree at: $OPS_REPO_ROOT/prerequisites.md
|
||||||
If it does not exist, create an initial tree from VISION.md in the next step.
|
If it does not exist, create an initial tree from VISION.md in the next step.
|
||||||
|
|
||||||
6. Read the graph report injected into the prompt (## Structural analysis).
|
6. Read the graph report injected into the prompt (## Structural analysis).
|
||||||
|
|
@ -121,7 +122,7 @@ Update the tree:
|
||||||
2. Recalculate objective status (READY/BLOCKED/DONE)
|
2. Recalculate objective status (READY/BLOCKED/DONE)
|
||||||
3. Add new prerequisites discovered from graph report
|
3. Add new prerequisites discovered from graph report
|
||||||
4. Add new objectives from VISION.md not yet in tree
|
4. Add new objectives from VISION.md not yet in tree
|
||||||
5. Check vault state: vault/pending/*.md + vault/approved/*.md (blocked-on-vault), vault/fired/*.md (resolved?)
|
5. Check vault state: $OPS_REPO_ROOT/vault/pending/*.md + $OPS_REPO_ROOT/vault/approved/*.md (blocked-on-vault), $OPS_REPO_ROOT/vault/fired/*.md (resolved?)
|
||||||
6. Check RESOURCES.md for newly available capabilities
|
6. Check $OPS_REPO_ROOT/RESOURCES.md for newly available capabilities
|
||||||
|
|
||||||
Bounce/stuck detection — for issues in the tree, fetch recent comments:
|
Bounce/stuck detection — for issues in the tree, fetch recent comments:
|
||||||
|
|
@ -141,7 +142,7 @@ Tree format:
|
||||||
## Objective: <name> (#issue or description)
|
## Objective: <name> (#issue or description)
|
||||||
- [x] Resolved prerequisite (reference)
|
- [x] Resolved prerequisite (reference)
|
||||||
- [ ] Unresolved prerequisite (#issue or description)
|
- [ ] Unresolved prerequisite (#issue or description)
|
||||||
- [ ] Resource need blocked-on-vault (vault/pending/<id>.md)
|
- [ ] Resource need blocked-on-vault ($OPS_REPO_ROOT/vault/pending/<id>.md)
|
||||||
Status: READY | BLOCKED — <reason> | DONE
|
Status: READY | BLOCKED — <reason> | DONE
|
||||||
|
|
||||||
### Part C: File at constraints
|
### Part C: File at constraints
|
||||||
|
|
@ -157,7 +158,7 @@ Stuck issue handling:
|
||||||
procurement item instead of skipping. First check for duplicates across ALL
|
procurement item instead of skipping. First check for duplicates across ALL
|
||||||
vault directories (pending/, approved/, fired/) — if a file with the same
|
vault directories (pending/, approved/, fired/) — if a file with the same
|
||||||
slug already exists in any of them, do NOT create a new one.
|
slug already exists in any of them, do NOT create a new one.
|
||||||
Naming: vault/pending/<project>-<slug>.md (e.g. disinto-github-org.md).
|
Naming: $OPS_REPO_ROOT/vault/pending/<project>-<slug>.md (e.g. disinto-github-org.md).
|
||||||
Write with this template:
|
Write with this template:
|
||||||
|
|
||||||
# Request: <short description>
|
# Request: <short description>
|
||||||
|
|
@ -181,7 +182,7 @@ Stuck issue handling:
|
||||||
## Unblocks
|
## Unblocks
|
||||||
- #<issue> — <title>
|
- #<issue> — <title>
|
||||||
|
|
||||||
Then mark the prerequisite in the tree as "blocked-on-vault (vault/pending/<id>.md)".
|
Then mark the prerequisite in the tree as "blocked-on-vault ($OPS_REPO_ROOT/vault/pending/<id>.md)".
|
||||||
Do NOT skip or mark as "awaiting human decision" — the vault owns the human interface.
|
Do NOT skip or mark as "awaiting human decision" — the vault owns the human interface.
|
||||||
|
|
||||||
Filing gate (for non-stuck constraints):
|
Filing gate (for non-stuck constraints):
|
||||||
|
|
@ -197,9 +198,9 @@ Priority label sync:
|
||||||
"$FORGE_API/issues/<num>/labels/<priority_label_id>"
|
"$FORGE_API/issues/<num>/labels/<priority_label_id>"
|
||||||
|
|
||||||
Vault procurement: if a constraint needs a resource not in RESOURCES.md with
|
Vault procurement: if a constraint needs a resource not in RESOURCES.md with
|
||||||
recurring cost, create vault/pending/<project>-<slug>.md instead of an issue.
|
recurring cost, create $OPS_REPO_ROOT/vault/pending/<project>-<slug>.md instead of an issue.
|
||||||
Use the same template as HUMAN_BLOCKED above (What/Why/Human action/Factory will then/Unblocks).
|
Use the same template as HUMAN_BLOCKED above (What/Why/Human action/Factory will then/Unblocks).
|
||||||
Dedup: check vault/pending/ + vault/approved/ + vault/fired/ before creating.
|
Dedup: check $OPS_REPO_ROOT/vault/pending/ + $OPS_REPO_ROOT/vault/approved/ + $OPS_REPO_ROOT/vault/fired/ before creating.
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- Action budget: the planner may create at most (predictions_addressed + 1)
|
- Action budget: the planner may create at most (predictions_addressed + 1)
|
||||||
|
|
@ -220,10 +221,10 @@ id = "journal-and-commit"
|
||||||
title = "Write tree, journal, optional memory; commit and PR"
|
title = "Write tree, journal, optional memory; commit and PR"
|
||||||
description = """
|
description = """
|
||||||
### 1. Write prerequisite tree
|
### 1. Write prerequisite tree
|
||||||
Write to: $PROJECT_REPO_ROOT/planner/prerequisite-tree.md
|
Write to: $OPS_REPO_ROOT/prerequisites.md
|
||||||
|
|
||||||
### 2. Write journal entry
|
### 2. Write journal entry
|
||||||
Create/append to: $PROJECT_REPO_ROOT/planner/journal/$(date -u +%Y-%m-%d).md
|
Create/append to: $OPS_REPO_ROOT/journal/planner/$(date -u +%Y-%m-%d).md
|
||||||
|
|
||||||
Format:
|
Format:
|
||||||
# Planner run — YYYY-MM-DD HH:MM UTC
|
# Planner run — YYYY-MM-DD HH:MM UTC
|
||||||
|
|
@ -242,7 +243,7 @@ Format:
|
||||||
(or "No stuck issues detected")
|
(or "No stuck issues detected")
|
||||||
|
|
||||||
## Vault items filed
|
## Vault items filed
|
||||||
- vault/pending/<id>.md — <what> — blocks #NNN
|
- $OPS_REPO_ROOT/vault/pending/<id>.md — <what> — blocks #NNN
|
||||||
(or "No vault items filed")
|
(or "No vault items filed")
|
||||||
|
|
||||||
## Issues created
|
## Issues created
|
||||||
|
|
@ -261,28 +262,21 @@ Keep concise — 30-50 lines max.
|
||||||
|
|
||||||
### 3. Memory update (every 5th run)
|
### 3. Memory update (every 5th run)
|
||||||
Count "# Planner run —" headers across all journal files.
|
Count "# Planner run —" headers across all journal files.
|
||||||
Check "<!-- summarized-through-run: N -->" in MEMORY.md.
|
Check "<!-- summarized-through-run: N -->" in planner-memory.md.
|
||||||
If (count - N) >= 5 or MEMORY.md missing, write to:
|
If (count - N) >= 5 or planner-memory.md missing, write to:
|
||||||
$PROJECT_REPO_ROOT/planner/MEMORY.md
|
$OPS_REPO_ROOT/knowledge/planner-memory.md
|
||||||
Include: run counter marker, date, constraint focus, patterns, direction.
|
Include: run counter marker, date, constraint focus, patterns, direction.
|
||||||
Keep under 100 lines. Replace entire file.
|
Keep under 100 lines. Replace entire file.
|
||||||
|
|
||||||
### 4. Commit and PR
|
### 4. Commit ops repo changes
|
||||||
If no file changes (git status --porcelain), skip.
|
Commit the ops repo changes (prerequisites, journal, memory, vault items):
|
||||||
Otherwise:
|
cd "$OPS_REPO_ROOT"
|
||||||
BRANCH="chore/planner-$(date -u +%Y%m%d-%H%M)"
|
git add prerequisites.md journal/planner/ knowledge/planner-memory.md vault/pending/
|
||||||
git checkout -B "$BRANCH"
|
|
||||||
git add planner/prerequisite-tree.md planner/journal/ planner/MEMORY.md vault/pending/
|
|
||||||
git add -u
|
git add -u
|
||||||
git diff --cached --quiet && skip
|
if ! git diff --cached --quiet; then
|
||||||
git commit -m "chore: planner run $(date -u +%Y-%m-%d)"
|
git commit -m "chore: planner run $(date -u +%Y-%m-%d)"
|
||||||
git push -u origin "$BRANCH"
|
git push origin "$PRIMARY_BRANCH"
|
||||||
Create PR via forge API:
|
fi
|
||||||
curl -sf -X POST -H "Authorization: token $FORGE_TOKEN" \
|
cd "$PROJECT_REPO_ROOT"
|
||||||
-H "Content-Type: application/json" "$FORGE_API/pulls" \
|
|
||||||
-d '{"title":"chore: planner run — prerequisite tree update",
|
|
||||||
"head":"<branch>","base":"<primary-branch>",
|
|
||||||
"body":"Automated planner run — prerequisite tree update and journal entry."}'
|
|
||||||
git checkout "$PRIMARY_BRANCH"
|
|
||||||
"""
|
"""
|
||||||
needs = ["triage-and-plan"]
|
needs = ["triage-and-plan"]
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,8 @@ version = 3
|
||||||
model = "sonnet"
|
model = "sonnet"
|
||||||
|
|
||||||
[context]
|
[context]
|
||||||
files = ["AGENTS.md", "RESOURCES.md", "VISION.md", "planner/prerequisite-tree.md"]
|
files = ["AGENTS.md", "VISION.md"]
|
||||||
|
# RESOURCES.md and prerequisites.md loaded from ops repo (ops: prefix)
|
||||||
graph_report = "Structural analysis JSON from lib/build-graph.py — orphans, cycles, thin objectives, bottlenecks"
|
graph_report = "Structural analysis JSON from lib/build-graph.py — orphans, cycles, thin objectives, bottlenecks"
|
||||||
|
|
||||||
[[steps]]
|
[[steps]]
|
||||||
|
|
@ -48,12 +49,12 @@ Set up the working environment and load your prediction history.
|
||||||
unreviewed (planner hasn't seen it yet)
|
unreviewed (planner hasn't seen it yet)
|
||||||
|
|
||||||
3. Read the prerequisite tree:
|
3. Read the prerequisite tree:
|
||||||
cat "$PROJECT_REPO_ROOT/planner/prerequisite-tree.md"
|
cat "$OPS_REPO_ROOT/prerequisites.md"
|
||||||
|
|
||||||
4. Count evidence per claim area:
|
4. Count evidence per claim area:
|
||||||
for dir in evidence/red-team evidence/holdout evidence/evolution evidence/user-test; do
|
for dir in evidence/red-team evidence/holdout evidence/evolution evidence/user-test; do
|
||||||
echo "=== $dir ===$(find "$PROJECT_REPO_ROOT/$dir" -name '*.json' 2>/dev/null | wc -l) files"
|
echo "=== $dir ===$(find "$OPS_REPO_ROOT/$dir" -name '*.json' 2>/dev/null | wc -l) files"
|
||||||
find "$PROJECT_REPO_ROOT/$dir" -name '*.json' -printf '%T+ %p\n' 2>/dev/null | sort -r | head -3
|
find "$OPS_REPO_ROOT/$dir" -name '*.json' -printf '%T+ %p\n' 2>/dev/null | sort -r | head -3
|
||||||
done
|
done
|
||||||
|
|
||||||
5. Check current system state (lightweight — don't over-collect):
|
5. Check current system state (lightweight — don't over-collect):
|
||||||
|
|
|
||||||
|
|
@ -209,7 +209,7 @@ Check 2 — collect-engagement.sh is present in the repo:
|
||||||
fi
|
fi
|
||||||
|
|
||||||
Check 3 — engagement evidence has been collected at least once:
|
Check 3 — engagement evidence has been collected at least once:
|
||||||
EVIDENCE_DIR="$FACTORY_ROOT/evidence/engagement"
|
EVIDENCE_DIR="$OPS_REPO_ROOT/evidence/engagement"
|
||||||
LATEST=$(ls -1t "$EVIDENCE_DIR"/*.json 2>/dev/null | head -1 || true)
|
LATEST=$(ls -1t "$EVIDENCE_DIR"/*.json 2>/dev/null | head -1 || true)
|
||||||
if [ -n "$LATEST" ]; then
|
if [ -n "$LATEST" ]; then
|
||||||
echo "OK: Latest engagement report: $LATEST"
|
echo "OK: Latest engagement report: $LATEST"
|
||||||
|
|
@ -222,7 +222,7 @@ Check 3 — engagement evidence has been collected at least once:
|
||||||
Summary:
|
Summary:
|
||||||
echo ""
|
echo ""
|
||||||
echo "Observable status: addressable=disinto.ai measurement=caddy-access-logs"
|
echo "Observable status: addressable=disinto.ai measurement=caddy-access-logs"
|
||||||
echo "Evidence path: evidence/engagement/YYYY-MM-DD.json"
|
echo "Evidence path: \$OPS_REPO_ROOT/evidence/engagement/YYYY-MM-DD.json"
|
||||||
echo "Consumer: planner reads evidence/engagement/ during gap analysis"
|
echo "Consumer: planner reads ops repo evidence/engagement/ during gap analysis"
|
||||||
"""
|
"""
|
||||||
needs = ["verify"]
|
needs = ["verify"]
|
||||||
|
|
|
||||||
|
|
@ -34,12 +34,12 @@ and injected into your prompt above. Review them now.
|
||||||
(24h grace period). Check the "Stale Phase Cleanup" section for any
|
(24h grace period). Check the "Stale Phase Cleanup" section for any
|
||||||
files cleaned or in grace period this run.
|
files cleaned or in grace period this run.
|
||||||
|
|
||||||
2. Check vault state: read vault/pending/*.md for any procurement items
|
2. Check vault state: read $OPS_REPO_ROOT/vault/pending/*.md for any procurement items
|
||||||
the planner has filed. Note items relevant to the health assessment
|
the planner has filed. Note items relevant to the health assessment
|
||||||
(e.g. a blocked resource that explains why the pipeline is stalled).
|
(e.g. a blocked resource that explains why the pipeline is stalled).
|
||||||
|
|
||||||
3. Read the supervisor journal for recent history:
|
3. Read the supervisor journal for recent history:
|
||||||
JOURNAL_FILE="$FACTORY_ROOT/supervisor/journal/$(date -u +%Y-%m-%d).md"
|
JOURNAL_FILE="$OPS_REPO_ROOT/journal/supervisor/$(date -u +%Y-%m-%d).md"
|
||||||
if [ -f "$JOURNAL_FILE" ]; then cat "$JOURNAL_FILE"; fi
|
if [ -f "$JOURNAL_FILE" ]; then cat "$JOURNAL_FILE"; fi
|
||||||
|
|
||||||
4. Note any values that cross these thresholds:
|
4. Note any values that cross these thresholds:
|
||||||
|
|
@ -151,7 +151,7 @@ For each finding from the health assessment, decide and execute an action.
|
||||||
|
|
||||||
For P0-P2 issues that persist after auto-fix attempts, or issues requiring
|
For P0-P2 issues that persist after auto-fix attempts, or issues requiring
|
||||||
human judgment, file a vault procurement item:
|
human judgment, file a vault procurement item:
|
||||||
Write $PROJECT_REPO_ROOT/vault/pending/supervisor-<issue-slug>.md:
|
Write $OPS_REPO_ROOT/vault/pending/supervisor-<issue-slug>.md:
|
||||||
# <What is needed>
|
# <What is needed>
|
||||||
## What
|
## What
|
||||||
<description of the problem and why the supervisor cannot fix it>
|
<description of the problem and why the supervisor cannot fix it>
|
||||||
|
|
@ -162,11 +162,11 @@ human judgment, file a vault procurement item:
|
||||||
The vault-poll will notify the human and track the request.
|
The vault-poll will notify the human and track the request.
|
||||||
|
|
||||||
Read the relevant best-practices file before taking action:
|
Read the relevant best-practices file before taking action:
|
||||||
cat "$FACTORY_ROOT/supervisor/best-practices/memory.md" # P0
|
cat "$OPS_REPO_ROOT/knowledge/memory.md" # P0
|
||||||
cat "$FACTORY_ROOT/supervisor/best-practices/disk.md" # P1
|
cat "$OPS_REPO_ROOT/knowledge/disk.md" # P1
|
||||||
cat "$FACTORY_ROOT/supervisor/best-practices/ci.md" # P2 CI
|
cat "$OPS_REPO_ROOT/knowledge/ci.md" # P2 CI
|
||||||
cat "$FACTORY_ROOT/supervisor/best-practices/dev-agent.md" # P2 agent
|
cat "$OPS_REPO_ROOT/knowledge/dev-agent.md" # P2 agent
|
||||||
cat "$FACTORY_ROOT/supervisor/best-practices/git.md" # P2 git
|
cat "$OPS_REPO_ROOT/knowledge/git.md" # P2 git
|
||||||
|
|
||||||
Track what you fixed and what vault items you filed for the report step.
|
Track what you fixed and what vault items you filed for the report step.
|
||||||
"""
|
"""
|
||||||
|
|
@ -208,7 +208,7 @@ description = """
|
||||||
Append a timestamped entry to the supervisor journal.
|
Append a timestamped entry to the supervisor journal.
|
||||||
|
|
||||||
File path:
|
File path:
|
||||||
$FACTORY_ROOT/supervisor/journal/$(date -u +%Y-%m-%d).md
|
$OPS_REPO_ROOT/journal/supervisor/$(date -u +%Y-%m-%d).md
|
||||||
|
|
||||||
If the file already exists (multiple runs per day), append a new section.
|
If the file already exists (multiple runs per day), append a new section.
|
||||||
If it does not exist, create it.
|
If it does not exist, create it.
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ sourced as needed.
|
||||||
| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `build_graph_section()`, `run_formula_and_monitor(AGENT [TIMEOUT] [CALLBACK])` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `formula_phase_callback()` handles `PHASE:escalate` (unified escalation path — kills the session). `run_formula_and_monitor` accepts an optional CALLBACK (default: `formula_phase_callback`) so callers can install custom merge-through or escalation handlers. | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh, action-agent.sh |
|
| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `build_graph_section()`, `run_formula_and_monitor(AGENT [TIMEOUT] [CALLBACK])` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `formula_phase_callback()` handles `PHASE:escalate` (unified escalation path — kills the session). `run_formula_and_monitor` accepts an optional CALLBACK (default: `formula_phase_callback`) so callers can install custom merge-through or escalation handlers. | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh, action-agent.sh |
|
||||||
| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, action-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points |
|
| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, action-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points |
|
||||||
| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh and dev/phase-handler.sh — called after every successful merge. | dev-poll.sh, phase-handler.sh |
|
| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh and dev/phase-handler.sh — called after every successful merge. | dev-poll.sh, phase-handler.sh |
|
||||||
| `lib/build-graph.py` | Python tool: parses VISION.md, prerequisite-tree.md, AGENTS.md, formulas/*.toml, evidence/, and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh |
|
| `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh |
|
||||||
| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh |
|
| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh |
|
||||||
| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | (available for future use) |
|
| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | (available for future use) |
|
||||||
| `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) |
|
| `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) |
|
||||||
|
|
|
||||||
|
|
@ -86,6 +86,13 @@ export TEA_LOGIN
|
||||||
export PROJECT_NAME="${PROJECT_NAME:-${FORGE_REPO##*/}}"
|
export PROJECT_NAME="${PROJECT_NAME:-${FORGE_REPO##*/}}"
|
||||||
export PROJECT_REPO_ROOT="${PROJECT_REPO_ROOT:-/home/${USER}/${PROJECT_NAME}}"
|
export PROJECT_REPO_ROOT="${PROJECT_REPO_ROOT:-/home/${USER}/${PROJECT_NAME}}"
|
||||||
export PRIMARY_BRANCH="${PRIMARY_BRANCH:-master}"
|
export PRIMARY_BRANCH="${PRIMARY_BRANCH:-master}"
|
||||||
|
|
||||||
|
# Ops repo: operational data (vault items, journals, evidence, prerequisites).
|
||||||
|
# Default convention: sibling directory named {project}-ops.
|
||||||
|
export OPS_REPO_ROOT="${OPS_REPO_ROOT:-/home/${USER}/${PROJECT_NAME}-ops}"
|
||||||
|
|
||||||
|
# Forge repo slug for the ops repo (used by agents that commit to ops).
|
||||||
|
export FORGE_OPS_REPO="${FORGE_OPS_REPO:-${FORGE_REPO:+${FORGE_REPO}-ops}}"
|
||||||
export WOODPECKER_REPO_ID="${WOODPECKER_REPO_ID:-}"
|
export WOODPECKER_REPO_ID="${WOODPECKER_REPO_ID:-}"
|
||||||
export WOODPECKER_SERVER="${WOODPECKER_SERVER:-http://localhost:8000}"
|
export WOODPECKER_SERVER="${WOODPECKER_SERVER:-http://localhost:8000}"
|
||||||
export CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"
|
export CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"
|
||||||
|
|
|
||||||
|
|
@ -67,37 +67,91 @@ load_formula() {
|
||||||
|
|
||||||
# build_context_block FILE [FILE ...]
|
# build_context_block FILE [FILE ...]
|
||||||
# Reads each file from $PROJECT_REPO_ROOT and builds CONTEXT_BLOCK.
|
# Reads each file from $PROJECT_REPO_ROOT and builds CONTEXT_BLOCK.
|
||||||
|
# Files prefixed with "ops:" are read from $OPS_REPO_ROOT instead.
|
||||||
build_context_block() {
|
build_context_block() {
|
||||||
CONTEXT_BLOCK=""
|
CONTEXT_BLOCK=""
|
||||||
local ctx ctx_path
|
local ctx ctx_path ctx_label
|
||||||
for ctx in "$@"; do
|
for ctx in "$@"; do
|
||||||
|
case "$ctx" in
|
||||||
|
ops:*)
|
||||||
|
ctx_label="${ctx#ops:}"
|
||||||
|
ctx_path="${OPS_REPO_ROOT}/${ctx_label}"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
ctx_label="$ctx"
|
||||||
ctx_path="${PROJECT_REPO_ROOT}/${ctx}"
|
ctx_path="${PROJECT_REPO_ROOT}/${ctx}"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
if [ -f "$ctx_path" ]; then
|
if [ -f "$ctx_path" ]; then
|
||||||
CONTEXT_BLOCK="${CONTEXT_BLOCK}
|
CONTEXT_BLOCK="${CONTEXT_BLOCK}
|
||||||
### ${ctx}
|
### ${ctx_label}
|
||||||
$(cat "$ctx_path")
|
$(cat "$ctx_path")
|
||||||
"
|
"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Escalation reply consumption ─────────────────────────────────────────
|
# ── Ops repo helpers ─────────────────────────────────────────────────
|
||||||
|
|
||||||
# consume_escalation_reply AGENT_NAME
|
# ensure_ops_repo
|
||||||
# Atomically consumes /tmp/{agent}-escalation-reply if it exists.
|
# Clones or pulls the ops repo so agents can read/write operational data.
|
||||||
# Sets ESCALATION_REPLY to the file contents (empty string if no reply).
|
# Uses: OPS_REPO_ROOT, FORGE_OPS_REPO, PRIMARY_BRANCH; FORGE_URL (default http://localhost:3000) and FORGE_TOKEN are optional.
|
||||||
consume_escalation_reply() {
|
# No-op if OPS_REPO_ROOT is unset. An existing clone is fetched and fast-forwarded to origin/$PRIMARY_BRANCH (best-effort).
|
||||||
local agent="$1"
|
ensure_ops_repo() {
|
||||||
local reply_file="/tmp/${agent}-escalation-reply"
|
local ops_root="${OPS_REPO_ROOT:-}"
|
||||||
ESCALATION_REPLY=""
|
[ -n "$ops_root" ] || return 0
|
||||||
if [ -s "$reply_file" ]; then
|
|
||||||
local tmp_file="${reply_file}.consumed.$$"
|
if [ -d "${ops_root}/.git" ]; then
|
||||||
if mv "$reply_file" "$tmp_file" 2>/dev/null; then
|
# Pull latest from primary branch
|
||||||
ESCALATION_REPLY=$(cat "$tmp_file")
|
git -C "$ops_root" fetch origin "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
|
||||||
rm -f "$tmp_file"
|
git -C "$ops_root" checkout "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
|
||||||
log "Consumed escalation reply: $(echo "$ESCALATION_REPLY" | head -1)"
|
git -C "$ops_root" pull --ff-only origin "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
|
||||||
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Clone from Forgejo
|
||||||
|
local ops_repo="${FORGE_OPS_REPO:-}"
|
||||||
|
[ -n "$ops_repo" ] || return 0
|
||||||
|
local forge_url="${FORGE_URL:-http://localhost:3000}"
|
||||||
|
local clone_url
|
||||||
|
if [ -n "${FORGE_TOKEN:-}" ]; then
|
||||||
|
local auth_url
|
||||||
|
auth_url=$(printf '%s' "$forge_url" | sed "s|://|://$(whoami):${FORGE_TOKEN}@|")
|
||||||
|
clone_url="${auth_url}/${ops_repo}.git"
|
||||||
|
else
|
||||||
|
clone_url="${forge_url}/${ops_repo}.git"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
log "Cloning ops repo: ${ops_repo} -> ${ops_root}"
|
||||||
|
if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then
|
||||||
|
log "Ops repo cloned: ${ops_root}"
|
||||||
|
else
|
||||||
|
log "WARNING: failed to clone ops repo ${ops_repo} — creating local directory"
|
||||||
|
mkdir -p "$ops_root"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# ops_commit_and_push MESSAGE [FILE ...]
|
||||||
|
# Stage, commit, and push changes in the ops repo.
|
||||||
|
# If no files specified, stages all changes.
|
||||||
|
ops_commit_and_push() {
|
||||||
|
local msg="$1"
|
||||||
|
shift
|
||||||
|
local ops_root="${OPS_REPO_ROOT:-}"
|
||||||
|
[ -d "${ops_root}/.git" ] || return 0
|
||||||
|
|
||||||
|
(
|
||||||
|
cd "$ops_root" || return
|
||||||
|
if [ $# -gt 0 ]; then
|
||||||
|
git add "$@"
|
||||||
|
else
|
||||||
|
git add -A
|
||||||
|
fi
|
||||||
|
if ! git diff --cached --quiet; then
|
||||||
|
git commit -m "$msg"
|
||||||
|
git push origin "${PRIMARY_BRANCH}" --quiet 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Session management ───────────────────────────────────────────────────
|
# ── Session management ───────────────────────────────────────────────────
|
||||||
|
|
@ -296,6 +350,7 @@ NEVER echo or include the actual token value in output — always reference \${F
|
||||||
## Environment
|
## Environment
|
||||||
FACTORY_ROOT=${FACTORY_ROOT}
|
FACTORY_ROOT=${FACTORY_ROOT}
|
||||||
PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT}
|
PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT}
|
||||||
|
OPS_REPO_ROOT=${OPS_REPO_ROOT}
|
||||||
PRIMARY_BRANCH=${PRIMARY_BRANCH}
|
PRIMARY_BRANCH=${PRIMARY_BRANCH}
|
||||||
PHASE_FILE=${PHASE_FILE}
|
PHASE_FILE=${PHASE_FILE}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,10 @@ emit('FORGE_URL', cfg.get('forge_url', ''))
|
||||||
|
|
||||||
if 'repo_root' in cfg:
|
if 'repo_root' in cfg:
|
||||||
emit('PROJECT_REPO_ROOT', cfg['repo_root'])
|
emit('PROJECT_REPO_ROOT', cfg['repo_root'])
|
||||||
|
if 'ops_repo_root' in cfg:
|
||||||
|
emit('OPS_REPO_ROOT', cfg['ops_repo_root'])
|
||||||
|
if 'ops_repo' in cfg:
|
||||||
|
emit('FORGE_OPS_REPO', cfg['ops_repo'])
|
||||||
if 'primary_branch' in cfg:
|
if 'primary_branch' in cfg:
|
||||||
emit('PRIMARY_BRANCH', cfg['primary_branch'])
|
emit('PRIMARY_BRANCH', cfg['primary_branch'])
|
||||||
|
|
||||||
|
|
@ -99,4 +103,14 @@ if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
|
||||||
export PROJECT_REPO_ROOT="/home/${USER}/${PROJECT_NAME}"
|
export PROJECT_REPO_ROOT="/home/${USER}/${PROJECT_NAME}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Derive OPS_REPO_ROOT if not explicitly set
|
||||||
|
if [ -z "${OPS_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
|
||||||
|
export OPS_REPO_ROOT="/home/${USER}/${PROJECT_NAME}-ops"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Derive FORGE_OPS_REPO if not explicitly set
|
||||||
|
if [ -z "${FORGE_OPS_REPO:-}" ] && [ -n "${FORGE_REPO:-}" ]; then
|
||||||
|
export FORGE_OPS_REPO="${FORGE_REPO}-ops"
|
||||||
|
fi
|
||||||
|
|
||||||
unset _PROJECT_TOML _PROJECT_VARS _key _val
|
unset _PROJECT_TOML _PROJECT_VARS _key _val
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
**Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints),
|
**Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints),
|
||||||
executed directly from cron via tmux + Claude.
|
executed directly from cron via tmux + Claude.
|
||||||
Phase 0 (preflight): pull latest code, load persistent memory and prerequisite
|
Phase 0 (preflight): pull latest code, load persistent memory and prerequisite
|
||||||
tree from `planner/MEMORY.md` and `planner/prerequisite-tree.md`. Also reads
|
tree from `$OPS_REPO_ROOT/knowledge/planner-memory.md` and `$OPS_REPO_ROOT/prerequisites.md`. Also reads
|
||||||
all available formulas: factory formulas (`$FACTORY_ROOT/formulas/*.toml`) and
|
all available formulas: factory formulas (`$FACTORY_ROOT/formulas/*.toml`) and
|
||||||
project-specific formulas (`$PROJECT_REPO_ROOT/formulas/*.toml`). Phase 1
|
project-specific formulas (`$PROJECT_REPO_ROOT/formulas/*.toml`). Phase 1
|
||||||
(prediction-triage): triage `prediction/unreviewed` issues filed by the
|
(prediction-triage): triage `prediction/unreviewed` issues filed by the
|
||||||
|
|
@ -20,7 +20,7 @@ prerequisites, discover new ones, update the tree. **Also scans comments on
|
||||||
referenced issues for bounce/stuck signals** (BOUNCED, LABEL_CHURN)
|
referenced issues for bounce/stuck signals** (BOUNCED, LABEL_CHURN)
|
||||||
to detect issues ping-ponging between backlog and underspecified. Issues that
|
to detect issues ping-ponging between backlog and underspecified. Issues that
|
||||||
need human decisions or external resources are filed as vault procurement items
|
need human decisions or external resources are filed as vault procurement items
|
||||||
(`vault/pending/*.md`) instead of being escalated. Phase 3
|
(`$OPS_REPO_ROOT/vault/pending/*.md`) instead of being escalated. Phase 3
|
||||||
(file-at-constraints): identify the top 3 unresolved prerequisites that block
|
(file-at-constraints): identify the top 3 unresolved prerequisites that block
|
||||||
the most downstream objectives — file issues as either `backlog` (code changes,
|
the most downstream objectives — file issues as either `backlog` (code changes,
|
||||||
dev-agent) or `action` (run existing formula, action-agent). **Stuck issues
|
dev-agent) or `action` (run existing formula, action-agent). **Stuck issues
|
||||||
|
|
@ -28,18 +28,17 @@ dev-agent) or `action` (run existing formula, action-agent). **Stuck issues
|
||||||
in breakdown mode instead of being re-promoted** — this breaks the ping-pong
|
in breakdown mode instead of being re-promoted** — this breaks the ping-pong
|
||||||
loop by splitting them into dev-agent-sized sub-issues. **Human-blocked issues
|
loop by splitting them into dev-agent-sized sub-issues. **Human-blocked issues
|
||||||
are routed through the vault** — the planner files an actionable procurement
|
are routed through the vault** — the planner files an actionable procurement
|
||||||
item (`vault/pending/<project>-<slug>.md` with What/Why/Human action/Factory
|
item (`$OPS_REPO_ROOT/vault/pending/<project>-<slug>.md` with What/Why/Human action/Factory
|
||||||
will then sections) and marks the prerequisite as blocked-on-vault in the tree.
|
will then sections) and marks the prerequisite as blocked-on-vault in the tree.
|
||||||
Deduplication: checks pending/ + approved/ + fired/ before creating.
|
Deduplication: checks pending/ + approved/ + fired/ before creating.
|
||||||
Phase 4 (journal-and-memory): write updated prerequisite tree + daily journal
|
Phase 4 (journal-and-memory): write updated prerequisite tree + daily journal
|
||||||
entry (committed to git) and update `planner/MEMORY.md` (committed to git).
|
entry (committed to ops repo) and update `$OPS_REPO_ROOT/knowledge/planner-memory.md`.
|
||||||
Phase 5 (commit-and-pr): one commit with all file changes, push, create PR.
|
Phase 5 (commit-ops): commit all ops repo changes and push directly to the ops repo's `$PRIMARY_BRANCH` (no branch, no PR).
|
||||||
AGENTS.md maintenance is handled by the Gardener.
|
AGENTS.md maintenance is handled by the Gardener.
|
||||||
|
|
||||||
**Artifacts use `$PROJECT_REPO_ROOT`**: All planner artifacts (journal,
|
**Artifacts use `$OPS_REPO_ROOT`**: All planner artifacts (journal,
|
||||||
prerequisite tree, memory, vault state) live under `$PROJECT_REPO_ROOT/planner/`
|
prerequisite tree, memory, vault state) live under `$OPS_REPO_ROOT/`.
|
||||||
and `$PROJECT_REPO_ROOT/vault/`, not `$FACTORY_ROOT`. Each project manages its
|
Each project manages its own planner state in a separate ops repo.
|
||||||
own planner state independently.
|
|
||||||
|
|
||||||
**Trigger**: `planner-run.sh` runs daily via cron (accepts an optional project
|
**Trigger**: `planner-run.sh` runs daily via cron (accepts an optional project
|
||||||
TOML argument, defaults to `projects/disinto.toml`). Sources `lib/guard.sh` and
|
TOML argument, defaults to `projects/disinto.toml`). Sources `lib/guard.sh` and
|
||||||
|
|
@ -60,12 +59,12 @@ component, not work.
|
||||||
- `formulas/groom-backlog.toml` — Dual-mode formula: grooming (default) or
|
- `formulas/groom-backlog.toml` — Dual-mode formula: grooming (default) or
|
||||||
breakdown (dispatched by planner for bounced/stuck issues — splits the issue
|
breakdown (dispatched by planner for bounced/stuck issues — splits the issue
|
||||||
into dev-agent-sized sub-issues, removes `underspecified` label)
|
into dev-agent-sized sub-issues, removes `underspecified` label)
|
||||||
- `planner/prerequisite-tree.md` — Prerequisite tree: versioned constraint
|
- `$OPS_REPO_ROOT/prerequisites.md` — Prerequisite tree: versioned constraint
|
||||||
map linking VISION.md objectives to their prerequisites. Planner owns the
|
map linking VISION.md objectives to their prerequisites. Planner owns the
|
||||||
tree, humans steer by editing VISION.md. Tree grows organically as the
|
tree, humans steer by editing VISION.md. Tree grows organically as the
|
||||||
planner discovers new prerequisites during runs
|
planner discovers new prerequisites during runs
|
||||||
- `planner/MEMORY.md` — Persistent memory across runs (committed to git)
|
- `$OPS_REPO_ROOT/knowledge/planner-memory.md` — Persistent memory across runs (in ops repo)
|
||||||
- `planner/journal/*.md` — Daily raw logs from each planner run (committed to git)
|
- `$OPS_REPO_ROOT/journal/planner/*.md` — Daily raw logs from each planner run (in ops repo)
|
||||||
|
|
||||||
**Constraint focus**: The planner uses Theory of Constraints to avoid premature
|
**Constraint focus**: The planner uses Theory of Constraints to avoid premature
|
||||||
issue filing. Only the top 3 unresolved prerequisites that block the most
|
issue filing. Only the top 3 unresolved prerequisites that block the most
|
||||||
|
|
@ -74,5 +73,5 @@ prerequisite tree but NOT as issues. This prevents the "spray issues across
|
||||||
all milestones" pattern that produced premature work in planner v1/v2.
|
all milestones" pattern that produced premature work in planner v1/v2.
|
||||||
|
|
||||||
**Environment variables consumed**:
|
**Environment variables consumed**:
|
||||||
- `FORGE_TOKEN`, `FORGE_PLANNER_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`
|
- `FORGE_TOKEN`, `FORGE_PLANNER_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`, `OPS_REPO_ROOT`
|
||||||
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to opus by planner-run.sh)
|
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to opus by planner-run.sh)
|
||||||
|
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
<!-- summarized-through-run: 6 -->
|
|
||||||
# Planner Memory
|
|
||||||
|
|
||||||
## 2026-03-26 — Sixth planner run
|
|
||||||
|
|
||||||
### Milestone state
|
|
||||||
- **Foundation**: COMPLETE. All agent loops, supervisor, planner, multi-project, knowledge graph, predictor-planner feedback loop — all working.
|
|
||||||
- **Adoption**: 4/5 COMPLETE. Bootstrap (#393), docs (#394), dashboard (#395), landing page (#534) all done. Only #466 (example project) remains — stuck on human decision since 2026-03-23.
|
|
||||||
- **Ship (Fold 2)**: ENTERING SCOPE. Rent-a-human (#679) done. Exec agent (#699) done. Observable addressables (#718) filed. Deploy profiles and assumptions register not yet tracked.
|
|
||||||
- **Scale**: DEFERRED. No external users yet. Plugin system, community formulas, hosted option all premature.
|
|
||||||
|
|
||||||
### Completed since last summary (runs 2-6)
|
|
||||||
- Bootstrap fully hardened: init smoke test (#668), CI wiring (#661), Forgejo reachability (#660), 10+ bootstrap fixes
|
|
||||||
- Full stack containerized (#618, #619) with Forgejo, Woodpecker, Dendrite
|
|
||||||
- Autonomous merge pipeline (#568) — PRs auto-merge on CI pass + approval
|
|
||||||
- Unified escalation path (#510) — PHASE:escalate replaces needs_human
|
|
||||||
- Factory operational reliability — guard logging (#663), stale phase cleanup (#664)
|
|
||||||
- Prediction/backlog killed (#686) — planner now only ACTIONs or DISMISSes predictions
|
|
||||||
- Planner v2 — graph-driven formula (#667), tea CLI integration (#666)
|
|
||||||
- Exec agent (#699) — interactive assistant via Matrix
|
|
||||||
- Rent-a-human (#679) — formula-dispatchable human action drafts
|
|
||||||
- Tech-debt queue cleared (~30 items)
|
|
||||||
- Skill package initiative started (#710-#715) from research (#709)
|
|
||||||
|
|
||||||
### Patterns
|
|
||||||
- **Label loss resolved**: #535 fixed the recurring label-loss pattern. Labels now persist reliably.
|
|
||||||
- **Predictor signal quality improved**: Later runs show 100% substantive predictions. Over-signaling on transient ops issues has stopped.
|
|
||||||
- **Human bottleneck is real**: #466 escalated 2026-03-23, still no response after 3 days. When the factory needs human input and doesn't get it, work halts on that branch entirely.
|
|
||||||
- **Factory throughput is extreme when unblocked**: 50+ issues cleared in ~5 days (2026-03-20 to 2026-03-25). Pipeline processes ~10 issues/day when backlog is stocked.
|
|
||||||
- **Duplicate issues from parallel creation**: #710/#714 and #711/#715 are duplicates — likely created in separate exec/research sessions. Gardener should catch these.
|
|
||||||
- **prediction/backlog migration**: All 4 legacy prediction/backlog items dismissed and closed in run 6. prediction/dismissed label created.
|
|
||||||
|
|
||||||
### Strategic direction
|
|
||||||
- Ship milestone is the next frontier. Adoption is blocked only on #466 (human decision).
|
|
||||||
- Skill package distribution (#710→#711→#712) is the immediate pipeline work — packaging disinto for external discovery.
|
|
||||||
- Observable addressables (#718) bridges Fold 2 → Fold 3 — core vision item.
|
|
||||||
- The factory has the exec agent (#699) and rent-a-human (#679) — two vision capabilities now live.
|
|
||||||
- VISION.md updated with factory primitives (resources, addressables, observables) — formalizes the framework.
|
|
||||||
|
|
||||||
### Watch list
|
|
||||||
- #466: human response overdue (3 days) — will it ever be unblocked?
|
|
||||||
- #710-#712: skill package pipeline — first new work direction since Adoption
|
|
||||||
- #714/#715: duplicate cleanup by gardener
|
|
||||||
- prediction/backlog label: should be deleted per #686, still exists
|
|
||||||
- Ship milestone gaps: deploy profiles, assumptions register, vault-gated folds — not yet filed
|
|
||||||
|
|
@ -1,65 +0,0 @@
|
||||||
# Planner run — 2026-03-21 09:29 UTC
|
|
||||||
|
|
||||||
## Predictions triaged
|
|
||||||
- #455: DISMISS — orphaned gardener session, transient, supervisor's job
|
|
||||||
- #454: DISMISS — crashed review session, transient, supervisor recovers
|
|
||||||
- #449: DISMISS — legacy predictor duplication, already tracked by #419
|
|
||||||
- #448: WATCH — disk at 75% (was 79% last run), improving trend, supervisor monitors
|
|
||||||
- #447: DISMISS — swap at 52%, expected behavior with memory guards
|
|
||||||
- #446: WATCH — harb pipeline stalled 8h on needs_human, supervisor didn't escalate
|
|
||||||
|
|
||||||
## Issues created
|
|
||||||
- #465: feat: supervisor escalates prolonged PHASE:needs_human states — revealed by #446 pattern, prevents silent pipeline stalls
|
|
||||||
- #466: feat: example project demonstrating the full Disinto lifecycle — VISION.md Adoption gap, needed by docs and landing page
|
|
||||||
|
|
||||||
## Label fixes
|
|
||||||
- #393 (disinto init): added backlog label — was created last run but lost its label
|
|
||||||
- #394 (quickstart docs): added backlog label — same issue
|
|
||||||
- #395 (metrics dashboard): added backlog label — same issue
|
|
||||||
|
|
||||||
## Observations
|
|
||||||
- Predictor continues to over-signal on transient operational issues (4/6 predictions were transient tmux/session issues the supervisor already handles). Pattern from last run confirmed.
|
|
||||||
- Adoption issues from last planner run (#393/#394/#395) existed but had NO labels. The dev-agent only picks up backlog-labeled issues. Root cause unclear — either the label API call failed silently during creation, or labels were removed. Fixed this run.
|
|
||||||
- Foundation milestone remains complete. Adoption still the bottleneck — no progress since last run because issues weren't in the backlog.
|
|
||||||
- Tech-debt and small backlog items (~20) will be processed before Adoption features due to sequential pipeline and lower issue numbers.
|
|
||||||
- #357 (action-agent runtime isolation) is in-progress — active dev work happening.
|
|
||||||
|
|
||||||
## Deferred
|
|
||||||
- Scale milestone (multi-project works, plugin system premature without users)
|
|
||||||
- Evidence pipeline (harb-specific, blocked on #1047)
|
|
||||||
- Production halt/resume (#1) — far future, no users to protect yet
|
|
||||||
- Multi-VPS (#4) — premature, single server handles current load
|
|
||||||
- Adding backlog labels to #462 (PHASE:escalate) and #291 (secrets in formulas) — both valid but not highest leverage this cycle
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
# Planner run — 2026-03-21 10:05 UTC
|
|
||||||
|
|
||||||
## Predictions triaged
|
|
||||||
No unreviewed predictions.
|
|
||||||
|
|
||||||
## Issues created
|
|
||||||
No new issues — backlog is well-stocked (~30 open items) and aligned with VISION.md.
|
|
||||||
|
|
||||||
## Label fixes
|
|
||||||
- #291 (secrets in formulas): added backlog label — deferred last run, now promoted
|
|
||||||
- #289 (gardener creates investigation issues for closed escalations): added backlog label
|
|
||||||
- #462 (PHASE:escalate): added backlog label — complements #465
|
|
||||||
|
|
||||||
## Closures
|
|
||||||
- #144 (reschedule planner after predictor): closed — already implemented (predictor 06:00, planner weekly)
|
|
||||||
|
|
||||||
## Observations
|
|
||||||
- #357 (action-agent runtime isolation) COMPLETED since last run — merged via PR #464. Watch item resolved.
|
|
||||||
- #360 (no relabeling on DISMISS) also completed — merged via PR #468.
|
|
||||||
- Label persistence: #393/#394/#395 retain their backlog labels. Watch item resolved — the label loss from last run was a one-time issue.
|
|
||||||
- #361 (planner journal pattern) is in-progress — active dev work.
|
|
||||||
- Backlog queue still deep: ~20 tech-debt and small fixes sit ahead of the 4 Adoption features (#393/#394/#395/#466). Sequential pipeline means Adoption work is weeks out unless manually prioritized.
|
|
||||||
- Three previously unlabeled issues (#291, #289, #462) were invisible to the dev-agent. Now labeled as backlog. Last run deferred #291 and #462; this run is the right time to make them visible since the pipeline needs work to process.
|
|
||||||
|
|
||||||
## Deferred
|
|
||||||
- Scale milestone (plugin system, hosted option, community formulas — premature without users)
|
|
||||||
- Multi-VPS (#4) — single server handles current load
|
|
||||||
- Production halt/resume (#1) — no users to protect yet
|
|
||||||
- Self-tuning wake parameters (#2) — current static cron schedule works fine
|
|
||||||
- Memory update — only 2 runs since last summarization, threshold is 5
|
|
||||||
|
|
@ -1,38 +0,0 @@
|
||||||
# Planner run — 2026-03-22 07:02 UTC
|
|
||||||
|
|
||||||
## Predictions triaged
|
|
||||||
- #528: DISMISS — missing backlog label on #466. Fixed directly by adding the label. Recurring label-loss pattern (4/4 planner-created issues affected). Closed.
|
|
||||||
- #529: WATCH — swap at 57%, up from 52% (#447). Available RAM (4385MB) still above 2000MB threshold. Monitoring upward trend.
|
|
||||||
|
|
||||||
## Prerequisite tree updates
|
|
||||||
- Resolved: #393, #394, #395, #510, #504, #516, #514 all closed since last run
|
|
||||||
- Objectives completed: 5 moved to DONE (init, docs, dashboard, escalation, vault)
|
|
||||||
- Objectives ready: #466 (example project), #534 (landing page) — both in backlog
|
|
||||||
|
|
||||||
## Top 3 constraints
|
|
||||||
1. #466 (example project) — blocks Adoption completion → delays Scale — issue already in backlog
|
|
||||||
2. #534 (landing page value proposition) — blocks Growth goals visibility — issue filed this run
|
|
||||||
3. #535 (label-loss on planner-created issues) — cross-cutting reliability — issue filed this run
|
|
||||||
|
|
||||||
## Issues created
|
|
||||||
- #534: feat: landing page communicates value proposition clearly — Adoption milestone gap, Growth goals
|
|
||||||
- #535: fix: planner-created issues lose backlog labels on creation — 4/4 failure rate, delays pipeline activation
|
|
||||||
|
|
||||||
## Label fixes
|
|
||||||
- #466: added backlog label — confirmed prediction #528 was correct, recurring pattern
|
|
||||||
|
|
||||||
## Observations
|
|
||||||
- Explosive progress: ~50 issues closed between 2026-03-20 and 2026-03-22. Foundation and most of Adoption now complete.
|
|
||||||
- Adoption milestone nearly done: only #466 (example project) and #534 (landing page) remain. Both are READY with no blocking prerequisites.
|
|
||||||
- Tech-debt queue (9 items: #93, #110, #179, #310, #311, #330, #429, #433, #435) has lower issue numbers than #466, so sequential pipeline will process them first.
|
|
||||||
- #531 (dev-poll direct merges) is in-progress — improves pipeline throughput for approved PRs.
|
|
||||||
- Vault infrastructure deployed but empty (no pending/approved/fired items). Procurement gate available when Scale needs resources.
|
|
||||||
- Label-loss pattern confirmed as persistent: 4/4 planner-created issues lost backlog labels. Filed #535 to investigate and fix.
|
|
||||||
- Predictor signal quality improving: 2 predictions this run (vs 6 last run), both actionable.
|
|
||||||
- Scale milestone remains premature — no users yet. Plugin system, community formulas, hosted option all deferred.
|
|
||||||
|
|
||||||
## Deferred (in tree, not filed)
|
|
||||||
- Scale: plugin system, community formulas, hosted option — premature without users
|
|
||||||
- Vision items: #1 (halt/resume), #2 (self-tuning), #4 (multi-VPS) — far future
|
|
||||||
- Prediction #448 (disk 75%): still in prediction/backlog, trend was improving
|
|
||||||
- Prediction #446 (harb stall): supervisor escalation (#465/#510) now in place, should prevent recurrence
|
|
||||||
|
|
@ -1,44 +0,0 @@
|
||||||
# Planner run — 2026-03-23 07:15 UTC
|
|
||||||
|
|
||||||
## Predictions triaged
|
|
||||||
- #583: DISMISS — #568 (merge guard blocker) already exists as an open issue. Fixed labels directly (added backlog+priority).
|
|
||||||
- #582: DISMISS — backlog depletion is expected; this planner run is the replenishment cycle. Factory cleared 20+ issues in 48h.
|
|
||||||
- #581: DISMISS — Gitea CVEs are Codeberg's upstream infrastructure. Disinto can't upgrade their Gitea. RCE requires repo-template processing (not in our workflow). Auto-merge cancellation mitigated by our review-agent flow.
|
|
||||||
- #580: WATCH — Caddy CVEs. disinto.ai is a static site without FastCGI/PHP, so the HIGH RCE (CVE-2026-27590) doesn't apply. Medium CVEs low risk. No system-upgrade formula available.
|
|
||||||
|
|
||||||
## Prerequisite tree updates
|
|
||||||
- Resolved: #534 (landing page) → DONE, #535 (label-loss fix) → resolved
|
|
||||||
- Discovered: #568 (merge guard blocker) added as new objective — every PR merge escalates, blocking full pipeline autonomy
|
|
||||||
- Status change: #466 remains READY, #534 moved to DONE
|
|
||||||
|
|
||||||
## Top 3 constraints
|
|
||||||
1. #568 — PreToolUse guard blocks merge — affects every PR across all agents — issue already open, added backlog+priority
|
|
||||||
2. #466 — example project (last Adoption item) — blocks Adoption completion — issue already open, added backlog+priority
|
|
||||||
3. Tech-debt backlog visibility — 9 items invisible to dev-poll — fixed by adding backlog labels to all 9
|
|
||||||
|
|
||||||
## Issues created
|
|
||||||
No new issues — all constraints already have existing issues.
|
|
||||||
|
|
||||||
## Priority label changes
|
|
||||||
- Added priority: #568, #466 (top 2 constraints)
|
|
||||||
- No priority labels removed (clean set)
|
|
||||||
|
|
||||||
## Label fixes
|
|
||||||
- #568: added backlog + priority (was unlabeled, invisible to dev-poll)
|
|
||||||
- #466: added backlog + priority (label-loss recurrence — 5th time this pattern appears)
|
|
||||||
- #93, #110, #179, #310, #311, #330, #429, #433, #435: added backlog label to all 9 tech-debt items
|
|
||||||
|
|
||||||
## Observations
|
|
||||||
- Explosive throughput confirmed: factory cleared entire backlog (20+ issues, 19+ PRs) in ~48h. The predictor correctly flagged the empty state (#582).
|
|
||||||
- Label-loss persists despite the #535 fix: #466 lost its label AGAIN. The #535 fix addressed planner-created label application, but #466 was created before that fix. The root cause may be that the original label was never applied, or that it was stripped by the gardener quality gate (though the issue body does have acceptance criteria, so the gate shouldn't strip it).
|
|
||||||
- Merge guard (#568) is the #1 factory constraint: every PR requires human merge intervention. Dev-poll's try_direct_merge() catches approved PRs eventually, but with delay and false escalations. This should be fixed before the factory tackles #466 (which will generate multiple PRs).
|
|
||||||
- Adoption milestone nearly complete: 4/5 objectives DONE (#393, #394, #395, #534). Only #466 remains.
|
|
||||||
- Scale milestone remains premature — no external users yet. Plugin system, community formulas, hosted option all deferred.
|
|
||||||
- Vault infrastructure deployed but empty — no procurement requests needed this cycle.
|
|
||||||
- RESOURCES.md unchanged since last run.
|
|
||||||
|
|
||||||
## Deferred (in tree, not filed)
|
|
||||||
- Scale: plugin system, community formulas, hosted option — premature without users
|
|
||||||
- Vision items: #1 (halt/resume), #2 (self-tuning), #4 (multi-VPS) — far future
|
|
||||||
- Prediction #529 (swap 57%): still in prediction/backlog, stable
|
|
||||||
- Prediction #580 (Caddy CVEs): watching, static site mitigates RCE
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
# Planner run — 2026-03-25 07:15 UTC
|
|
||||||
|
|
||||||
## Predictions triaged
|
|
||||||
- #656: DISMISS — planning deadlock is resolved by this run; tree staleness corrected
|
|
||||||
- #655: PROMOTE_BACKLOG → #663 — check_active guard should log when skipping
|
|
||||||
- #644: WATCH — disk P1 reactive cleanup works; not urgent enough for backlog slot
|
|
||||||
- #643: PROMOTE_BACKLOG → #664 — supervisor should clean stale phase files for closed issues
|
|
||||||
- #642: DISMISS — HTTP 401 likely caused by #653 (wrong remote), now fixed
|
|
||||||
- #640: DISMISS — all 5 bootstrap failures (#634-638) closed; remaining fixes in pipeline
|
|
||||||
|
|
||||||
## Prerequisite tree updates
|
|
||||||
- Resolved: #568 (merge guard) moved from BLOCKED to DONE — was closed but tree was stale
|
|
||||||
- Resolved: bootstrap hardening issues #634-638, #652, #653, #658 all closed
|
|
||||||
- Discovered: #660 (Forgejo reachability) and #661 (Woodpecker CI wiring) as remaining init prerequisites
|
|
||||||
- Added: new objective "Factory operational reliability" with #663 and #664
|
|
||||||
- Added: #668 (end-to-end init smoke test) as init prerequisite
|
|
||||||
- Status change: #466 marked ESCALATED (bounced + gardener escalation, awaiting human decision)
|
|
||||||
|
|
||||||
## Top 5 constraints
|
|
||||||
1. #466 — example project — blocks Adoption completion — ESCALATED, awaiting human decision
|
|
||||||
2. #661 — Woodpecker CI wiring — blocks init completeness — in backlog with priority
|
|
||||||
3. #668 — init smoke test — blocks init quality assurance — filed this run
|
|
||||||
4. #663 — guard logging — prevents invisible agent dropouts — filed this run (from #655)
|
|
||||||
5. #664 — stale phase cleanup — reduces supervisor signal noise — filed this run (from #643)
|
|
||||||
|
|
||||||
## Stuck issues detected
|
|
||||||
- #466: BOUNCED (1x, "too large for single session") + ESCALATED (gardener: "needs human decision on approach") — added comment noting escalation seen, suggested option (b) local demo may be viable
|
|
||||||
|
|
||||||
## Issues created
|
|
||||||
- #663: fix: check_active guard should log to stderr when skipping (from prediction #655)
|
|
||||||
- #664: fix: supervisor should clean up stale PHASE:escalate files (from prediction #643)
|
|
||||||
- #668: feat: end-to-end disinto init smoke test in CI (new constraint — init quality)
|
|
||||||
|
|
||||||
## Priority label changes
|
|
||||||
- Added priority: #661, #663, #664, #668 (top constraints)
|
|
||||||
- Kept priority: #466 (still #1 constraint, though escalated)
|
|
||||||
- No priority removed (only #466 had it previously)
|
|
||||||
|
|
||||||
## Observations
|
|
||||||
- Massive progress since last run (2026-03-23): ~30 issues closed in 48h. Bootstrap hardening wave (#634-638, #652, #653, #658) completed. Full stack containerized (#618, #619).
|
|
||||||
- Planner missed the 2026-03-24 run due to the active-state guard deploy gap (#655): state files were created 21h after the guard merged, and there was no visible signal of the missed run.
|
|
||||||
- Factory nearly idle: 1 in-progress (#660), 1 backlog (#661), plus 3 newly filed. After current pipeline clears, #466 is the only Adoption item left.
|
|
||||||
- Adoption milestone 4/5 complete. #466 is stuck on human decision (external vs local demo). The containerized stack work makes option (b) viable — suggested in comment.
|
|
||||||
- #568 (merge guard) was marked BLOCKED in tree but actually closed — 2-day stale tree from missed planner run.
|
|
||||||
- Predictor signal quality: 6 predictions, all substantive. 2 promoted, 1 watched, 3 dismissed. Better signal-to-noise than earlier runs.
|
|
||||||
- RESOURCES.md unchanged. Vault empty (no procurement requests).
|
|
||||||
|
|
||||||
## Deferred (in tree, not filed)
|
|
||||||
- Scale: plugin system, community formulas, hosted option — premature without users
|
|
||||||
- Vision items: #1 (halt/resume), #2 (self-tuning), #4 (multi-VPS) — far future
|
|
||||||
- Prediction #644 (disk P1): watching, reactive cleanup works
|
|
||||||
- Prediction #580 (Caddy CVEs): still in prediction/backlog, static site mitigates
|
|
||||||
- Prediction #529 (swap 57%): stable, not trending worse
|
|
||||||
|
|
@ -1,54 +0,0 @@
|
||||||
# Planner run — 2026-03-26 07:15 UTC
|
|
||||||
|
|
||||||
## Predictions triaged
|
|
||||||
- #644: DISMISS — disk P1 handled by supervisor reactive cleanup, no persistent issue
|
|
||||||
- #580: DISMISS — Caddy CVEs don't apply to static site (no FastCGI/PHP)
|
|
||||||
- #529: DISMISS — swap stable at 57%, supervisor monitors, not trending worse
|
|
||||||
- #446: DISMISS — root cause fixed by #465/#510 escalation path improvements
|
|
||||||
All 4 were labeled prediction/backlog and have been migrated to prediction/dismissed per #686 policy. Created the prediction/dismissed label (it was missing).
|
|
||||||
|
|
||||||
## Prerequisite tree updates
|
|
||||||
- Resolved: #660 (Forgejo reachability), #661 (Woodpecker CI wiring), #668 (init smoke test), #663 (guard logging), #664 (stale phase cleanup) — all closed
|
|
||||||
- Objectives completed: bootstrap (#393) now FULLY DONE (all prereqs resolved), factory operational reliability DONE
|
|
||||||
- New objectives added: exec agent (#699) DONE, rent-a-human (#679) DONE, skill package distribution (#710-#712), observable addressables (#718)
|
|
||||||
- Discovered: #710-#715 skill package initiative (created since last run), with #714/#715 as duplicates of #710/#711
|
|
||||||
|
|
||||||
## Top 5 constraints
|
|
||||||
1. #466 — example project — blocks Adoption completion — ESCALATED 3 days, no human response
|
|
||||||
2. #710 — skill package creation — enables distribution — in backlog with priority
|
|
||||||
3. #718 — observable addressables — Ship milestone bridge — filed this run
|
|
||||||
4. #714/#715 — duplicate issues — pending gardener cleanup
|
|
||||||
5. prediction/backlog label — should be deleted per #686 — needs admin action
|
|
||||||
|
|
||||||
## Stuck issues detected
|
|
||||||
- #466: ESCALATED (since 2026-03-23, 3 days) — no human response. Dev-agent bounced as too large, gardener escalated for approach decision. Not re-promoting — already has priority label.
|
|
||||||
|
|
||||||
## Issues created
|
|
||||||
- #718: feat: observable addressables — engagement measurement for deployed artifacts — Ship milestone, Fold 2→3 bridge
|
|
||||||
|
|
||||||
## Priority label changes
|
|
||||||
- Added priority: #710 (next ready pipeline work)
|
|
||||||
- Kept priority: #466 (still #1 constraint, escalated)
|
|
||||||
- No priority removed
|
|
||||||
|
|
||||||
## Label changes
|
|
||||||
- #710, #711, #712, #713: added backlog (were unlabeled, invisible to pipeline)
|
|
||||||
- #644, #580, #529, #446: relabeled from prediction/backlog to prediction/dismissed, closed
|
|
||||||
- Created prediction/dismissed label (id: 1335444) — missing despite #686 implementation
|
|
||||||
|
|
||||||
## Observations
|
|
||||||
- Explosive progress continues: ~30 issues closed since last run (2026-03-25). Bootstrap fully hardened, init smoke test passing, exec agent deployed, rent-a-human implemented.
|
|
||||||
- Factory nearly idle: no backlog items existed until this run labeled #710-#713. The skill package chain is the only ready work.
|
|
||||||
- Skill package initiative (#710-#715) appeared since last run — created from #709 research. Two parallel tracks with duplicates (#710/#714, #711/#715). Preferred #710 chain, flagged duplicates for gardener.
|
|
||||||
- Adoption milestone 4/5 complete. #466 stuck 3 days on human decision. All technical prerequisites resolved. The human bottleneck is real.
|
|
||||||
- Ship milestone entering scope: #679 (rent-a-human) and #699 (exec agent) already done. #718 (observable addressables) filed. Deploy profiles and assumptions register remain untracked.
|
|
||||||
- VISION.md updated this cycle (3c97ddb) — factory primitives (resources, addressables, observables) now formally defined. Tree reflects this.
|
|
||||||
- RESOURCES.md unchanged. Vault empty (no procurement requests).
|
|
||||||
- Graph report clean: no cycles, no bottlenecks. Orphan issues are all closed bug fixes — expected.
|
|
||||||
|
|
||||||
## Deferred (in tree, not filed)
|
|
||||||
- Ship: deploy profiles per artifact type — premature until skill package or example project demonstrates need
|
|
||||||
- Ship: assumptions register — needs design decision on format
|
|
||||||
- Ship: vault-gated fold transitions — vault infrastructure exists, fold logic not yet designed
|
|
||||||
- Scale: plugin system, community formulas, hosted option — premature without users
|
|
||||||
- Vision items: #1 (halt/resume), #2 (self-tuning), #4 (multi-VPS) — far future
|
|
||||||
|
|
@ -48,30 +48,33 @@ log "--- Planner run start ---"
|
||||||
|
|
||||||
# ── Load formula + context ───────────────────────────────────────────────
|
# ── Load formula + context ───────────────────────────────────────────────
|
||||||
load_formula "$FACTORY_ROOT/formulas/run-planner.toml"
|
load_formula "$FACTORY_ROOT/formulas/run-planner.toml"
|
||||||
build_context_block VISION.md AGENTS.md RESOURCES.md planner/prerequisite-tree.md
|
build_context_block VISION.md AGENTS.md ops:RESOURCES.md ops:prerequisites.md
|
||||||
|
|
||||||
# ── Build structural analysis graph ──────────────────────────────────────
|
# ── Build structural analysis graph ──────────────────────────────────────
|
||||||
build_graph_section
|
build_graph_section
|
||||||
|
|
||||||
|
# ── Ensure ops repo is available ───────────────────────────────────────
|
||||||
|
ensure_ops_repo
|
||||||
|
|
||||||
# ── Read planner memory ─────────────────────────────────────────────────
|
# ── Read planner memory ─────────────────────────────────────────────────
|
||||||
MEMORY_BLOCK=""
|
MEMORY_BLOCK=""
|
||||||
MEMORY_FILE="$PROJECT_REPO_ROOT/planner/MEMORY.md"
|
MEMORY_FILE="$OPS_REPO_ROOT/knowledge/planner-memory.md"
|
||||||
if [ -f "$MEMORY_FILE" ]; then
|
if [ -f "$MEMORY_FILE" ]; then
|
||||||
MEMORY_BLOCK="
|
MEMORY_BLOCK="
|
||||||
### planner/MEMORY.md (persistent memory from prior runs)
|
### knowledge/planner-memory.md (persistent memory from prior runs)
|
||||||
$(cat "$MEMORY_FILE")
|
$(cat "$MEMORY_FILE")
|
||||||
"
|
"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── Read recent journal files ──────────────────────────────────────────
|
# ── Read recent journal files ──────────────────────────────────────────
|
||||||
JOURNAL_BLOCK=""
|
JOURNAL_BLOCK=""
|
||||||
JOURNAL_DIR="$PROJECT_REPO_ROOT/planner/journal"
|
JOURNAL_DIR="$OPS_REPO_ROOT/journal/planner"
|
||||||
if [ -d "$JOURNAL_DIR" ]; then
|
if [ -d "$JOURNAL_DIR" ]; then
|
||||||
# Load last 5 journal files (most recent first) for run history context
|
# Load last 5 journal files (most recent first) for run history context
|
||||||
JOURNAL_FILES=$(find "$JOURNAL_DIR" -name '*.md' -type f | sort -r | head -5)
|
JOURNAL_FILES=$(find "$JOURNAL_DIR" -name '*.md' -type f | sort -r | head -5)
|
||||||
if [ -n "$JOURNAL_FILES" ]; then
|
if [ -n "$JOURNAL_FILES" ]; then
|
||||||
JOURNAL_BLOCK="
|
JOURNAL_BLOCK="
|
||||||
### Recent journal entries (planner/journal/)
|
### Recent journal entries (journal/planner/)
|
||||||
"
|
"
|
||||||
while IFS= read -r jf; do
|
while IFS= read -r jf; do
|
||||||
JOURNAL_BLOCK="${JOURNAL_BLOCK}
|
JOURNAL_BLOCK="${JOURNAL_BLOCK}
|
||||||
|
|
|
||||||
|
|
@ -1,77 +0,0 @@
|
||||||
# Prerequisite Tree
|
|
||||||
<!-- Last updated: 2026-03-26 -->
|
|
||||||
|
|
||||||
## Objective: One-command bootstrap — `disinto init` (#393)
|
|
||||||
- [x] Core agent loop stable (Foundation)
|
|
||||||
- [x] Multi-project support (Foundation)
|
|
||||||
- [x] Guard allows formula agents in worktrees (#487)
|
|
||||||
- [x] Bundled dust cleanup — set-euo-pipefail (#516)
|
|
||||||
- [x] Agent-session.sh pre-register worktree trust (#514)
|
|
||||||
- [x] Bootstrap hardening — Forgejo INSTALL_LOCK (#634), su-exec (#635), admin user (#636), DNS (#637), crontab (#638), auth (#652), remote target (#653), token creation (#658)
|
|
||||||
- [x] Agents container reaches Forgejo — env.sh override (#660)
|
|
||||||
- [x] Woodpecker CI wiring during init (#661)
|
|
||||||
- [x] End-to-end init smoke test (#668)
|
|
||||||
Status: DONE — all prerequisites resolved, init fully functional
|
|
||||||
|
|
||||||
## Objective: Documentation site with quickstart (#394)
|
|
||||||
- [x] disinto init working (#393)
|
|
||||||
Status: DONE — #394 closed
|
|
||||||
|
|
||||||
## Objective: Metrics dashboard (#395)
|
|
||||||
- [x] disinto init working (#393)
|
|
||||||
- [x] Supervisor formula stable
|
|
||||||
Status: DONE — #395 closed
|
|
||||||
|
|
||||||
## Objective: Example project demonstrating full lifecycle (#466)
|
|
||||||
- [x] disinto init working (#393)
|
|
||||||
- [ ] Human decision on implementation approach (external repo vs local demo) — blocked-on-vault
|
|
||||||
Status: BLOCKED — bounced by dev-agent (too large), routed to vault for human decision
|
|
||||||
|
|
||||||
## Objective: Landing page communicating value proposition (#534)
|
|
||||||
- [x] disinto init working (#393)
|
|
||||||
- [x] Documentation site live (#394)
|
|
||||||
- [x] Planner-created issues retain labels reliably (#535)
|
|
||||||
Status: DONE — #534 closed
|
|
||||||
|
|
||||||
## Objective: Autonomous PR merge pipeline (#568)
|
|
||||||
- [x] PreToolUse guard allows merge API calls from phase-handler (#568)
|
|
||||||
Status: DONE — #568 closed
|
|
||||||
|
|
||||||
## Objective: Unified escalation path (#510)
|
|
||||||
- [x] PHASE:escalate replaces PHASE:needs_human (supersedes #465)
|
|
||||||
Status: DONE — #510 closed
|
|
||||||
|
|
||||||
## Objective: Vault as procurement gate + RESOURCES.md inventory (#504)
|
|
||||||
- [x] RESOURCES.md exists
|
|
||||||
- [x] Vault poll scripts deployed (vault-poll.sh)
|
|
||||||
Status: DONE — #504 closed
|
|
||||||
|
|
||||||
## Objective: Factory operational reliability
|
|
||||||
- [x] check_active guard logs when skipping (#663)
|
|
||||||
- [x] Supervisor cleans stale PHASE:escalate files (#664)
|
|
||||||
Status: DONE — both fixes merged
|
|
||||||
|
|
||||||
## Objective: Exec agent — interactive executive assistant (#699)
|
|
||||||
- [x] Matrix bot infrastructure
|
|
||||||
- [x] CHARACTER.md personality definition
|
|
||||||
- [x] exec-session.sh implementation
|
|
||||||
Status: DONE — #699 closed
|
|
||||||
|
|
||||||
## Objective: Rent-a-human — formula-dispatchable human action drafts (#679)
|
|
||||||
- [x] Formula infrastructure (run-rent-a-human.toml)
|
|
||||||
- [x] Vault gating for human actions
|
|
||||||
Status: DONE — #679 closed
|
|
||||||
|
|
||||||
## Objective: Skill package distribution (#710 → #711 → #712)
|
|
||||||
- [ ] Create disinto skill package — SKILL.md + helper scripts (#710) — in backlog, priority
|
|
||||||
- [ ] Publish to ClawHub registry (#711) — in backlog, depends on #710
|
|
||||||
- [ ] Submit to secondary registries (#712) — in backlog, depends on #711
|
|
||||||
- [ ] Evaluate MCP server wrapper (#713) — in backlog, independent
|
|
||||||
- Note: #714, #715 flagged as duplicates of #710, #711 — pending gardener cleanup
|
|
||||||
Status: READY — no blocking prerequisites
|
|
||||||
|
|
||||||
## Objective: Observable addressables — engagement measurement (#718)
|
|
||||||
- [ ] Lightweight analytics on disinto.ai (#718) — in backlog
|
|
||||||
- [ ] Deploy formula verifies measurement is live
|
|
||||||
- [ ] Planner consumes engagement data
|
|
||||||
Status: READY — Ship milestone, Fold 2 → Fold 3 bridge
|
|
||||||
|
|
@ -15,7 +15,7 @@ The predictor's own prediction history (open + closed issues) serves as its
|
||||||
memory — it reviews what was actioned, dismissed, or deferred to decide where
|
memory — it reviews what was actioned, dismissed, or deferred to decide where
|
||||||
to focus next. No hardcoded signal categories; Claude decides where to look
|
to focus next. No hardcoded signal categories; Claude decides where to look
|
||||||
based on available data: prerequisite tree, evidence directories, VISION.md,
|
based on available data: prerequisite tree, evidence directories, VISION.md,
|
||||||
RESOURCES.md, open issues, agent logs, and external signals (via web search).
|
RESOURCES.md (from ops repo), open issues, agent logs, and external signals (via web search).
|
||||||
|
|
||||||
Files up to 5 actions per run (predictions + dispatches combined). Each
|
Files up to 5 actions per run (predictions + dispatches combined). Each
|
||||||
exploit counts as 2 (prediction + action dispatch). The predictor MUST NOT
|
exploit counts as 2 (prediction + action dispatch). The predictor MUST NOT
|
||||||
|
|
@ -41,11 +41,11 @@ RAM < 2000 MB).
|
||||||
interactive session
|
interactive session
|
||||||
|
|
||||||
**Environment variables consumed**:
|
**Environment variables consumed**:
|
||||||
- `FORGE_TOKEN`, `FORGE_PREDICTOR_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`
|
- `FORGE_TOKEN`, `FORGE_PREDICTOR_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`, `OPS_REPO_ROOT`
|
||||||
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by predictor-run.sh)
|
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by predictor-run.sh)
|
||||||
|
|
||||||
**Lifecycle**: predictor-run.sh (daily 06:00 cron) → lock + memory guard →
|
**Lifecycle**: predictor-run.sh (daily 06:00 cron) → lock + memory guard →
|
||||||
load formula + context (AGENTS.md, RESOURCES.md, VISION.md, prerequisite-tree.md)
|
load formula + context (AGENTS.md, VISION.md from code repo; RESOURCES.md, prerequisites.md from ops repo)
|
||||||
→ create tmux session → Claude fetches prediction history (open + closed) →
|
→ create tmux session → Claude fetches prediction history (open + closed) →
|
||||||
reviews track record (actioned/dismissed/watching) → finds weaknesses
|
reviews track record (actioned/dismissed/watching) → finds weaknesses
|
||||||
(prerequisite tree gaps, thin evidence, stale watches, external risks) →
|
(prerequisite tree gaps, thin evidence, stale watches, external risks) →
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ log "--- Predictor run start ---"
|
||||||
|
|
||||||
# ── Load formula + context ───────────────────────────────────────────────
|
# ── Load formula + context ───────────────────────────────────────────────
|
||||||
load_formula "$FACTORY_ROOT/formulas/run-predictor.toml"
|
load_formula "$FACTORY_ROOT/formulas/run-predictor.toml"
|
||||||
build_context_block AGENTS.md RESOURCES.md VISION.md planner/prerequisite-tree.md
|
build_context_block AGENTS.md ops:RESOURCES.md VISION.md ops:prerequisites.md
|
||||||
|
|
||||||
# ── Build structural analysis graph ──────────────────────────────────────
|
# ── Build structural analysis graph ──────────────────────────────────────
|
||||||
build_graph_section
|
build_graph_section
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,10 @@
|
||||||
|
|
||||||
name = "disinto"
|
name = "disinto"
|
||||||
repo = "johba/disinto"
|
repo = "johba/disinto"
|
||||||
|
ops_repo = "johba/disinto-ops"
|
||||||
forge_url = "http://localhost:3000"
|
forge_url = "http://localhost:3000"
|
||||||
repo_root = "/home/YOU/dark-factory"
|
repo_root = "/home/YOU/dark-factory"
|
||||||
|
ops_repo_root = "/home/YOU/disinto-ops"
|
||||||
primary_branch = "main"
|
primary_branch = "main"
|
||||||
|
|
||||||
[ci]
|
[ci]
|
||||||
|
|
|
||||||
|
|
@ -32,8 +32,8 @@ log() {
|
||||||
# Caddy structured access log (JSON lines)
|
# Caddy structured access log (JSON lines)
|
||||||
CADDY_LOG="${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}"
|
CADDY_LOG="${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}"
|
||||||
|
|
||||||
# Evidence output directory (committed to git)
|
# Evidence output directory (committed to ops repo)
|
||||||
EVIDENCE_DIR="${FACTORY_ROOT}/evidence/engagement"
|
EVIDENCE_DIR="${OPS_REPO_ROOT}/evidence/engagement"
|
||||||
|
|
||||||
# Report date — defaults to today
|
# Report date — defaults to today
|
||||||
REPORT_DATE=$(date -u +%Y-%m-%d)
|
REPORT_DATE=$(date -u +%Y-%m-%d)
|
||||||
|
|
|
||||||
|
|
@ -324,7 +324,7 @@ Read `VISION.md` at the repo root for the full vision. Then cross-reference
|
||||||
with the prerequisite tree:
|
with the prerequisite tree:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cat "${PROJECT_REPO_ROOT}/planner/prerequisite-tree.md"
|
cat "${OPS_REPO_ROOT}/prerequisites.md"
|
||||||
```
|
```
|
||||||
|
|
||||||
The prerequisite tree maps vision objectives to concrete issues. Items marked
|
The prerequisite tree maps vision objectives to concrete issues. Items marked
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ while [[ $# -gt 0 ]]; do
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
: "${PROJECT_REPO_ROOT:?PROJECT_REPO_ROOT is required}"
|
: "${OPS_REPO_ROOT:?OPS_REPO_ROOT is required}"
|
||||||
|
|
||||||
if [[ -z "$agent" ]]; then
|
if [[ -z "$agent" ]]; then
|
||||||
echo "Error: agent name is required (planner, supervisor, predictor)" >&2
|
echo "Error: agent name is required (planner, supervisor, predictor)" >&2
|
||||||
|
|
@ -51,8 +51,8 @@ fi
|
||||||
|
|
||||||
# --- Resolve journal directory ---
|
# --- Resolve journal directory ---
|
||||||
case "$agent" in
|
case "$agent" in
|
||||||
planner) journal_dir="${PROJECT_REPO_ROOT}/planner/journal" ;;
|
planner) journal_dir="${OPS_REPO_ROOT}/journal/planner" ;;
|
||||||
supervisor) journal_dir="${PROJECT_REPO_ROOT}/supervisor/journal" ;;
|
supervisor) journal_dir="${OPS_REPO_ROOT}/journal/supervisor" ;;
|
||||||
predictor)
|
predictor)
|
||||||
echo "The predictor does not write journal files."
|
echo "The predictor does not write journal files."
|
||||||
echo "Its memory lives in forge issues labeled 'prediction/unreviewed' and 'prediction/actioned'."
|
echo "Its memory lives in forge issues labeled 'prediction/unreviewed' and 'prediction/actioned'."
|
||||||
|
|
|
||||||
|
|
@ -31,10 +31,9 @@ runs directly from cron like the planner and predictor.
|
||||||
- `formulas/run-supervisor.toml` — Execution spec: five steps (preflight review,
|
- `formulas/run-supervisor.toml` — Execution spec: five steps (preflight review,
|
||||||
health-assessment, decide-actions, report, journal) with `needs` dependencies.
|
health-assessment, decide-actions, report, journal) with `needs` dependencies.
|
||||||
Claude evaluates all metrics and takes actions in a single interactive session
|
Claude evaluates all metrics and takes actions in a single interactive session
|
||||||
- `supervisor/journal/*.md` — Daily health logs from each supervisor run (local,
|
- `$OPS_REPO_ROOT/journal/supervisor/*.md` — Daily health logs from each supervisor run
|
||||||
committed periodically)
|
|
||||||
- `supervisor/PROMPT.md` — Best-practices reference for remediation actions
|
- `supervisor/PROMPT.md` — Best-practices reference for remediation actions
|
||||||
- `supervisor/best-practices/*.md` — Domain-specific remediation guides (memory,
|
- `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory,
|
||||||
disk, CI, git, dev-agent, review-agent, forge)
|
disk, CI, git, dev-agent, review-agent, forge)
|
||||||
- `supervisor/supervisor-poll.sh` — Legacy bash orchestrator (superseded by
|
- `supervisor/supervisor-poll.sh` — Legacy bash orchestrator (superseded by
|
||||||
supervisor-run.sh + formula)
|
supervisor-run.sh + formula)
|
||||||
|
|
@ -43,7 +42,7 @@ runs directly from cron like the planner and predictor.
|
||||||
P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping).
|
P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping).
|
||||||
|
|
||||||
**Environment variables consumed**:
|
**Environment variables consumed**:
|
||||||
- `FORGE_TOKEN`, `FORGE_SUPERVISOR_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`
|
- `FORGE_TOKEN`, `FORGE_SUPERVISOR_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `PROJECT_NAME`, `PROJECT_REPO_ROOT`, `OPS_REPO_ROOT`
|
||||||
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by supervisor-run.sh)
|
- `PRIMARY_BRANCH`, `CLAUDE_MODEL` (set to sonnet by supervisor-run.sh)
|
||||||
- `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries
|
- `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,14 +15,14 @@ You are the supervisor agent for `$FORGE_REPO`. You were called because
|
||||||
|
|
||||||
Fix the issue yourself. You have full shell access and `--dangerously-skip-permissions`.
|
Fix the issue yourself. You have full shell access and `--dangerously-skip-permissions`.
|
||||||
|
|
||||||
Before acting, read the relevant best-practices file:
|
Before acting, read the relevant knowledge file from the ops repo:
|
||||||
- Memory issues → `cat ${FACTORY_ROOT}/supervisor/best-practices/memory.md`
|
- Memory issues → `cat ${OPS_REPO_ROOT}/knowledge/memory.md`
|
||||||
- Disk issues → `cat ${FACTORY_ROOT}/supervisor/best-practices/disk.md`
|
- Disk issues → `cat ${OPS_REPO_ROOT}/knowledge/disk.md`
|
||||||
- CI issues → `cat ${FACTORY_ROOT}/supervisor/best-practices/ci.md`
|
- CI issues → `cat ${OPS_REPO_ROOT}/knowledge/ci.md`
|
||||||
- forge / rate limits → `cat ${FACTORY_ROOT}/supervisor/best-practices/forge.md`
|
- forge / rate limits → `cat ${OPS_REPO_ROOT}/knowledge/forge.md`
|
||||||
- Dev-agent issues → `cat ${FACTORY_ROOT}/supervisor/best-practices/dev-agent.md`
|
- Dev-agent issues → `cat ${OPS_REPO_ROOT}/knowledge/dev-agent.md`
|
||||||
- Review-agent issues → `cat ${FACTORY_ROOT}/supervisor/best-practices/review-agent.md`
|
- Review-agent issues → `cat ${OPS_REPO_ROOT}/knowledge/review-agent.md`
|
||||||
- Git issues → `cat ${FACTORY_ROOT}/supervisor/best-practices/git.md`
|
- Git issues → `cat ${OPS_REPO_ROOT}/knowledge/git.md`
|
||||||
|
|
||||||
## Credentials & API Access
|
## Credentials & API Access
|
||||||
|
|
||||||
|
|
@ -83,7 +83,7 @@ When you see "Dev-agent blocked: last N polls all report 'no ready issues'":
|
||||||
|
|
||||||
File a vault procurement item so the human is notified through the vault:
|
File a vault procurement item so the human is notified through the vault:
|
||||||
```bash
|
```bash
|
||||||
cat > "${PROJECT_REPO_ROOT}/vault/pending/supervisor-$(date -u +%Y%m%d-%H%M)-issue.md" <<'VAULT_EOF'
|
cat > "${OPS_REPO_ROOT}/vault/pending/supervisor-$(date -u +%Y%m%d-%H%M)-issue.md" <<'VAULT_EOF'
|
||||||
# <What is needed>
|
# <What is needed>
|
||||||
## What
|
## What
|
||||||
<description of the problem and why the supervisor cannot fix it>
|
<description of the problem and why the supervisor cannot fix it>
|
||||||
|
|
@ -106,13 +106,13 @@ FIXED: <what you did>
|
||||||
```
|
```
|
||||||
or
|
or
|
||||||
```
|
```
|
||||||
VAULT: filed vault/pending/<id>.md — <what's needed>
|
VAULT: filed $OPS_REPO_ROOT/vault/pending/<id>.md — <what's needed>
|
||||||
```
|
```
|
||||||
|
|
||||||
## Learning
|
## Learning
|
||||||
|
|
||||||
If you discover something new, append it to the relevant best-practices file:
|
If you discover something new, append it to the relevant knowledge file in the ops repo:
|
||||||
```bash
|
```bash
|
||||||
bash ${FACTORY_ROOT}/supervisor/update-prompt.sh "best-practices/<file>.md" "### Lesson title
|
echo "### Lesson title
|
||||||
Description of what you learned."
|
Description of what you learned." >> "${OPS_REPO_ROOT}/knowledge/<file>.md"
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
# CI Best Practices
|
|
||||||
|
|
||||||
## Environment
|
|
||||||
- Woodpecker CI at localhost:8000 (Docker backend)
|
|
||||||
- Postgres DB: use `wpdb` helper from env.sh
|
|
||||||
- Woodpecker API: use `woodpecker_api` helper from env.sh
|
|
||||||
- Example (harb): CI images pre-built at `registry.niovi.voyage/harb/*:latest`
|
|
||||||
|
|
||||||
## Safe Fixes
|
|
||||||
- Retrigger CI (preferred, automated): Woodpecker API POST
|
|
||||||
```bash
|
|
||||||
woodpecker_api "/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUMBER}" -X POST
|
|
||||||
```
|
|
||||||
supervisor-poll.sh does this automatically for infra failures (max 2 retries).
|
|
||||||
- Retrigger CI (manual fallback): push empty commit to PR branch
|
|
||||||
```bash
|
|
||||||
cd /tmp/${PROJECT_NAME}-worktree-<issue> && git commit --allow-empty -m "ci: retrigger" --no-verify && git push origin <branch> --force
|
|
||||||
```
|
|
||||||
- Restart woodpecker-agent: `sudo systemctl restart woodpecker-agent`
|
|
||||||
- View pipeline status: `wpdb -c "SELECT number, status FROM pipelines WHERE repo_id=$WOODPECKER_REPO_ID ORDER BY number DESC LIMIT 5;"`
|
|
||||||
- View failed steps: `bash ${FACTORY_ROOT}/lib/ci-debug.sh failures <pipeline-number>`
|
|
||||||
- View step logs: `bash ${FACTORY_ROOT}/lib/ci-debug.sh logs <pipeline-number> <step-name>`
|
|
||||||
|
|
||||||
## Dangerous (escalate)
|
|
||||||
- Restarting woodpecker-server (drops all running pipelines)
|
|
||||||
- Modifying pipeline configs in `.woodpecker/` directory
|
|
||||||
|
|
||||||
## Known Issues
|
|
||||||
- forge rate-limits SSH clones. `git` step fails with exit 128. Retrigger usually works.
|
|
||||||
- `log_entries` table grows fast (was 5.6GB once). Truncate periodically.
|
|
||||||
- Example (harb): Running CI + harb stack = 14+ containers on 8GB. Memory pressure is real.
|
|
||||||
- CI images take hours to rebuild. Never run `docker system prune -a`.
|
|
||||||
|
|
||||||
## Lessons Learned
|
|
||||||
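- Exit code 128 on git step = forge rate limit, not a code problem. Retrigger — but if several pipelines just failed the same way, wait 10–15 minutes first, because retriggering during a rate-limit storm makes it worse.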
|
|
||||||
- Exit code 137 = OOM kill. Check memory, kill stale processes, retrigger.
|
|
||||||
- `node-quality` step fails on eslint/typescript errors — these need code fixes, not CI fixes.
|
|
||||||
|
|
||||||
### Example (harb): FEE_DEST address must match DeployLocal.sol
|
|
||||||
When DeployLocal.sol changes the feeDest address, bootstrap-common.sh must also be updated.
|
|
||||||
Current feeDest = keccak256('harb.local.feeDest') = 0x8A9145E1Ea4C4d7FB08cF1011c8ac1F0e10F9383.
|
|
||||||
Symptom: bootstrap step exits 1 after 'Granting recenter access to deployer' with no error — setRecenterAccess reverts because wrong address is impersonated.
|
|
||||||
|
|
||||||
### Example (harb): keccak-derived FEE_DEST requires anvil_setBalance before impersonation
|
|
||||||
When FEE_DEST is a keccak-derived address (e.g. keccak256('harb.local.feeDest')), it has zero ETH balance. Any function that calls `anvil_impersonateAccount` then `cast send --from $FEE_DEST --unlocked` will fail silently (output redirected to LOG_FILE) but exit 1 due to gas deduction failure. Fix: add `cast rpc anvil_setBalance "$FEE_DEST" "0xDE0B6B3A7640000"` before impersonation. Applied in both bootstrap-common.sh and red-team.sh.
|
|
||||||
|
|
@ -1,93 +0,0 @@
|
||||||
# Dev-Agent Best Practices
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
- `dev-poll.sh` (cron */10) → finds ready backlog issues → spawns `dev-agent.sh`
|
|
||||||
- `dev-agent.sh` uses `claude -p` for implementation, runs in git worktree
|
|
||||||
- Lock file: `/tmp/dev-agent.lock` (contains PID)
|
|
||||||
- Status file: `/tmp/dev-agent-status`
|
|
||||||
- Worktrees: `/tmp/${PROJECT_NAME}-worktree-<issue-number>/`
|
|
||||||
|
|
||||||
## Safe Fixes
|
|
||||||
- Remove stale lock: `rm -f /tmp/dev-agent.lock` (only if PID is dead)
|
|
||||||
- Kill stuck agent: `kill <pid>` then clean lock
|
|
||||||
- Restart on derailed PR: `bash ${FACTORY_ROOT}/dev/dev-agent.sh <issue-number> &`
|
|
||||||
- Clean worktree: `cd $PROJECT_REPO_ROOT && git worktree remove /tmp/${PROJECT_NAME}-worktree-<N> --force`
|
|
||||||
- Remove `in-progress` label if agent died without cleanup:
|
|
||||||
```bash
|
|
||||||
forge_api DELETE "/issues/<N>/labels/in-progress"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Dangerous (escalate)
|
|
||||||
- Restarting agent on an issue that has an open PR with review changes — may lose context
|
|
||||||
- Anything that modifies the PR branch history
|
|
||||||
- Closing PRs or issues
|
|
||||||
|
|
||||||
## Known Issues
|
|
||||||
- `claude -p -c` (continue) fails if session was compacted — falls back to fresh `-p`
|
|
||||||
- CI_FIX_COUNT is now reset on CI pass (fixed 2026-03-12), so each review phase gets a fresh CI fix budget
|
|
||||||
- Worktree creation fails if main repo has stale rebase — auto-heals now
|
|
||||||
- Large text in jq `--arg` can break — write to file first
|
|
||||||
- `$([ "$VAR" = true ] && echo "...")` crashes under `set -euo pipefail`
|
|
||||||
|
|
||||||
## Lessons Learned
|
|
||||||
- Agents don't have memory between tasks — full context must be in the prompt
|
|
||||||
- Prior art injection (closed PR diffs) prevents rework
|
|
||||||
- Feature issues MUST list affected e2e test files
|
|
||||||
- CI fix loop is essential — first attempt rarely works
|
|
||||||
- CLAUDE_TIMEOUT=7200 (2h) is needed for complex issues
|
|
||||||
|
|
||||||
## Dependency Resolution
|
|
||||||
|
|
||||||
**Trust closed state.** If a dependency issue is closed, the code is on the primary branch. Period.
|
|
||||||
|
|
||||||
DO NOT try to find the specific PR that closed an issue. This is over-engineering that causes false negatives:
|
|
||||||
- forge shares issue/PR numbering — no guaranteed relationship
|
|
||||||
- PRs don't always mention the issue number in title/body
|
|
||||||
- Searching last N closed PRs misses older merges
|
|
||||||
- The dev-agent closes issues after merging, so closed = merged
|
|
||||||
|
|
||||||
The only check needed: `issue.state == "closed"`.
|
|
||||||
|
|
||||||
### False Positive: Status Unchanged Alert
|
|
||||||
The supervisor-poll alert 'status unchanged for Nmin' is a false positive for complex implementation tasks. The status is set to 'claude assessing + implementing' at the START of the `timeout 7200 claude -p ...` call and only updates after Claude finishes. Normal complex tasks (multi-file Solidity changes + forge test) take 45-90 minutes. To distinguish a false positive from a real stuck agent: check that the claude PID is alive (`ps -p <PID>`), consuming CPU (>0%), and has active threads (`pstree -p <PID>`). If the process is alive and using CPU, do NOT restart it — this wastes completed work.
|
|
||||||
|
|
||||||
### False Positive: 'Waiting for CI + Review' Alert
|
|
||||||
The 'status unchanged for Nmin' alert is also a false positive when status is 'waiting for CI + review on PR #N (round R)'. This is an intentional sleep/poll loop — the agent is waiting for CI to pass and then for review-poll to post a review. CI can take 20–40 minutes; review follows. Do NOT restart the agent. Confirm by checking: (1) agent PID is alive, (2) CI commit status via `forge_api GET /commits/<sha>/status`, (3) review-poll log shows it will pick up the PR on next cycle.
|
|
||||||
|
|
||||||
### False Positive: Shared Status File Causes Giant Age (29M+ min)
|
|
||||||
When the status file `/tmp/dev-agent-status` doesn't exist, `stat -c %Y` fails and the supervisor falls back to epoch 0. The computed age is then `NOW_EPOCH/60 ≈ 29,567,290 min`, which is unmistakably a false positive.
|
|
||||||
Root cause: the status file is not per-project (tracked as disinto issue #423). It can be missing if: (1) the agent has not written to it yet, (2) cleanup ran early, or (3) another project's cleanup deleted it.
|
|
||||||
Fix: confirm the agent PID is alive and the tmux session shows active work, then recreate the status file: `printf '[%s] dev-agent #NNN: <phase> (<project>)\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" > /tmp/dev-agent-status`. This clears the alert without restarting anything.
|
|
||||||
|
|
||||||
### PR CI vs Push CI mismatch causes silent stall in awaiting_review
|
|
||||||
When push CI passes but PR CI fails (e.g., a duplicate-detection step only runs on pull_request events), the phase-handler transitions to PHASE:awaiting_review without detecting the PR CI failure. The agent then sleeps in the review-poll loop indefinitely.
|
|
||||||
|
|
||||||
Symptom: PR CI=failure but dev-agent phase=awaiting_review, status shows 'waiting for CI + review'.
|
|
||||||
|
|
||||||
Fix: inject the CI failure info into the Claude session with agent_inject_into_session, pointing to the duplicate blocks and telling Claude to fix + push + write PHASE:awaiting_ci. The phase-handler's awaiting_review loop checks for phase file mtime changes every 5 min and will re-enter the main loop automatically.
|
|
||||||
|
|
||||||
### Push CI vs PR CI mismatch — agent picks wrong pipeline number
|
|
||||||
When the phase-handler injects 'CI failed' with a push pipeline number (e.g. #622), the agent checks that push pipeline, finds it passed, and concludes 'CI OK' — setting PHASE:awaiting_review despite the PR pipeline (#623) being the one that actually failed.
|
|
||||||
Root cause: the injected event does not always carry the correct pipeline number.
|
|
||||||
Symptom: agent in awaiting_review with PR CI=failure and push CI=success.
|
|
||||||
Fix: inject with explicit pipeline #623 (the pull_request event pipeline), point to the failing step and the specific duplicate blocks to fix. Use: woodpecker_api /repos/4/pipelines?event=pull_request (or look for event=pull_request in recent pipelines list) to find the correct pipeline number before injecting.
|
|
||||||
|
|
||||||
### Race Condition: Review Posted Before PHASE:awaiting_review Transitions
|
|
||||||
**Symptom:** Dev-agent status unchanged at 'waiting for review on PR #N', no `review-injected-disinto-N` sentinel, but a formal review already exists on forge and `/tmp/disinto-review-output-N.json` was written before the phase file updated.
|
|
||||||
|
|
||||||
**Root cause:** review-pr.sh runs while the dev-agent is still in PHASE:awaiting_ci. inject_review_into_dev_session returns early (phase check fails). On subsequent review-poll cycles, the PR is skipped (formal review already exists for SHA), so inject is never called again.
|
|
||||||
|
|
||||||
**Fix:** Manually inject the review:
|
|
||||||
```bash
|
|
||||||
source /home/debian/dark-factory/lib/env.sh
|
|
||||||
PROJECT_TOML=/home/debian/dark-factory/projects/disinto.toml
|
|
||||||
source /home/debian/dark-factory/lib/load-project.sh "$PROJECT_TOML"
|
|
||||||
PHASE_FILE="/tmp/dev-session-${PROJECT_NAME}-<ISSUE>.phase"
|
|
||||||
PR_NUM=<N>; PR_BRANCH="fix/issue-<ISSUE>"; PR_SHA=$(cat /tmp/dev-session-${PROJECT_NAME}-<ISSUE>.phase | grep SHA | cut -d: -f2 || git -C $PROJECT_REPO_ROOT rev-parse origin/$PR_BRANCH)
|
|
||||||
REVIEW_TEXT=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" "${FORGE_API}/issues/${PR_NUM}/comments?limit=50" | jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains("<!-- reviewed: " + $sha))] | last // empty | .body')
|
|
||||||
INJECT_MSG="Review: REQUEST_CHANGES on PR #${PR_NUM}:\n\n${REVIEW_TEXT}\n\nInstructions:\n1. Address each piece of feedback carefully.\n2. Run lint and tests when done.\n3. Commit your changes and push: git push origin ${PR_BRANCH}\n4. Write: echo PHASE:awaiting_ci > "${PHASE_FILE}"\n5. Stop and wait for the next CI result."
|
|
||||||
INJECT_TMP=$(mktemp); printf '%s' "$INJECT_MSG" > "$INJECT_TMP"
|
|
||||||
tmux load-buffer -b inject "$INJECT_TMP" && tmux paste-buffer -t "dev-${PROJECT_NAME}-<ISSUE>" -b inject && sleep 0.5 && tmux send-keys -t "dev-${PROJECT_NAME}-<ISSUE>" '' Enter
|
|
||||||
touch "/tmp/review-injected-${PROJECT_NAME}-${PR_NUM}"
|
|
||||||
```
|
|
||||||
Then update /tmp/dev-agent-status to reflect current work.
|
|
||||||
|
|
@ -1,24 +0,0 @@
|
||||||
# Disk Best Practices
|
|
||||||
|
|
||||||
## Safe Fixes
|
|
||||||
- Docker cleanup: `sudo docker system prune -f` (keeps images, removes stopped containers + dangling layers)
|
|
||||||
- Truncate supervisor logs >5MB: `truncate -s 0 <file>`
|
|
||||||
- Remove stale worktrees: check `/tmp/${PROJECT_NAME}-worktree-*`, only if dev-agent not running on them
|
|
||||||
- Woodpecker log_entries: `DELETE FROM log_entries WHERE id < (SELECT max(id) - 100000 FROM log_entries);` then `VACUUM;`
|
|
||||||
- Node module caches in worktrees: `rm -rf /tmp/${PROJECT_NAME}-worktree-*/node_modules/`
|
|
||||||
- Git garbage collection: `cd $PROJECT_REPO_ROOT && git gc --prune=now`
|
|
||||||
|
|
||||||
## Dangerous (escalate)
|
|
||||||
- `docker system prune -a --volumes` — deletes ALL images including CI build cache
|
|
||||||
- Deleting anything in `$PROJECT_REPO_ROOT/` that's tracked by git
|
|
||||||
- Truncating Woodpecker DB tables other than log_entries
|
|
||||||
|
|
||||||
## Known Disk Hogs
|
|
||||||
- Woodpecker `log_entries` table: grows to 5GB+. Truncate periodically.
|
|
||||||
- Docker overlay layers: survive normal prune. `-a` variant kills everything.
|
|
||||||
- Git worktrees in /tmp: accumulate node_modules, build artifacts
|
|
||||||
- Forge cache in `~/.foundry/cache/`: can grow large with many compilations
|
|
||||||
|
|
||||||
## Lessons Learned
|
|
||||||
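- After truncating log_entries, run `VACUUM FULL` to reclaim actual disk space — a plain `VACUUM` only marks the freed space as reusable and generally does not return it to the operating system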
|
|
||||||
- Docker ghost overlay layers need `prune -a` but that kills CI images — only do this if truly desperate
|
|
||||||
|
|
@ -1,36 +0,0 @@
|
||||||
# Forge Best Practices
|
|
||||||
|
|
||||||
## Rate Limiting
|
|
||||||
The forge (Forgejo/Gitea) may rate-limit SSH and HTTPS clones. Symptoms:
|
|
||||||
- Woodpecker `git` step fails with exit code 128
|
|
||||||
- Multiple pipelines fail in quick succession with the same error
|
|
||||||
- Retriggers make it WORSE by adding more clone attempts
|
|
||||||
|
|
||||||
### What To Do
|
|
||||||
- **Do NOT retrigger** during a rate-limit storm. Wait 10-15 minutes.
|
|
||||||
- Check if multiple pipelines failed on `git` step recently:
|
|
||||||
```bash
|
|
||||||
wpdb -c "SELECT number, status, to_timestamp(started) FROM pipelines WHERE repo_id=$WOODPECKER_REPO_ID AND status='failure' ORDER BY number DESC LIMIT 5;"
|
|
||||||
wpdb -c "SELECT s.name, s.exit_code FROM steps s JOIN pipelines p ON s.pipeline_id=p.id WHERE p.number=<N> AND p.repo_id=$WOODPECKER_REPO_ID AND s.state='failure';"
|
|
||||||
```
|
|
||||||
- If multiple `git` failures with exit 128 in the last 15 min → it's rate limiting. Wait.
|
|
||||||
- Only retrigger after 15+ minutes of no CI activity.
|
|
||||||
|
|
||||||
### How To Retrigger Safely
|
|
||||||
```bash
|
|
||||||
cd <worktree> && git commit --allow-empty -m "ci: retrigger" --no-verify && git push origin <branch> --force
|
|
||||||
```
|
|
||||||
|
|
||||||
### Prevention
|
|
||||||
- The system runs 3 agents staggered by 3 minutes. During heavy development, many PRs trigger CI simultaneously.
|
|
||||||
- One pipeline at a time is ideal on this VPS (resource + rate limit reasons).
|
|
||||||
- If >3 pipelines are pending/running, do NOT create more work.
|
|
||||||
|
|
||||||
## API Tokens
|
|
||||||
- API token is in `.env` as `FORGE_TOKEN` — loaded via env.sh.
|
|
||||||
- Review bot has a separate token (`$FORGE_REVIEW_TOKEN`) for formal reviews.
|
|
||||||
- With local Forgejo, tokens don't expire. For remote forges, check provider docs.
|
|
||||||
|
|
||||||
## Lessons Learned
|
|
||||||
- Retrigger storm on 2026-03-12: supervisor + dev-agent both retriggered during rate limit, caused 5+ failed pipelines. Added cooldown awareness.
|
|
||||||
- Empty commit retrigger works but adds noise to git history. Acceptable tradeoff.
|
|
||||||
|
|
@ -1,61 +0,0 @@
|
||||||
# Git Best Practices
|
|
||||||
|
|
||||||
## Environment
|
|
||||||
- Repo: `$PROJECT_REPO_ROOT`, remote: `$PROJECT_REMOTE`
|
|
||||||
- Branch: `$PRIMARY_BRANCH` (protected — no direct push, PRs only)
|
|
||||||
- Worktrees: `/tmp/${PROJECT_NAME}-worktree-<issue>/`
|
|
||||||
|
|
||||||
## Safe Fixes
|
|
||||||
- Abort stale rebase: `cd $PROJECT_REPO_ROOT && git rebase --abort`
|
|
||||||
- Switch to $PRIMARY_BRANCH: `git checkout $PRIMARY_BRANCH`
|
|
||||||
- Prune worktrees: `git worktree prune`
|
|
||||||
- Reset dirty state: `git checkout -- .` (only uncommitted changes)
|
|
||||||
- Fetch latest: `git fetch origin $PRIMARY_BRANCH`
|
|
||||||
|
|
||||||
## Auto-fixable by Supervisor
|
|
||||||
- **Merge conflict on approved PR**: rebase onto $PRIMARY_BRANCH and force-push
|
|
||||||
```bash
|
|
||||||
cd /tmp/${PROJECT_NAME}-worktree-<issue> || git worktree add /tmp/${PROJECT_NAME}-worktree-<issue> <branch>
|
|
||||||
cd /tmp/${PROJECT_NAME}-worktree-<issue>
|
|
||||||
git fetch origin $PRIMARY_BRANCH
|
|
||||||
git rebase origin/$PRIMARY_BRANCH
|
|
||||||
# If conflict is trivial (NatSpec, comments): resolve and continue
|
|
||||||
# If conflict is code logic: escalate to Clawy
|
|
||||||
git push origin <branch> --force
|
|
||||||
```
|
|
||||||
- **Stale rebase**: `git rebase --abort && git checkout $PRIMARY_BRANCH`
|
|
||||||
- **Wrong branch**: `git checkout $PRIMARY_BRANCH`
|
|
||||||
|
|
||||||
## Dangerous (escalate)
|
|
||||||
- `git reset --hard` on any branch with unpushed work
|
|
||||||
- Deleting remote branches
|
|
||||||
- Force-pushing to any branch
|
|
||||||
- Anything on the $PRIMARY_BRANCH branch directly
|
|
||||||
|
|
||||||
## Known Issues
|
|
||||||
- Main repo MUST be on $PRIMARY_BRANCH at all times. Dev work happens in worktrees.
|
|
||||||
- Stale rebases (detached HEAD) break all worktree creation — silent pipeline stall.
|
|
||||||
- `git worktree add` fails if target directory exists (even empty). Remove first.
|
|
||||||
- Many old branches exist locally (100+). Normal — don't bulk-delete.
|
|
||||||
|
|
||||||
## Evolution Pipeline
|
|
||||||
- The evolution pipeline (`tools/push3-evolution/evolve.sh`) temporarily modifies
|
|
||||||
`onchain/src/OptimizerV3.sol` and `onchain/src/OptimizerV3Push3.sol` during runs.
|
|
||||||
- **DO NOT revert these files while evolution is running** (check: `pgrep -f evolve.sh`).
|
|
||||||
- If `/tmp/evolution.pid` exists and the PID is alive, the dirty state is intentional.
|
|
||||||
- Evolution will restore the files when it finishes.
|
|
||||||
|
|
||||||
## Lessons Learned
|
|
||||||
- NEVER delete remote branches before confirming merge. Close PR, rebase locally, force-push if needed.
|
|
||||||
- Stale rebase caused 5h pipeline stall once (2026-03-11). Auto-heal added to dev-agent.
|
|
||||||
- lint-staged hooks fail when `forge` not in PATH. Use `--no-verify` when committing from scripts.
|
|
||||||
|
|
||||||
### PR #608 Post-Mortem (2026-03-12/13)
|
|
||||||
PR sat blocked for 24 hours while 21 other PRs merged. Root causes:
|
|
||||||
1. **Supervisor didn't detect merge conflicts** — only checked CI state, not `mergeable`. Fixed: now checks `mergeable=false` as first condition.
|
|
||||||
2. **Supervisor didn't detect stale REQUEST_CHANGES** — review bot requested changes, dev-agent never came back to fix them, moved on to other issues. Need: detect "PR has REQUEST_CHANGES older than N hours with no new push."
|
|
||||||
3. **No staleness kill switch** — after N merge conflicts or N days, a PR should be auto-closed and the issue reopened for a fresh attempt. Rebasing across 21 commits is more work than starting over.
|
|
||||||
|
|
||||||
**Rules derived:**
|
|
||||||
- Supervisor should close PRs that are >24h old with merge conflicts and no recent activity. Reopen the parent issue with a note pointing to the closed PR as prior art.
|
|
||||||
- Dev-agent must not abandon a PR with REQUEST_CHANGES — either fix or close it before moving to new work.
|
|
||||||
|
|
@ -1,29 +0,0 @@
|
||||||
# Memory Best Practices
|
|
||||||
|
|
||||||
## Environment
|
|
||||||
- VPS: 8GB RAM, 4GB swap, Debian
|
|
||||||
- Running: Docker stack (8 containers), Woodpecker CI, OpenClaw gateway
|
|
||||||
|
|
||||||
## Safe Fixes (no permission needed)
|
|
||||||
- Kill stale `claude` processes (>3h old): `pgrep -f "claude" --older 10800 | xargs kill`
|
|
||||||
- Drop filesystem caches: `sync && echo 3 | sudo tee /proc/sys/vm/drop_caches`
|
|
||||||
- Restart bloated Anvil: `sudo docker restart ${PROJECT_NAME}-anvil-1` (grows to 12GB+ over hours)
|
|
||||||
- Kill orphan node processes from dead worktrees
|
|
||||||
|
|
||||||
## Dangerous (escalate)
|
|
||||||
- `docker system prune -a --volumes` — kills CI images, hours to rebuild
|
|
||||||
- Stopping project stack containers — breaks dev environment
|
|
||||||
- OOM that survives all safe fixes — needs human decision on what to kill
|
|
||||||
|
|
||||||
## Known Memory Hogs
|
|
||||||
- `claude` processes from dev-agent: 200MB+ each, can zombie
|
|
||||||
- `dockerd`: 600MB+ baseline (normal)
|
|
||||||
- `openclaw-gateway`: 500MB+ (normal)
|
|
||||||
- Anvil container: starts small, grows unbounded over hours
|
|
||||||
- `forge build` with via_ir: can spike to 4GB+. Use `--skip test script` to reduce.
|
|
||||||
- Vite dev servers inside containers: 150MB+ each
|
|
||||||
|
|
||||||
## Lessons Learned
|
|
||||||
- After killing processes, always `sync && echo 3 | sudo tee /proc/sys/vm/drop_caches`
|
|
||||||
- Swap doesn't drain from dropping caches alone — it's actual paged-out process memory
|
|
||||||
- Running CI + full project stack = 14+ containers on 8GB. Only one pipeline at a time.
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
# Review Agent Best Practices
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
- `review-poll.sh` (cron */10) → finds open PRs with CI pass + no review → spawns `review-pr.sh`
|
|
||||||
- `review-pr.sh` uses `claude -p` to review the diff, posts structured comment
|
|
||||||
- Uses `review_bot` forge account for formal reviews (separate from main account)
|
|
||||||
- Skips WIP/draft PRs (`[WIP]` in title or draft flag)
|
|
||||||
|
|
||||||
## Safe Fixes
|
|
||||||
- Manually trigger review: `bash ${FACTORY_ROOT}/review/review-pr.sh <pr-number>`
|
|
||||||
- Force re-review: `bash ${FACTORY_ROOT}/review/review-pr.sh <pr-number> --force`
|
|
||||||
- Check review log: `tail -20 ${FACTORY_ROOT}/review/review.log`
|
|
||||||
|
|
||||||
## Common Failures
|
|
||||||
- **"SKIP: CI=failure"** — review bot won't review until CI passes. Fix CI first.
|
|
||||||
- **"already reviewed"** — bot checks `<!-- reviewed: SHA -->` comment marker. Use `--force` to override.
|
|
||||||
- **Review error comment** — uses `<!-- review-error: SHA -->` marker, does NOT count as reviewed. Bot should retry automatically.
|
|
||||||
- **Self-narration collapse** — bot sometimes narrates instead of producing structured JSON. JSON output format in the prompt prevents this.
|
|
||||||
- **Hallucinated findings** — bot may flag non-issues. This needs Clawy's judgment — escalate.
|
|
||||||
|
|
||||||
## Monitoring
|
|
||||||
- Unreviewed PRs with CI pass for >1h → supervisor-poll.sh auto-triggers review
|
|
||||||
- Review errors should resolve on next poll cycle
|
|
||||||
- If same PR fails review 3+ times → likely a prompt issue, escalate
|
|
||||||
|
|
||||||
## Lessons Learned
|
|
||||||
- Review bot must output JSON — prevents self-narration collapse
|
|
||||||
- DISCUSS verdict should be treated same as REQUEST_CHANGES by dev-agent
|
|
||||||
- Error comments must NOT include `<!-- reviewed: SHA -->` — would falsely mark as reviewed
|
|
||||||
- Review bot uses forge formal reviews API — branch protection requires different user than PR author
|
|
||||||
|
|
@ -218,7 +218,7 @@ echo ""
|
||||||
|
|
||||||
echo "## Pending Vault Items"
|
echo "## Pending Vault Items"
|
||||||
_found_vault=false
|
_found_vault=false
|
||||||
for _vf in "${PROJECT_REPO_ROOT}/vault/pending/"*.md; do
|
for _vf in "${OPS_REPO_ROOT}/vault/pending/"*.md; do
|
||||||
[ -f "$_vf" ] || continue
|
[ -f "$_vf" ] || continue
|
||||||
_found_vault=true
|
_found_vault=true
|
||||||
_vtitle=$(grep -m1 '^# ' "$_vf" | sed 's/^# //' || basename "$_vf")
|
_vtitle=$(grep -m1 '^# ' "$_vf" | sed 's/^# //' || basename "$_vf")
|
||||||
|
|
|
||||||
|
|
@ -10,11 +10,11 @@ to a human by writing `PHASE:escalate` to a phase file — using the same
|
||||||
unified escalation path as dev/action agents.
|
unified escalation path as dev/action agents.
|
||||||
|
|
||||||
**Pipeline B — Procurement (*.md)**: The planner files resource requests as
|
**Pipeline B — Procurement (*.md)**: The planner files resource requests as
|
||||||
markdown files in `vault/pending/`. `vault-poll.sh` notifies the human via
|
markdown files in `$OPS_REPO_ROOT/vault/pending/`. `vault-poll.sh` notifies the human via
|
||||||
vault/forge. The human fulfills the request (creates accounts, provisions infra,
|
vault/forge. The human fulfills the request (creates accounts, provisions infra,
|
||||||
adds secrets to `.env`) and moves the file to `vault/approved/`.
|
adds secrets to `.env`) and moves the file to `$OPS_REPO_ROOT/vault/approved/`.
|
||||||
`vault-fire.sh` then extracts the proposed entry and appends it to
|
`vault-fire.sh` then extracts the proposed entry and appends it to
|
||||||
`RESOURCES.md`.
|
`$OPS_REPO_ROOT/RESOURCES.md`.
|
||||||
|
|
||||||
**Pipeline C — Rent-a-Human (outreach drafts)**: Any agent can dispatch the
|
**Pipeline C — Rent-a-Human (outreach drafts)**: Any agent can dispatch the
|
||||||
`run-rent-a-human` formula (via an `action` issue) when a task requires a human
|
`run-rent-a-human` formula (via an `action` issue) when a task requires a human
|
||||||
|
|
@ -30,15 +30,15 @@ needed — the human reviews and publishes directly.
|
||||||
- `vault/vault-agent.sh` — Classifies and routes pending JSON actions via `claude -p`: auto-approve, auto-reject, or escalate to human
|
- `vault/vault-agent.sh` — Classifies and routes pending JSON actions via `claude -p`: auto-approve, auto-reject, or escalate to human
|
||||||
- `vault/vault-env.sh` — Shared env setup for vault sub-scripts: sources `lib/env.sh`, overrides `FORGE_TOKEN` with `FORGE_VAULT_TOKEN`, sets `VAULT_TOKEN` for vault-runner container
|
- `vault/vault-env.sh` — Shared env setup for vault sub-scripts: sources `lib/env.sh`, overrides `FORGE_TOKEN` with `FORGE_VAULT_TOKEN`, sets `VAULT_TOKEN` for vault-runner container
|
||||||
- `vault/PROMPT.md` — System prompt for the vault agent's Claude invocation
|
- `vault/PROMPT.md` — System prompt for the vault agent's Claude invocation
|
||||||
- `vault/vault-fire.sh` — Executes an approved action (JSON) in an **ephemeral Docker container** with vault-only secrets injected (GITHUB_TOKEN, CLAWHUB_TOKEN — never exposed to agents). For deployment actions, calls `lib/ci-helpers.sh:ci_promote()` to gate production promotes via Woodpecker environments. Writes RESOURCES.md entry for procurement MD approvals.
|
- `vault/vault-fire.sh` — Executes an approved action (JSON) in an **ephemeral Docker container** with vault-only secrets injected (GITHUB_TOKEN, CLAWHUB_TOKEN — never exposed to agents). For deployment actions, calls `lib/ci-helpers.sh:ci_promote()` to gate production promotes via Woodpecker environments. Writes `$OPS_REPO_ROOT/RESOURCES.md` entry for procurement MD approvals.
|
||||||
- `vault/vault-reject.sh` — Marks a JSON action as rejected
|
- `vault/vault-reject.sh` — Marks a JSON action as rejected
|
||||||
- `formulas/run-rent-a-human.toml` — Formula for human-action drafts: Claude researches target platform norms, drafts copy-paste content, writes to `vault/outreach/{platform}/drafts/`, notifies human via vault/forge
|
- `formulas/run-rent-a-human.toml` — Formula for human-action drafts: Claude researches target platform norms, drafts copy-paste content, writes to `vault/outreach/{platform}/drafts/`, notifies human via vault/forge
|
||||||
|
|
||||||
**Procurement flow**:
|
**Procurement flow** (all vault items live in `$OPS_REPO_ROOT/vault/`):
|
||||||
1. Planner drops `vault/pending/<name>.md` with what/why/proposed RESOURCES.md entry
|
1. Planner drops `$OPS_REPO_ROOT/vault/pending/<name>.md` with what/why/proposed RESOURCES.md entry
|
||||||
2. `vault-poll.sh` notifies human via vault/forge
|
2. `vault-poll.sh` notifies human via vault/forge
|
||||||
3. Human fulfills: creates account, adds secrets to `.env`, moves file to `vault/approved/`
|
3. Human fulfills: creates account, adds secrets to `.env`, moves file to `approved/`
|
||||||
4. `vault-fire.sh` extracts proposed entry, appends to RESOURCES.md, moves to `vault/fired/`
|
4. `vault-fire.sh` extracts proposed entry, appends to `$OPS_REPO_ROOT/RESOURCES.md`, moves to `fired/`
|
||||||
5. Next planner run reads RESOURCES.md → new capability available → unblocks prerequisite tree
|
5. Next planner run reads RESOURCES.md → new capability available → unblocks prerequisite tree
|
||||||
|
|
||||||
**Environment variables consumed**:
|
**Environment variables consumed**:
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
# Vault Agent
|
# Vault Agent
|
||||||
|
|
||||||
You are the vault agent for `$FORGE_REPO`. You were called by
|
You are the vault agent for `$FORGE_REPO`. You were called by
|
||||||
`vault-poll.sh` because one or more actions in `vault/pending/` need
|
`vault-poll.sh` because one or more actions in `$OPS_REPO_ROOT/vault/pending/` need
|
||||||
classification and routing.
|
classification and routing.
|
||||||
|
|
||||||
## Two Pipelines
|
## Two Pipelines
|
||||||
|
|
@ -16,7 +16,7 @@ You classify and route these: auto-approve, escalate, or reject.
|
||||||
Resource requests from the planner. These always escalate to the human —
|
Resource requests from the planner. These always escalate to the human —
|
||||||
you do NOT auto-approve or reject procurement requests. The human fulfills
|
you do NOT auto-approve or reject procurement requests. The human fulfills
|
||||||
the request (creates accounts, provisions infra, adds secrets to .env)
|
the request (creates accounts, provisions infra, adds secrets to .env)
|
||||||
and moves the file from `vault/pending/` to `vault/approved/`.
|
and moves the file from `$OPS_REPO_ROOT/vault/pending/` to `$OPS_REPO_ROOT/vault/approved/`.
|
||||||
`vault-fire.sh` then writes the RESOURCES.md entry.
|
`vault-fire.sh` then writes the RESOURCES.md entry.
|
||||||
|
|
||||||
## Your Job (Action Gating only)
|
## Your Job (Action Gating only)
|
||||||
|
|
@ -116,7 +116,7 @@ ROUTE: <action-id> → <auto-approve|escalate|reject> — <reason>
|
||||||
|
|
||||||
- Process ALL pending JSON actions in the batch. Never skip silently.
|
- Process ALL pending JSON actions in the batch. Never skip silently.
|
||||||
- For auto-approved actions, fire them immediately via `vault-fire.sh`.
|
- For auto-approved actions, fire them immediately via `vault-fire.sh`.
|
||||||
- For escalated actions, move to `vault/approved/` only AFTER human approval.
|
- For escalated actions, move to `$OPS_REPO_ROOT/vault/approved/` only AFTER human approval.
|
||||||
- Read the action JSON carefully. Check the payload, not just the metadata.
|
- Read the action JSON carefully. Check the payload, not just the metadata.
|
||||||
- Ignore `.md` files in pending/ — those are procurement requests handled
|
- Ignore `.md` files in pending/ — those are procurement requests handled
|
||||||
separately by vault-poll.sh and the human.
|
separately by vault-poll.sh and the human.
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,10 @@ set -euo pipefail
|
||||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
source "${SCRIPT_DIR}/vault-env.sh"
|
source "${SCRIPT_DIR}/vault-env.sh"
|
||||||
|
|
||||||
VAULT_DIR="${FACTORY_ROOT}/vault"
|
VAULT_SCRIPT_DIR="${FACTORY_ROOT}/vault"
|
||||||
PROMPT_FILE="${VAULT_DIR}/PROMPT.md"
|
OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault"
|
||||||
LOGFILE="${VAULT_DIR}/vault.log"
|
PROMPT_FILE="${VAULT_SCRIPT_DIR}/PROMPT.md"
|
||||||
|
LOGFILE="${VAULT_SCRIPT_DIR}/vault.log"
|
||||||
CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}"
|
CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}"
|
||||||
|
|
||||||
log() {
|
log() {
|
||||||
|
|
@ -26,7 +27,7 @@ log() {
|
||||||
ACTIONS_BATCH=""
|
ACTIONS_BATCH=""
|
||||||
ACTION_COUNT=0
|
ACTION_COUNT=0
|
||||||
|
|
||||||
for action_file in "${VAULT_DIR}/pending/"*.json; do
|
for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do
|
||||||
[ -f "$action_file" ] || continue
|
[ -f "$action_file" ] || continue
|
||||||
|
|
||||||
ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null)
|
ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null)
|
||||||
|
|
@ -36,7 +37,7 @@ for action_file in "${VAULT_DIR}/pending/"*.json; do
|
||||||
if ! jq empty < "$action_file" 2>/dev/null; then
|
if ! jq empty < "$action_file" 2>/dev/null; then
|
||||||
ACTION_ID=$(basename "$action_file" .json)
|
ACTION_ID=$(basename "$action_file" .json)
|
||||||
log "malformed JSON: $action_file — rejecting"
|
log "malformed JSON: $action_file — rejecting"
|
||||||
bash "${VAULT_DIR}/vault-reject.sh" "$ACTION_ID" "malformed JSON" 2>/dev/null || true
|
bash "${VAULT_SCRIPT_DIR}/vault-reject.sh" "$ACTION_ID" "malformed JSON" 2>/dev/null || true
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -66,9 +67,10 @@ ${ACTIONS_BATCH}
|
||||||
|
|
||||||
## Environment
|
## Environment
|
||||||
- FACTORY_ROOT=${FACTORY_ROOT}
|
- FACTORY_ROOT=${FACTORY_ROOT}
|
||||||
- Vault directory: ${VAULT_DIR}
|
- OPS_REPO_ROOT=${OPS_REPO_ROOT}
|
||||||
- vault-fire.sh: bash ${VAULT_DIR}/vault-fire.sh <action-id>
|
- Vault data: ${OPS_VAULT_DIR}
|
||||||
- vault-reject.sh: bash ${VAULT_DIR}/vault-reject.sh <action-id> \"<reason>\"
|
- vault-fire.sh: bash ${VAULT_SCRIPT_DIR}/vault-fire.sh <action-id>
|
||||||
|
- vault-reject.sh: bash ${VAULT_SCRIPT_DIR}/vault-reject.sh <action-id> \"<reason>\"
|
||||||
|
|
||||||
Process each action now. For auto-approve, fire immediately. For reject, call vault-reject.sh.
|
Process each action now. For auto-approve, fire immediately. For reject, call vault-reject.sh.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,10 +17,10 @@ set -euo pipefail
|
||||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
source "${SCRIPT_DIR}/vault-env.sh"
|
source "${SCRIPT_DIR}/vault-env.sh"
|
||||||
|
|
||||||
VAULT_DIR="${FACTORY_ROOT}/vault"
|
OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault"
|
||||||
LOCKS_DIR="${VAULT_DIR}/.locks"
|
LOCKS_DIR="${FACTORY_ROOT}/vault/.locks"
|
||||||
LOGFILE="${VAULT_DIR}/vault.log"
|
LOGFILE="${FACTORY_ROOT}/vault/vault.log"
|
||||||
RESOURCES_FILE="${PROJECT_REPO_ROOT:-${FACTORY_ROOT}}/RESOURCES.md"
|
RESOURCES_FILE="${OPS_REPO_ROOT}/RESOURCES.md"
|
||||||
|
|
||||||
log() {
|
log() {
|
||||||
printf '[%s] vault-fire: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
printf '[%s] vault-fire: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
||||||
|
|
@ -34,19 +34,19 @@ ACTION_ID="${1:?Usage: vault-fire.sh <item-id>}"
|
||||||
IS_PROCUREMENT=false
|
IS_PROCUREMENT=false
|
||||||
ACTION_FILE=""
|
ACTION_FILE=""
|
||||||
|
|
||||||
if [ -f "${VAULT_DIR}/approved/${ACTION_ID}.md" ]; then
|
if [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" ]; then
|
||||||
IS_PROCUREMENT=true
|
IS_PROCUREMENT=true
|
||||||
ACTION_FILE="${VAULT_DIR}/approved/${ACTION_ID}.md"
|
ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.md"
|
||||||
elif [ -f "${VAULT_DIR}/pending/${ACTION_ID}.md" ]; then
|
elif [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.md" ]; then
|
||||||
IS_PROCUREMENT=true
|
IS_PROCUREMENT=true
|
||||||
mv "${VAULT_DIR}/pending/${ACTION_ID}.md" "${VAULT_DIR}/approved/${ACTION_ID}.md"
|
mv "${OPS_VAULT_DIR}/pending/${ACTION_ID}.md" "${OPS_VAULT_DIR}/approved/${ACTION_ID}.md"
|
||||||
ACTION_FILE="${VAULT_DIR}/approved/${ACTION_ID}.md"
|
ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.md"
|
||||||
log "$ACTION_ID: pending → approved (procurement)"
|
log "$ACTION_ID: pending → approved (procurement)"
|
||||||
elif [ -f "${VAULT_DIR}/approved/${ACTION_ID}.json" ]; then
|
elif [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ]; then
|
||||||
ACTION_FILE="${VAULT_DIR}/approved/${ACTION_ID}.json"
|
ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json"
|
||||||
elif [ -f "${VAULT_DIR}/pending/${ACTION_ID}.json" ]; then
|
elif [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" ]; then
|
||||||
mv "${VAULT_DIR}/pending/${ACTION_ID}.json" "${VAULT_DIR}/approved/${ACTION_ID}.json"
|
mv "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json"
|
||||||
ACTION_FILE="${VAULT_DIR}/approved/${ACTION_ID}.json"
|
ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json"
|
||||||
TMP=$(mktemp)
|
TMP=$(mktemp)
|
||||||
jq '.status = "approved"' "$ACTION_FILE" > "$TMP" && mv "$TMP" "$ACTION_FILE"
|
jq '.status = "approved"' "$ACTION_FILE" > "$TMP" && mv "$TMP" "$ACTION_FILE"
|
||||||
log "$ACTION_ID: pending → approved"
|
log "$ACTION_ID: pending → approved"
|
||||||
|
|
@ -93,7 +93,7 @@ if [ "$IS_PROCUREMENT" = true ]; then
|
||||||
log "$ACTION_ID: wrote RESOURCES.md entry"
|
log "$ACTION_ID: wrote RESOURCES.md entry"
|
||||||
|
|
||||||
# Move to fired/
|
# Move to fired/
|
||||||
mv "$ACTION_FILE" "${VAULT_DIR}/fired/${ACTION_ID}.md"
|
mv "$ACTION_FILE" "${OPS_VAULT_DIR}/fired/${ACTION_ID}.md"
|
||||||
rm -f "${LOCKS_DIR}/${ACTION_ID}.notified"
|
rm -f "${LOCKS_DIR}/${ACTION_ID}.notified"
|
||||||
log "$ACTION_ID: approved → fired (procurement)"
|
log "$ACTION_ID: approved → fired (procurement)"
|
||||||
exit 0
|
exit 0
|
||||||
|
|
@ -122,7 +122,7 @@ if [ -f "${FACTORY_ROOT}/.env.vault.enc" ] && [ -f "${FACTORY_ROOT}/docker-compo
|
||||||
else
|
else
|
||||||
# Fallback for bare-metal or pre-migration setups: run action handler directly
|
# Fallback for bare-metal or pre-migration setups: run action handler directly
|
||||||
log "$ACTION_ID: no .env.vault.enc or docker-compose.yml — running action directly"
|
log "$ACTION_ID: no .env.vault.enc or docker-compose.yml — running action directly"
|
||||||
bash "${VAULT_DIR}/vault-run-action.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1 || FIRE_EXIT=$?
|
bash "${SCRIPT_DIR}/vault-run-action.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1 || FIRE_EXIT=$?
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -132,7 +132,7 @@ if [ "$FIRE_EXIT" -eq 0 ]; then
|
||||||
# Update with fired timestamp and move to fired/
|
# Update with fired timestamp and move to fired/
|
||||||
TMP=$(mktemp)
|
TMP=$(mktemp)
|
||||||
jq --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" '.status = "fired" | .fired_at = $ts' "$ACTION_FILE" > "$TMP" \
|
jq --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" '.status = "fired" | .fired_at = $ts' "$ACTION_FILE" > "$TMP" \
|
||||||
&& mv "$TMP" "${VAULT_DIR}/fired/${ACTION_ID}.json"
|
&& mv "$TMP" "${OPS_VAULT_DIR}/fired/${ACTION_ID}.json"
|
||||||
rm -f "$ACTION_FILE"
|
rm -f "$ACTION_FILE"
|
||||||
log "$ACTION_ID: approved → fired"
|
log "$ACTION_ID: approved → fired"
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -26,8 +26,9 @@ FORGE_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}"
|
||||||
LOGFILE="${FACTORY_ROOT}/vault/vault.log"
|
LOGFILE="${FACTORY_ROOT}/vault/vault.log"
|
||||||
STATUSFILE="/tmp/vault-status"
|
STATUSFILE="/tmp/vault-status"
|
||||||
LOCKFILE="/tmp/vault-poll.lock"
|
LOCKFILE="/tmp/vault-poll.lock"
|
||||||
VAULT_DIR="${FACTORY_ROOT}/vault"
|
VAULT_SCRIPT_DIR="${FACTORY_ROOT}/vault"
|
||||||
LOCKS_DIR="${VAULT_DIR}/.locks"
|
OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault"
|
||||||
|
LOCKS_DIR="${VAULT_SCRIPT_DIR}/.locks"
|
||||||
|
|
||||||
TIMEOUT_HOURS=48
|
TIMEOUT_HOURS=48
|
||||||
|
|
||||||
|
|
@ -78,7 +79,7 @@ unlock_action() {
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
status "phase 1: retrying approved items"
|
status "phase 1: retrying approved items"
|
||||||
|
|
||||||
for action_file in "${VAULT_DIR}/approved/"*.json; do
|
for action_file in "${OPS_VAULT_DIR}/approved/"*.json; do
|
||||||
[ -f "$action_file" ] || continue
|
[ -f "$action_file" ] || continue
|
||||||
ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null)
|
ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null)
|
||||||
[ -z "$ACTION_ID" ] && continue
|
[ -z "$ACTION_ID" ] && continue
|
||||||
|
|
@ -89,7 +90,7 @@ for action_file in "${VAULT_DIR}/approved/"*.json; do
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "retrying approved action: $ACTION_ID"
|
log "retrying approved action: $ACTION_ID"
|
||||||
if bash "${VAULT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then
|
if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then
|
||||||
log "fired $ACTION_ID (retry)"
|
log "fired $ACTION_ID (retry)"
|
||||||
else
|
else
|
||||||
log "ERROR: fire failed for $ACTION_ID (retry)"
|
log "ERROR: fire failed for $ACTION_ID (retry)"
|
||||||
|
|
@ -99,7 +100,7 @@ for action_file in "${VAULT_DIR}/approved/"*.json; do
|
||||||
done
|
done
|
||||||
|
|
||||||
# Retry approved procurement requests (.md)
|
# Retry approved procurement requests (.md)
|
||||||
for req_file in "${VAULT_DIR}/approved/"*.md; do
|
for req_file in "${OPS_VAULT_DIR}/approved/"*.md; do
|
||||||
[ -f "$req_file" ] || continue
|
[ -f "$req_file" ] || continue
|
||||||
REQ_ID=$(basename "$req_file" .md)
|
REQ_ID=$(basename "$req_file" .md)
|
||||||
|
|
||||||
|
|
@ -109,7 +110,7 @@ for req_file in "${VAULT_DIR}/approved/"*.md; do
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "retrying approved procurement: $REQ_ID"
|
log "retrying approved procurement: $REQ_ID"
|
||||||
if bash "${VAULT_DIR}/vault-fire.sh" "$REQ_ID" >> "$LOGFILE" 2>&1; then
|
if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$REQ_ID" >> "$LOGFILE" 2>&1; then
|
||||||
log "fired procurement $REQ_ID (retry)"
|
log "fired procurement $REQ_ID (retry)"
|
||||||
else
|
else
|
||||||
log "ERROR: fire failed for procurement $REQ_ID (retry)"
|
log "ERROR: fire failed for procurement $REQ_ID (retry)"
|
||||||
|
|
@ -126,7 +127,7 @@ status "phase 2: checking escalation timeouts"
|
||||||
NOW_EPOCH=$(date +%s)
|
NOW_EPOCH=$(date +%s)
|
||||||
TIMEOUT_SECS=$((TIMEOUT_HOURS * 3600))
|
TIMEOUT_SECS=$((TIMEOUT_HOURS * 3600))
|
||||||
|
|
||||||
for action_file in "${VAULT_DIR}/pending/"*.json; do
|
for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do
|
||||||
[ -f "$action_file" ] || continue
|
[ -f "$action_file" ] || continue
|
||||||
|
|
||||||
ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null)
|
ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null)
|
||||||
|
|
@ -142,7 +143,7 @@ for action_file in "${VAULT_DIR}/pending/"*.json; do
|
||||||
if [ "$AGE_SECS" -gt "$TIMEOUT_SECS" ]; then
|
if [ "$AGE_SECS" -gt "$TIMEOUT_SECS" ]; then
|
||||||
AGE_HOURS=$((AGE_SECS / 3600))
|
AGE_HOURS=$((AGE_SECS / 3600))
|
||||||
log "timeout: $ACTION_ID escalated ${AGE_HOURS}h ago with no reply — auto-rejecting"
|
log "timeout: $ACTION_ID escalated ${AGE_HOURS}h ago with no reply — auto-rejecting"
|
||||||
bash "${VAULT_DIR}/vault-reject.sh" "$ACTION_ID" "timeout (${AGE_HOURS}h, no human reply)" >> "$LOGFILE" 2>&1 || true
|
bash "${VAULT_SCRIPT_DIR}/vault-reject.sh" "$ACTION_ID" "timeout (${AGE_HOURS}h, no human reply)" >> "$LOGFILE" 2>&1 || true
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
@ -154,7 +155,7 @@ status "phase 3: processing pending actions"
|
||||||
PENDING_COUNT=0
|
PENDING_COUNT=0
|
||||||
PENDING_SUMMARY=""
|
PENDING_SUMMARY=""
|
||||||
|
|
||||||
for action_file in "${VAULT_DIR}/pending/"*.json; do
|
for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do
|
||||||
[ -f "$action_file" ] || continue
|
[ -f "$action_file" ] || continue
|
||||||
|
|
||||||
ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null)
|
ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null)
|
||||||
|
|
@ -181,7 +182,7 @@ if [ "$PENDING_COUNT" -gt 0 ]; then
|
||||||
log "found $PENDING_COUNT pending action(s), invoking vault-agent"
|
log "found $PENDING_COUNT pending action(s), invoking vault-agent"
|
||||||
status "invoking vault-agent for $PENDING_COUNT action(s)"
|
status "invoking vault-agent for $PENDING_COUNT action(s)"
|
||||||
|
|
||||||
bash "${VAULT_DIR}/vault-agent.sh" >> "$LOGFILE" 2>&1 || {
|
bash "${VAULT_SCRIPT_DIR}/vault-agent.sh" >> "$LOGFILE" 2>&1 || {
|
||||||
log "ERROR: vault-agent failed"
|
log "ERROR: vault-agent failed"
|
||||||
}
|
}
|
||||||
fi
|
fi
|
||||||
|
|
@ -193,12 +194,12 @@ status "phase 4: processing pending procurement requests"
|
||||||
|
|
||||||
PROCURE_COUNT=0
|
PROCURE_COUNT=0
|
||||||
|
|
||||||
for req_file in "${VAULT_DIR}/pending/"*.md; do
|
for req_file in "${OPS_VAULT_DIR}/pending/"*.md; do
|
||||||
[ -f "$req_file" ] || continue
|
[ -f "$req_file" ] || continue
|
||||||
REQ_ID=$(basename "$req_file" .md)
|
REQ_ID=$(basename "$req_file" .md)
|
||||||
|
|
||||||
# Check if already notified (marker file)
|
# Check if already notified (marker file)
|
||||||
if [ -f "${VAULT_DIR}/.locks/${REQ_ID}.notified" ]; then
|
if [ -f "${LOCKS_DIR}/${REQ_ID}.notified" ]; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -215,8 +216,8 @@ for req_file in "${VAULT_DIR}/pending/"*.md; do
|
||||||
log "new procurement request: $REQ_ID — $REQ_TITLE"
|
log "new procurement request: $REQ_ID — $REQ_TITLE"
|
||||||
|
|
||||||
# Mark as notified so we don't re-send
|
# Mark as notified so we don't re-send
|
||||||
mkdir -p "${VAULT_DIR}/.locks"
|
mkdir -p "${LOCKS_DIR}"
|
||||||
touch "${VAULT_DIR}/.locks/${REQ_ID}.notified"
|
touch "${LOCKS_DIR}/${REQ_ID}.notified"
|
||||||
|
|
||||||
unlock_action "$REQ_ID"
|
unlock_action "$REQ_ID"
|
||||||
done
|
done
|
||||||
|
|
@ -239,7 +240,7 @@ if [ -n "${FORGE_REPO:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then
|
||||||
ISSUE_NUM=$(printf '%s' "$ACTION_ISSUES" | jq -r ".[$idx].number")
|
ISSUE_NUM=$(printf '%s' "$ACTION_ISSUES" | jq -r ".[$idx].number")
|
||||||
|
|
||||||
# Skip if already processed
|
# Skip if already processed
|
||||||
if [ -f "${VAULT_DIR}/.locks/issue-${ISSUE_NUM}.vault-fired" ]; then
|
if [ -f "${LOCKS_DIR}/issue-${ISSUE_NUM}.vault-fired" ]; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -272,21 +273,21 @@ if [ -n "${FORGE_REPO:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Skip if this action already exists in any stage
|
# Skip if this action already exists in any stage
|
||||||
if [ -f "${VAULT_DIR}/approved/${ACTION_ID}.json" ] || \
|
if [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ] || \
|
||||||
[ -f "${VAULT_DIR}/fired/${ACTION_ID}.json" ] || \
|
[ -f "${OPS_VAULT_DIR}/fired/${ACTION_ID}.json" ] || \
|
||||||
[ -f "${VAULT_DIR}/rejected/${ACTION_ID}.json" ]; then
|
[ -f "${OPS_VAULT_DIR}/rejected/${ACTION_ID}.json" ]; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "vault-bot authorized action on issue #${ISSUE_NUM}: ${ACTION_ID}"
|
log "vault-bot authorized action on issue #${ISSUE_NUM}: ${ACTION_ID}"
|
||||||
printf '%s' "$ACTION_JSON" | jq '.status = "approved"' > "${VAULT_DIR}/approved/${ACTION_ID}.json"
|
printf '%s' "$ACTION_JSON" | jq '.status = "approved"' > "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json"
|
||||||
COMMENT_COUNT=$((COMMENT_COUNT + 1))
|
COMMENT_COUNT=$((COMMENT_COUNT + 1))
|
||||||
|
|
||||||
# Fire the action
|
# Fire the action
|
||||||
if bash "${VAULT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then
|
if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then
|
||||||
log "fired ${ACTION_ID} from issue #${ISSUE_NUM}"
|
log "fired ${ACTION_ID} from issue #${ISSUE_NUM}"
|
||||||
# Mark issue as processed
|
# Mark issue as processed
|
||||||
touch "${VAULT_DIR}/.locks/issue-${ISSUE_NUM}.vault-fired"
|
touch "${LOCKS_DIR}/issue-${ISSUE_NUM}.vault-fired"
|
||||||
else
|
else
|
||||||
log "ERROR: fire failed for ${ACTION_ID} from issue #${ISSUE_NUM}"
|
log "ERROR: fire failed for ${ACTION_ID} from issue #${ISSUE_NUM}"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,9 @@ set -euo pipefail
|
||||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
source "${SCRIPT_DIR}/vault-env.sh"
|
source "${SCRIPT_DIR}/vault-env.sh"
|
||||||
|
|
||||||
VAULT_DIR="${FACTORY_ROOT}/vault"
|
OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault"
|
||||||
LOGFILE="${VAULT_DIR}/vault.log"
|
LOGFILE="${FACTORY_ROOT}/vault/vault.log"
|
||||||
|
LOCKS_DIR="${FACTORY_ROOT}/vault/.locks"
|
||||||
|
|
||||||
log() {
|
log() {
|
||||||
printf '[%s] vault-reject: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
printf '[%s] vault-reject: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
||||||
|
|
@ -20,10 +21,10 @@ REASON="${2:-unspecified}"
|
||||||
|
|
||||||
# Find the action file
|
# Find the action file
|
||||||
ACTION_FILE=""
|
ACTION_FILE=""
|
||||||
if [ -f "${VAULT_DIR}/pending/${ACTION_ID}.json" ]; then
|
if [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" ]; then
|
||||||
ACTION_FILE="${VAULT_DIR}/pending/${ACTION_ID}.json"
|
ACTION_FILE="${OPS_VAULT_DIR}/pending/${ACTION_ID}.json"
|
||||||
elif [ -f "${VAULT_DIR}/approved/${ACTION_ID}.json" ]; then
|
elif [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ]; then
|
||||||
ACTION_FILE="${VAULT_DIR}/approved/${ACTION_ID}.json"
|
ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json"
|
||||||
else
|
else
|
||||||
log "ERROR: action $ACTION_ID not found in pending/ or approved/"
|
log "ERROR: action $ACTION_ID not found in pending/ or approved/"
|
||||||
exit 1
|
exit 1
|
||||||
|
|
@ -33,10 +34,10 @@ fi
|
||||||
TMP=$(mktemp)
|
TMP=$(mktemp)
|
||||||
jq --arg reason "$REASON" --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
jq --arg reason "$REASON" --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
||||||
'.status = "rejected" | .rejected_at = $ts | .reject_reason = $reason' \
|
'.status = "rejected" | .rejected_at = $ts | .reject_reason = $reason' \
|
||||||
"$ACTION_FILE" > "$TMP" && mv "$TMP" "${VAULT_DIR}/rejected/${ACTION_ID}.json"
|
"$ACTION_FILE" > "$TMP" && mv "$TMP" "${OPS_VAULT_DIR}/rejected/${ACTION_ID}.json"
|
||||||
rm -f "$ACTION_FILE"
|
rm -f "$ACTION_FILE"
|
||||||
|
|
||||||
# Clean up lock if present
|
# Clean up lock if present
|
||||||
rm -f "${VAULT_DIR}/.locks/${ACTION_ID}.lock"
|
rm -f "${LOCKS_DIR}/${ACTION_ID}.lock"
|
||||||
|
|
||||||
log "$ACTION_ID: rejected — $REASON"
|
log "$ACTION_ID: rejected — $REASON"
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,9 @@
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
VAULT_DIR="${DISINTO_VAULT_DIR:-/home/agent/disinto/vault}"
|
VAULT_SCRIPT_DIR="${DISINTO_VAULT_DIR:-/home/agent/disinto/vault}"
|
||||||
LOGFILE="${VAULT_DIR}/vault.log"
|
OPS_VAULT_DIR="${DISINTO_OPS_VAULT_DIR:-${VAULT_SCRIPT_DIR}}"
|
||||||
|
LOGFILE="${VAULT_SCRIPT_DIR}/vault.log"
|
||||||
ACTION_ID="${1:?Usage: vault-run-action.sh <action-id>}"
|
ACTION_ID="${1:?Usage: vault-run-action.sh <action-id>}"
|
||||||
|
|
||||||
log() {
|
log() {
|
||||||
|
|
@ -22,7 +23,7 @@ log() {
|
||||||
}
|
}
|
||||||
|
|
||||||
# Find action file in approved/
|
# Find action file in approved/
|
||||||
ACTION_FILE="${VAULT_DIR}/approved/${ACTION_ID}.json"
|
ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json"
|
||||||
if [ ! -f "$ACTION_FILE" ]; then
|
if [ ! -f "$ACTION_FILE" ]; then
|
||||||
log "ERROR: action file not found: ${ACTION_FILE}"
|
log "ERROR: action file not found: ${ACTION_FILE}"
|
||||||
echo "ERROR: action file not found: ${ACTION_FILE}" >&2
|
echo "ERROR: action file not found: ${ACTION_FILE}" >&2
|
||||||
|
|
@ -118,7 +119,7 @@ case "$ACTION_TYPE" in
|
||||||
;;
|
;;
|
||||||
|
|
||||||
blog-post|social-post|email-blast|pricing-change|dns-change|stripe-charge)
|
blog-post|social-post|email-blast|pricing-change|dns-change|stripe-charge)
|
||||||
HANDLER="${VAULT_DIR}/handlers/${ACTION_TYPE}.sh"
|
HANDLER="${VAULT_SCRIPT_DIR}/handlers/${ACTION_TYPE}.sh"
|
||||||
if [ -x "$HANDLER" ]; then
|
if [ -x "$HANDLER" ]; then
|
||||||
bash "$HANDLER" "$ACTION_ID" "$PAYLOAD" 2>&1 || FIRE_EXIT=$?
|
bash "$HANDLER" "$ACTION_ID" "$PAYLOAD" 2>&1 || FIRE_EXIT=$?
|
||||||
else
|
else
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue