diff --git a/.env.example b/.env.example index 0062b9e..7f70675 100644 --- a/.env.example +++ b/.env.example @@ -57,7 +57,7 @@ WOODPECKER_DB_NAME=woodpecker # [CONFIG] Postgres database name # (deploy keys) — SSH keys for deployment targets # # To manage vault secrets: disinto secrets edit-vault -# (vault redesign in progress: PR-based approval, see #73-#77) +# See also: vault/run-action.sh, vault/vault-fire.sh # ── Project-specific secrets ────────────────────────────────────────────── # Store all project secrets here so formulas reference env vars, never hardcode. diff --git a/.woodpecker/agent-smoke.sh b/.woodpecker/agent-smoke.sh index 6651c0a..6d1d76b 100644 --- a/.woodpecker/agent-smoke.sh +++ b/.woodpecker/agent-smoke.sh @@ -210,6 +210,10 @@ check_script review/review-poll.sh check_script planner/planner-run.sh lib/agent-session.sh lib/formula-session.sh check_script supervisor/supervisor-poll.sh check_script supervisor/update-prompt.sh +check_script vault/vault-agent.sh +check_script vault/vault-fire.sh +check_script vault/vault-poll.sh +check_script vault/vault-reject.sh check_script supervisor/supervisor-run.sh check_script supervisor/preflight.sh check_script predictor/predictor-run.sh diff --git a/AGENTS.md b/AGENTS.md index f17b287..2871dd3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -3,14 +3,11 @@ ## What this repo is -Disinto is an autonomous code factory. It manages six agents (dev, review, -gardener, supervisor, planner, predictor) that pick up issues from forge, -implement them, review PRs, plan from the vision, and keep the system healthy — -all via cron and `claude -p`. The dispatcher executes formula-based operational -tasks. - -> **Note:** The vault is being redesigned as a PR-based approval workflow on the -> ops repo (see issues #73-#77). Old vault scripts are being removed. +Disinto is an autonomous code factory. 
It manages seven agents (dev, review, +gardener, supervisor, planner, predictor, vault) that pick up issues from forge, +implement them, review PRs, plan from the vision, gate dangerous actions, and +keep the system healthy — all via cron and `claude -p`. The dispatcher +executes formula-based operational tasks. See `README.md` for the full architecture and `disinto-factory/SKILL.md` for setup. @@ -26,7 +23,7 @@ disinto/ (code repo) ├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper) │ preflight.sh — pre-flight data collection for supervisor formula │ supervisor-poll.sh — legacy bash orchestrator (superseded) -├── vault/ vault-env.sh — shared env setup (vault redesign in progress, see #73-#77) +├── vault/ vault-poll.sh, vault-agent.sh, vault-fire.sh — action gating + procurement ├── lib/ env.sh, agent-session.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, build-graph.py ├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored) ├── formulas/ Issue templates (TOML specs for multi-step agent tasks) @@ -93,8 +90,7 @@ bash dev/phase-test.sh | Supervisor | `supervisor/` | Health monitoring | [supervisor/AGENTS.md](supervisor/AGENTS.md) | | Planner | `planner/` | Strategic planning | [planner/AGENTS.md](planner/AGENTS.md) | | Predictor | `predictor/` | Infrastructure pattern detection | [predictor/AGENTS.md](predictor/AGENTS.md) | - -> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77). +| Vault | `vault/` | Action gating + resource procurement | [vault/AGENTS.md](vault/AGENTS.md) | See [lib/AGENTS.md](lib/AGENTS.md) for the full shared helper reference. @@ -167,7 +163,7 @@ Humans write these. Agents read and enforce them. | AD-003 | The runtime creates and destroys, the formula preserves. | Runtime manages worktrees/sessions/temp. Formulas commit knowledge to git before signaling done. 
| | AD-004 | Event-driven > polling > fixed delays. | Never `waitForTimeout` or hardcoded sleep. Use phase files, webhooks, or poll loops with backoff. | | AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (both SOPS-encrypted). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. | -| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. (Vault redesign in progress: PR-based approval on ops repo, see #73-#77) | +| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. | **Who enforces what:** - **Gardener** checks open backlog issues against ADs during grooming; closes violations with a comment referencing the AD number. 
diff --git a/README.md b/README.md index f6a7165..abb47a1 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,9 @@ cron (daily) ──→ gardener-poll.sh ← backlog grooming (duplicates, stale cron (weekly) ──→ planner-poll.sh ← gap-analyse VISION.md, create backlog issues └── claude -p: update AGENTS.md → create issues +cron (*/30) ──→ vault-poll.sh ← safety gate for dangerous/irreversible actions + └── claude -p: classify → auto-approve/reject or escalate + ``` ## Prerequisites @@ -93,6 +96,7 @@ crontab -e # 3,13,23,33,43,53 * * * * /path/to/disinto/review/review-poll.sh # 6,16,26,36,46,56 * * * * /path/to/disinto/dev/dev-poll.sh # 15 8 * * * /path/to/disinto/gardener/gardener-poll.sh +# 0,30 * * * * /path/to/disinto/vault/vault-poll.sh # 0 9 * * 1 /path/to/disinto/planner/planner-poll.sh # 4. Verify @@ -121,7 +125,10 @@ disinto/ │ ├── planner-poll.sh # Cron entry: weekly vision gap analysis │ └── (formula-driven) # run-planner.toml executed by dispatcher ├── vault/ -│ └── vault-env.sh # Shared env setup (vault redesign in progress, see #73-#77) +│ ├── vault-poll.sh # Cron entry: process pending dangerous actions +│ ├── vault-agent.sh # Classifies and routes actions (claude -p) +│ ├── vault-fire.sh # Executes an approved action +│ └── vault-reject.sh # Marks an action as rejected └── supervisor/ ├── supervisor-poll.sh # Supervisor: health checks + claude -p ├── update-prompt.sh # Self-learning: append to best-practices @@ -144,8 +151,7 @@ disinto/ | **Review** | Every 10 min | Finds PRs without review, runs Claude-powered code review, approves or requests changes. | | **Gardener** | Daily | Grooms the issue backlog: detects duplicates, promotes `tech-debt` to `backlog`, closes stale issues, escalates ambiguous items. | | **Planner** | Weekly | Updates AGENTS.md documentation to reflect recent code changes, then gap-analyses VISION.md vs current state and creates up to 5 backlog issues for the highest-leverage gaps. 
| - -> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77). +| **Vault** | Every 30 min | Safety gate for dangerous or irreversible actions. Classifies pending actions via Claude: auto-approve, auto-reject, or escalate to a human via vault/forge. | ## Design Principles diff --git a/bin/disinto b/bin/disinto index 61d122f..7a30cc4 100755 --- a/bin/disinto +++ b/bin/disinto @@ -263,8 +263,8 @@ services: FORGE_URL: http://forgejo:3000 DISINTO_CONTAINER: "1" PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} - # Vault redesign in progress (PR-based approval, see #73-#77) - # This container is being replaced — entrypoint will be updated in follow-up + # env_file set at runtime by: disinto run --env-file + entrypoint: ["bash", "/home/agent/disinto/vault/run-action.sh"] networks: - disinto-net diff --git a/formulas/groom-backlog.toml b/formulas/groom-backlog.toml index 39a147f..7915a80 100644 --- a/formulas/groom-backlog.toml +++ b/formulas/groom-backlog.toml @@ -203,7 +203,7 @@ If all tiers clear, write the completion summary and signal done: echo "ACTION: grooming complete — 0 tech-debt remaining" >> "$RESULT_FILE" echo 'PHASE:done' > "$PHASE_FILE" -Vault items filed during this run appear as PRs on ops repo for human approval. +Vault items filed during this run are picked up by vault-poll automatically. On unrecoverable error (API unavailable, repeated failures): printf 'PHASE:failed\nReason: %s\n' 'describe what failed' > "$PHASE_FILE" diff --git a/formulas/review-pr.toml b/formulas/review-pr.toml index 614200a..2c02e17 100644 --- a/formulas/review-pr.toml +++ b/formulas/review-pr.toml @@ -128,7 +128,8 @@ Scan the diff for these patterns: If ANY of these patterns appear in agent code (scripts in `dev/`, `action/`, `planner/`, `gardener/`, `supervisor/`, `predictor/`, `review/`, `formulas/`, -`lib/`) WITHOUT routing through vault dispatch (file a vault PR on ops repo — see #73-#77), **REQUEST_CHANGES**. 
+`lib/`) WITHOUT routing through vault dispatch (`$OPS_REPO_ROOT/vault/pending/`, `vault-fire.sh`, +`run-action.sh`), **REQUEST_CHANGES**. Explain that external actions must use vault dispatch per AD-006. The agent should file a vault item instead of executing directly. diff --git a/formulas/run-supervisor.toml b/formulas/run-supervisor.toml index 20b1015..67359f4 100644 --- a/formulas/run-supervisor.toml +++ b/formulas/run-supervisor.toml @@ -159,7 +159,7 @@ human judgment, file a vault procurement item: ## Unblocks - Factory health: - Vault PR filed on ops repo — human approves via PR review. + The vault-poll will notify the human and track the request. Read the relevant best-practices file before taking action: cat "$OPS_REPO_ROOT/knowledge/memory.md" # P0 diff --git a/formulas/run-vault.toml b/formulas/run-vault.toml new file mode 100644 index 0000000..2b8c4e0 --- /dev/null +++ b/formulas/run-vault.toml @@ -0,0 +1,104 @@ +# formulas/run-vault.toml — Vault agent formula (action gating + classification) +# +# Source of truth for the vault agent's classification and routing logic. +# Used by vault/vault-agent.sh via claude -p when pending actions exist. +# +# The vault handles two kinds of items: +# A. Action Gating (*.json) — classified and routed by this formula +# B. Procurement Requests (*.md) — handled by vault-poll.sh + human +# +# This formula covers Pipeline A only. + +name = "run-vault" +description = "Vault action gating: classify pending actions, route by risk" +version = 1 +model = "sonnet" + +[context] +files = ["AGENTS.md"] + +[[steps]] +id = "classify-and-route" +title = "Classify and route all pending vault actions" +description = """ +You are the vault agent. For each pending JSON action, decide: +**auto-approve**, **escalate**, or **reject**. + +## Two Pipelines + +### A. Action Gating (*.json) +Actions from agents that need safety classification before execution. +You classify and route these: auto-approve, escalate, or reject. + +### B. 
Procurement Requests (*.md) +Resource requests from the planner. These always escalate to the human — +you do NOT auto-approve or reject procurement requests. The human fulfills +the request (creates accounts, provisions infra, adds secrets to .env) +and moves the file from $OPS_REPO_ROOT/vault/pending/ to $OPS_REPO_ROOT/vault/approved/. +vault-fire.sh then writes the RESOURCES.md entry. + +## Routing Table (risk x reversibility) + +| Risk | Reversible | Route | +|----------|------------|---------------------------------------------| +| low | true | auto-approve -> fire immediately | +| low | false | auto-approve -> fire, log prominently | +| medium | true | auto-approve -> fire, notify via vault/forge | +| medium | false | escalate via vault/forge -> wait for human reply | +| high | any | always escalate -> wait for human reply | + +## Rules + +1. **Never lower risk.** You may override the source agent's self-assessed + risk *upward*, never downward. If a blog-post looks like it contains + pricing claims, bump it to medium or high. +2. **requires_human: true always escalates.** Regardless of risk level. +3. **Unknown action types -> reject** with reason unknown_type. +4. **Malformed JSON -> reject** with reason malformed. +5. **Payload validation:** Check that the payload has the minimum required + fields for the action type. Missing fields -> reject with reason. +6. **Procurement requests (*.md) -> skip.** These are handled by the human + directly. Do not attempt to classify, approve, or reject them. + +## Action Type Defaults + +| Type | Default Risk | Default Reversible | +|------------------|-------------|-------------------| +| blog-post | low | yes | +| social-post | medium | yes | +| email-blast | high | no | +| pricing-change | high | partial | +| dns-change | high | partial | +| webhook-call | medium | depends | +| stripe-charge | high | no | + +## Available Tools + +You have shell access. 
Use these for routing decisions: + +source ${FACTORY_ROOT}/lib/env.sh + +### Auto-approve and fire +bash ${FACTORY_ROOT}/vault/vault-fire.sh <action-id> + +### Escalate +echo "PHASE:escalate" > "$PHASE_FILE" + +### Reject +bash ${FACTORY_ROOT}/vault/vault-reject.sh <action-id> "<reason>" + +## Output Format + +After processing each action, print exactly: + +ROUTE: <action-id> -> <auto-approve|escalate|reject> -- <reason> + +## Important + +- Process ALL pending JSON actions in the batch. Never skip silently. +- For auto-approved actions, fire them immediately via vault-fire.sh. +- For escalated actions, move to $OPS_REPO_ROOT/vault/approved/ only AFTER human approval. +- Read the action JSON carefully. Check the payload, not just the metadata. +- Ignore .md files in pending/ -- those are procurement requests handled + separately by vault-poll.sh and the human. +""" diff --git a/lib/AGENTS.md b/lib/AGENTS.md index fc8ffd0..cb558bc 100644 --- a/lib/AGENTS.md +++ b/lib/AGENTS.md @@ -7,7 +7,7 @@ sourced as needed. | File | What it provides | Sourced by | |---|---|---| | `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. 
| Every agent | -| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status ` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number ` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote ` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). | dev-poll, review-poll, review-pr, supervisor-poll | +| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status ` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number ` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. 
`ci_promote ` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this). | dev-poll, review-poll, review-pr, supervisor-poll | | `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) | | `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) | | `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll | diff --git a/site/docs/architecture.html b/site/docs/architecture.html index 2ab1a2f..c35edf3 100644 --- a/site/docs/architecture.html +++ b/site/docs/architecture.html @@ -399,8 +399,8 @@
vault
-
Being redesigned. Moving to PR-based approval workflow on ops repo. See issues #73-#77.
-
Redesign in progress
+
Safety gate. Reviews dangerous actions before they execute. Auto-approves safe operations, escalates risky ones to a human.
+
Event-driven
@@ -446,11 +446,12 @@
-

Vault — being redesigned

+

Vault — safety gate

-
Redesign in progress
-

The vault is being redesigned as a PR-based approval workflow on the ops repo. Instead of polling pending files, vault items will be created as PRs that require admin approval before execution.

-

See issues #73-#77 for the design: #75 defines the vault.sh helper for creating vault PRs, #76 rewrites the dispatcher to poll for merged vault PRs, #77 adds branch protection requiring admin approval.

+
How it works
+

The vault sits between agents and dangerous actions. Before an agent can execute a risky operation (force push, deploy, delete), the vault reviews the request.

+

Auto-approve — safe, well-understood operations pass through instantly. Escalate — risky or novel operations get sent to a human via Matrix. Reject — clearly unsafe actions are blocked.

+

You define the boundaries. The vault enforces them. This is what lets you sleep while the factory runs.

@@ -518,7 +519,7 @@ disinto/ ├── predictor/ predictor-run.sh (daily cron executor) ├── planner/ planner-run.sh (weekly cron executor) ├── supervisor/ supervisor-run.sh (health monitoring) -├── vault/ vault-env.sh (vault redesign in progress, see #73-#77) +├── vault/ vault-poll.sh, vault-agent.sh, vault-fire.sh ├── lib/ env.sh, agent-session.sh, ci-helpers.sh ├── projects/ *.toml per-project config ├── formulas/ TOML specs for multi-step agent tasks diff --git a/vault/.locks/.gitkeep b/vault/.locks/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vault/AGENTS.md b/vault/AGENTS.md new file mode 100644 index 0000000..879e645 --- /dev/null +++ b/vault/AGENTS.md @@ -0,0 +1,45 @@ + +# Vault Agent + +**Role**: Three-pipeline gate — action safety classification, resource procurement, and human-action drafting. + +**Pipeline A — Action Gating (*.json)**: Actions enter a pending queue and are +classified by Claude via `vault-agent.sh`, which can auto-approve (call +`vault-fire.sh` directly), auto-reject (call `vault-reject.sh`), or escalate +to a human by writing `PHASE:escalate` to a phase file — using the same +unified escalation path as dev/action agents. + +**Pipeline B — Procurement (*.md)**: The planner files resource requests as +markdown files in `$OPS_REPO_ROOT/vault/pending/`. `vault-poll.sh` notifies the human via +vault/forge. The human fulfills the request (creates accounts, provisions infra, +adds secrets to `.env`) and moves the file to `$OPS_REPO_ROOT/vault/approved/`. +`vault-fire.sh` then extracts the proposed entry and appends it to +`$OPS_REPO_ROOT/RESOURCES.md`. + +**Pipeline C — Rent-a-Human (outreach drafts)**: Any agent can dispatch the +`run-rent-a-human` formula (via an `action` issue) when a task requires a human +touch — posting on Reddit, commenting on HN, signing up for a service, etc. +Claude drafts copy-paste-ready content to `vault/outreach/{platform}/drafts/` +and notifies the human via vault/forge for one-click execution. 
No vault approval +needed — the human reviews and publishes directly. + +**Trigger**: `vault-poll.sh` runs every 30 min via cron. + +**Key files**: +- `vault/vault-poll.sh` — Processes pending items: retry approved, auto-reject after 48h timeout, invoke vault-agent for JSON actions, notify human for procurement requests +- `vault/vault-agent.sh` — Classifies and routes pending JSON actions via `claude -p`: auto-approve, auto-reject, or escalate to human +- `vault/vault-env.sh` — Shared env setup for vault sub-scripts: sources `lib/env.sh`, overrides `FORGE_TOKEN` with `FORGE_VAULT_TOKEN`, sets `VAULT_TOKEN` for runner container +- `formulas/run-vault.toml` — Source-of-truth formula for the vault agent's classification and routing logic +- `vault/vault-fire.sh` — Executes an approved action (JSON) in an **ephemeral Docker container** with vault-only secrets injected (GITHUB_TOKEN, CLAWHUB_TOKEN — never exposed to agents). For deployment actions, calls `lib/ci-helpers.sh:ci_promote()` to gate production promotes via Woodpecker environments. Writes `$OPS_REPO_ROOT/RESOURCES.md` entry for procurement MD approvals. +- `vault/vault-reject.sh` — Marks a JSON action as rejected +- `formulas/run-rent-a-human.toml` — Formula for human-action drafts: Claude researches target platform norms, drafts copy-paste content, writes to `vault/outreach/{platform}/drafts/`, notifies human via vault/forge + +**Procurement flow** (all vault items live in `$OPS_REPO_ROOT/vault/`): +1. Planner drops `$OPS_REPO_ROOT/vault/pending/.md` with what/why/proposed RESOURCES.md entry +2. `vault-poll.sh` notifies human via vault/forge +3. Human fulfills: creates account, adds secrets to `.env`, moves file to `approved/` +4. `vault-fire.sh` extracts proposed entry, appends to `$OPS_REPO_ROOT/RESOURCES.md`, moves to `fired/` +5. 
Next planner run reads RESOURCES.md → new capability available → unblocks prerequisite tree + +**Environment variables consumed**: +- All from `lib/env.sh` diff --git a/vault/run-action.sh b/vault/run-action.sh new file mode 100755 index 0000000..b051511 --- /dev/null +++ b/vault/run-action.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +# run-action.sh — Execute an action inside the ephemeral runner container +# +# This script is the entrypoint for the runner container. It runs with +# vault secrets injected as environment variables (GITHUB_TOKEN, CLAWHUB_TOKEN, +# deploy keys, etc.) and dispatches to the appropriate action handler. +# +# The runner container is ephemeral: it starts, runs the action, and is +# destroyed. Secrets exist only in container memory, never on disk. +# +# Usage: run-action.sh + +set -euo pipefail + +VAULT_SCRIPT_DIR="${DISINTO_VAULT_DIR:-/home/agent/disinto/vault}" +OPS_VAULT_DIR="${DISINTO_OPS_VAULT_DIR:-${VAULT_SCRIPT_DIR}}" +LOGFILE="${VAULT_SCRIPT_DIR}/vault.log" +ACTION_ID="${1:?Usage: run-action.sh }" + +log() { + printf '[%s] runner: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" 2>/dev/null || \ + printf '[%s] runner: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2 +} + +# Find action file in approved/ +ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" +if [ ! 
-f "$ACTION_FILE" ]; then + log "ERROR: action file not found: ${ACTION_FILE}" + echo "ERROR: action file not found: ${ACTION_FILE}" >&2 + exit 1 +fi + +ACTION_TYPE=$(jq -r '.type // ""' < "$ACTION_FILE") +ACTION_SOURCE=$(jq -r '.source // ""' < "$ACTION_FILE") +PAYLOAD=$(jq -c '.payload // {}' < "$ACTION_FILE") + +if [ -z "$ACTION_TYPE" ]; then + log "ERROR: ${ACTION_ID} has no type field" + exit 1 +fi + +log "${ACTION_ID}: executing type=${ACTION_TYPE} source=${ACTION_SOURCE}" + +FIRE_EXIT=0 + +case "$ACTION_TYPE" in + webhook-call) + # HTTP call to endpoint with optional method/headers/body + ENDPOINT=$(echo "$PAYLOAD" | jq -r '.endpoint // ""') + METHOD=$(echo "$PAYLOAD" | jq -r '.method // "POST"') + REQ_BODY=$(echo "$PAYLOAD" | jq -r '.body // ""') + + if [ -z "$ENDPOINT" ]; then + log "ERROR: ${ACTION_ID} webhook-call missing endpoint" + exit 1 + fi + + CURL_ARGS=(-sf -X "$METHOD" -o /dev/null -w "%{http_code}") + while IFS= read -r header; do + [ -n "$header" ] && CURL_ARGS+=(-H "$header") + done < <(echo "$PAYLOAD" | jq -r '.headers // {} | to_entries[] | "\(.key): \(.value)"' 2>/dev/null || true) + if [ -n "$REQ_BODY" ] && [ "$REQ_BODY" != "null" ]; then + CURL_ARGS+=(-d "$REQ_BODY") + fi + + HTTP_CODE=$(curl "${CURL_ARGS[@]}" "$ENDPOINT" 2>/dev/null) || HTTP_CODE="000" + if [[ "$HTTP_CODE" =~ ^2 ]]; then + log "${ACTION_ID}: webhook-call -> HTTP ${HTTP_CODE} OK" + else + log "ERROR: ${ACTION_ID} webhook-call -> HTTP ${HTTP_CODE}" + FIRE_EXIT=1 + fi + ;; + + promote) + # Promote a Woodpecker pipeline to a deployment environment (staging/production). 
+ # Payload: {"repo_id": N, "pipeline": N, "environment": "staging"|"production"} + PROMOTE_REPO_ID=$(echo "$PAYLOAD" | jq -r '.repo_id // ""') + PROMOTE_PIPELINE=$(echo "$PAYLOAD" | jq -r '.pipeline // ""') + PROMOTE_ENV=$(echo "$PAYLOAD" | jq -r '.environment // ""') + + if [ -z "$PROMOTE_REPO_ID" ] || [ -z "$PROMOTE_PIPELINE" ] || [ -z "$PROMOTE_ENV" ]; then + log "ERROR: ${ACTION_ID} promote missing repo_id, pipeline, or environment" + FIRE_EXIT=1 + else + # Validate environment is staging or production + case "$PROMOTE_ENV" in + staging|production) ;; + *) + log "ERROR: ${ACTION_ID} promote invalid environment '${PROMOTE_ENV}' (must be staging or production)" + FIRE_EXIT=1 + ;; + esac + + if [ "$FIRE_EXIT" -eq 0 ]; then + WP_SERVER="${WOODPECKER_SERVER:-http://woodpecker:8000}" + WP_TOKEN="${WOODPECKER_TOKEN:-}" + + if [ -z "$WP_TOKEN" ]; then + log "ERROR: ${ACTION_ID} promote requires WOODPECKER_TOKEN" + FIRE_EXIT=1 + else + PROMOTE_RESP=$(curl -sf -X POST \ + -H "Authorization: Bearer ${WP_TOKEN}" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "event=deployment&deploy_to=${PROMOTE_ENV}" \ + "${WP_SERVER}/api/repos/${PROMOTE_REPO_ID}/pipelines/${PROMOTE_PIPELINE}" 2>/dev/null) || PROMOTE_RESP="" + + NEW_PIPELINE=$(printf '%s' "$PROMOTE_RESP" | jq -r '.number // empty' 2>/dev/null) + if [ -n "$NEW_PIPELINE" ]; then + log "${ACTION_ID}: promoted pipeline ${PROMOTE_PIPELINE} to ${PROMOTE_ENV} -> new pipeline #${NEW_PIPELINE}" + else + log "ERROR: ${ACTION_ID} promote API failed (repo_id=${PROMOTE_REPO_ID} pipeline=${PROMOTE_PIPELINE} env=${PROMOTE_ENV})" + FIRE_EXIT=1 + fi + fi + fi + fi + ;; + + blog-post|social-post|email-blast|pricing-change|dns-change|stripe-charge) + HANDLER="${VAULT_SCRIPT_DIR}/handlers/${ACTION_TYPE}.sh" + if [ -x "$HANDLER" ]; then + bash "$HANDLER" "$ACTION_ID" "$PAYLOAD" 2>&1 || FIRE_EXIT=$? 
+ else + log "ERROR: ${ACTION_ID} no handler for type '${ACTION_TYPE}' (${HANDLER} not found)" + FIRE_EXIT=1 + fi + ;; + + *) + log "ERROR: ${ACTION_ID} unknown action type '${ACTION_TYPE}'" + FIRE_EXIT=1 + ;; +esac + +exit "$FIRE_EXIT" diff --git a/vault/vault-agent.sh b/vault/vault-agent.sh new file mode 100755 index 0000000..3f85042 --- /dev/null +++ b/vault/vault-agent.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# vault-agent.sh — Invoke claude -p to classify and route pending vault actions +# +# Called by vault-poll.sh when pending actions exist. Reads all pending/*.json, +# builds a prompt with action summaries, and lets the LLM decide routing. +# +# The LLM can call vault-fire.sh (auto-approve) or vault-reject.sh (reject) +# directly. For escalations, it writes a PHASE:escalate file and marks the +# action as "escalated" in pending/ so vault-poll skips it on future runs. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "${SCRIPT_DIR}/vault-env.sh" + +VAULT_SCRIPT_DIR="${FACTORY_ROOT}/vault" +OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" +PROMPT_FILE="${FACTORY_ROOT}/formulas/run-vault.toml" +LOGFILE="${VAULT_SCRIPT_DIR}/vault.log" +CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}" + +log() { + printf '[%s] vault-agent: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +# Collect all pending actions (skip already-escalated) +ACTIONS_BATCH="" +ACTION_COUNT=0 + +for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do + [ -f "$action_file" ] || continue + + ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null) + [ "$ACTION_STATUS" = "escalated" ] && continue + + # Validate JSON + if ! 
jq empty < "$action_file" 2>/dev/null; then + ACTION_ID=$(basename "$action_file" .json) + log "malformed JSON: $action_file — rejecting" + bash "${VAULT_SCRIPT_DIR}/vault-reject.sh" "$ACTION_ID" "malformed JSON" 2>/dev/null || true + continue + fi + + ACTION_JSON=$(cat "$action_file") + ACTIONS_BATCH="${ACTIONS_BATCH} +--- ACTION --- +$(echo "$ACTION_JSON" | jq '.') +--- END ACTION --- +" + ACTION_COUNT=$((ACTION_COUNT + 1)) +done + +if [ "$ACTION_COUNT" -eq 0 ]; then + log "no actionable pending items" + exit 0 +fi + +log "processing $ACTION_COUNT pending action(s) via claude -p" + +# Build the prompt +SYSTEM_PROMPT=$(cat "$PROMPT_FILE" 2>/dev/null || echo "You are a vault agent. Classify and route actions.") + +PROMPT="${SYSTEM_PROMPT} + +## Pending Actions (${ACTION_COUNT} total) +${ACTIONS_BATCH} + +## Environment +- FACTORY_ROOT=${FACTORY_ROOT} +- OPS_REPO_ROOT=${OPS_REPO_ROOT} +- Vault data: ${OPS_VAULT_DIR} +- vault-fire.sh: bash ${VAULT_SCRIPT_DIR}/vault-fire.sh +- vault-reject.sh: bash ${VAULT_SCRIPT_DIR}/vault-reject.sh \"\" + +Process each action now. For auto-approve, fire immediately. For reject, call vault-reject.sh. + +For actions that need human approval (escalate), write a PHASE:escalate file +to signal the unified escalation path: + printf 'PHASE:escalate\nReason: vault procurement — %s\n' '' \\ + > /tmp/vault-escalate-.phase +Then STOP and wait — a human will review via the forge." 
+ +CLAUDE_OUTPUT=$(timeout "$CLAUDE_TIMEOUT" claude -p "$PROMPT" \ + --model sonnet \ + --dangerously-skip-permissions \ + --max-turns 20 \ + 2>/dev/null) || true + +log "claude finished ($(echo "$CLAUDE_OUTPUT" | wc -c) bytes)" + +# Log routing decisions +ROUTES=$(echo "$CLAUDE_OUTPUT" | grep "^ROUTE:" || true) +if [ -n "$ROUTES" ]; then + echo "$ROUTES" | while read -r line; do + log " $line" + done +fi diff --git a/vault/vault-env.sh b/vault/vault-env.sh index 459d214..66b87d1 100644 --- a/vault/vault-env.sh +++ b/vault/vault-env.sh @@ -8,5 +8,5 @@ source "$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/lib/env.sh" # Use vault-bot's own Forgejo identity FORGE_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" -# Vault redesign in progress (PR-based approval workflow) -# This file is kept for shared env setup; scripts being replaced by #73 +# Set entrypoint for runner container +export VAULT_RUNNER_ENTRYPOINT="run-action.sh" diff --git a/vault/vault-fire.sh b/vault/vault-fire.sh new file mode 100755 index 0000000..79c1d46 --- /dev/null +++ b/vault/vault-fire.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +# vault-fire.sh — Execute an approved vault item by ID +# +# Handles two pipelines: +# A. Action gating (*.json): pending/ → approved/ → fired/ +# Execution delegated to ephemeral runner container via disinto run. +# The runner gets vault secrets (.env.vault.enc); this script does NOT. +# B. Procurement (*.md): approved/ → fired/ (writes RESOURCES.md entry) +# +# If item is in pending/, moves to approved/ first. +# If item is already in approved/, fires directly (crash recovery). 
+# +# Usage: bash vault-fire.sh + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "${SCRIPT_DIR}/vault-env.sh" + +OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" +LOCKS_DIR="${DISINTO_LOG_DIR}/vault/.locks" +LOGFILE="${DISINTO_LOG_DIR}/vault/vault.log" +RESOURCES_FILE="${OPS_REPO_ROOT}/RESOURCES.md" + +log() { + printf '[%s] vault-fire: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +ACTION_ID="${1:?Usage: vault-fire.sh }" + +# ============================================================================= +# Detect pipeline: procurement (.md) or action gating (.json) +# ============================================================================= +IS_PROCUREMENT=false +ACTION_FILE="" + +if [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" ]; then + IS_PROCUREMENT=true + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" +elif [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.md" ]; then + IS_PROCUREMENT=true + mv "${OPS_VAULT_DIR}/pending/${ACTION_ID}.md" "${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" + log "$ACTION_ID: pending → approved (procurement)" +elif [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ]; then + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" +elif [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" ]; then + mv "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" + TMP=$(mktemp) + jq '.status = "approved"' "$ACTION_FILE" > "$TMP" && mv "$TMP" "$ACTION_FILE" + log "$ACTION_ID: pending → approved" +else + log "ERROR: item $ACTION_ID not found in pending/ or approved/" + exit 1 +fi + +# Acquire lock +mkdir -p "$LOCKS_DIR" +LOCKFILE="${LOCKS_DIR}/${ACTION_ID}.lock" +if [ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || true) + if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then + log "$ACTION_ID: already being fired by PID 
$LOCK_PID" + exit 0 + fi +fi +echo $$ > "$LOCKFILE" +trap 'rm -f "$LOCKFILE"' EXIT + +# ============================================================================= +# Pipeline B: Procurement — extract RESOURCES.md entry and append +# ============================================================================= +if [ "$IS_PROCUREMENT" = true ]; then + log "$ACTION_ID: firing procurement request" + + # Extract the proposed RESOURCES.md entry from the markdown file. + # Everything after the "## Proposed RESOURCES.md Entry" heading to EOF. + # Uses awk because the entry itself contains ## headings (## ). + ENTRY="" + ENTRY=$(awk '/^## Proposed RESOURCES\.md Entry/{found=1; next} found{print}' "$ACTION_FILE" 2>/dev/null || true) + + # Strip leading/trailing blank lines and markdown code fences + ENTRY=$(echo "$ENTRY" | sed '/^```/d' | sed -e '/./,$!d' -e :a -e '/^\n*$/{$d;N;ba;}') + + if [ -z "$ENTRY" ]; then + log "ERROR: $ACTION_ID has no '## Proposed RESOURCES.md Entry' section" + exit 1 + fi + + # Append entry to RESOURCES.md + printf '\n%s\n' "$ENTRY" >> "$RESOURCES_FILE" + log "$ACTION_ID: wrote RESOURCES.md entry" + + # Move to fired/ + mv "$ACTION_FILE" "${OPS_VAULT_DIR}/fired/${ACTION_ID}.md" + rm -f "${LOCKS_DIR}/${ACTION_ID}.notified" + log "$ACTION_ID: approved → fired (procurement)" + exit 0 +fi + +# ============================================================================= +# Pipeline A: Action gating — delegate to ephemeral runner container +# ============================================================================= +ACTION_TYPE=$(jq -r '.type // ""' < "$ACTION_FILE") +ACTION_SOURCE=$(jq -r '.source // ""' < "$ACTION_FILE") + +if [ -z "$ACTION_TYPE" ]; then + log "ERROR: $ACTION_ID has no type field" + exit 1 +fi + +log "$ACTION_ID: firing type=$ACTION_TYPE source=$ACTION_SOURCE via runner" + +FIRE_EXIT=0 + +# Delegate execution to the ephemeral runner container. 
+# The runner gets vault secrets (.env.vault.enc) injected at runtime; +# this host process never sees those secrets. +if [ -f "${FACTORY_ROOT}/.env.vault.enc" ] && [ -f "${FACTORY_ROOT}/docker-compose.yml" ]; then + bash "${FACTORY_ROOT}/bin/disinto" run "$ACTION_ID" >> "$LOGFILE" 2>&1 || FIRE_EXIT=$? +else + # Fallback for bare-metal or pre-migration setups: run action handler directly + log "$ACTION_ID: no .env.vault.enc or docker-compose.yml — running action directly" + bash "${SCRIPT_DIR}/run-action.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1 || FIRE_EXIT=$? +fi + +# ============================================================================= +# Move to fired/ or leave in approved/ on failure +# ============================================================================= +if [ "$FIRE_EXIT" -eq 0 ]; then + # Update with fired timestamp and move to fired/ + TMP=$(mktemp) + jq --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" '.status = "fired" | .fired_at = $ts' "$ACTION_FILE" > "$TMP" \ + && mv "$TMP" "${OPS_VAULT_DIR}/fired/${ACTION_ID}.json" + rm -f "$ACTION_FILE" + log "$ACTION_ID: approved → fired" +else + log "ERROR: $ACTION_ID fire failed (exit $FIRE_EXIT) — stays in approved/ for retry" + exit "$FIRE_EXIT" +fi diff --git a/vault/vault-poll.sh b/vault/vault-poll.sh new file mode 100755 index 0000000..a32b31f --- /dev/null +++ b/vault/vault-poll.sh @@ -0,0 +1,301 @@ +#!/usr/bin/env bash +# vault-poll.sh — Vault: process pending actions + procurement requests +# +# Runs every 30min via cron. Two pipelines: +# A. Action gating (*.json): auto-approve/escalate/reject via vault-agent.sh +# B. Procurement (*.md): notify human, fire approved requests via vault-fire.sh +# +# Phases: +# 1. Retry any approved/ items that weren't fired (crash recovery) +# 2. Auto-reject escalations with no reply for 48h +# 3. Invoke vault-agent.sh for new pending JSON actions +# 4. 
Notify human about new pending procurement requests (.md) +# +# Cron: */30 * * * * /path/to/disinto/vault/vault-poll.sh +# +# Peek: cat /tmp/vault-status +# Log: tail -f /path/to/disinto/vault/vault.log + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "${SCRIPT_DIR}/../lib/env.sh" +# Use vault-bot's own Forgejo identity (#747) +FORGE_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" + +LOGFILE="${DISINTO_LOG_DIR}/vault/vault.log" +STATUSFILE="/tmp/vault-status" +LOCKFILE="/tmp/vault-poll.lock" +VAULT_SCRIPT_DIR="${FACTORY_ROOT}/vault" +OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" +LOCKS_DIR="${DISINTO_LOG_DIR}/vault/.locks" + +TIMEOUT_HOURS=48 + +# Prevent overlapping runs +if [ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null) + if kill -0 "$LOCK_PID" 2>/dev/null; then + exit 0 + fi + rm -f "$LOCKFILE" +fi +echo $$ > "$LOCKFILE" +trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT + +log() { + printf '[%s] vault: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +status() { + printf '[%s] vault: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE" + log "$*" +} + +# Acquire per-action lock (returns 0 if acquired, 1 if already locked) +lock_action() { + local action_id="$1" + local lockfile="${LOCKS_DIR}/${action_id}.lock" + mkdir -p "$LOCKS_DIR" + if [ -f "$lockfile" ]; then + local lock_pid + lock_pid=$(cat "$lockfile" 2>/dev/null || true) + if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then + return 1 + fi + rm -f "$lockfile" + fi + echo $$ > "$lockfile" + return 0 +} + +unlock_action() { + local action_id="$1" + rm -f "${LOCKS_DIR}/${action_id}.lock" +} + +# ============================================================================= +# PHASE 1: Retry approved items (crash recovery — JSON actions + MD procurement) +# ============================================================================= +status "phase 1: retrying approved items" + +for action_file in "${OPS_VAULT_DIR}/approved/"*.json; do 
+ [ -f "$action_file" ] || continue + ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null) + [ -z "$ACTION_ID" ] && continue + + if ! lock_action "$ACTION_ID"; then + log "skip $ACTION_ID — locked by another process" + continue + fi + + log "retrying approved action: $ACTION_ID" + if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then + log "fired $ACTION_ID (retry)" + else + log "ERROR: fire failed for $ACTION_ID (retry)" + fi + + unlock_action "$ACTION_ID" +done + +# Retry approved procurement requests (.md) +for req_file in "${OPS_VAULT_DIR}/approved/"*.md; do + [ -f "$req_file" ] || continue + REQ_ID=$(basename "$req_file" .md) + + if ! lock_action "$REQ_ID"; then + log "skip procurement $REQ_ID — locked by another process" + continue + fi + + log "retrying approved procurement: $REQ_ID" + if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$REQ_ID" >> "$LOGFILE" 2>&1; then + log "fired procurement $REQ_ID (retry)" + else + log "ERROR: fire failed for procurement $REQ_ID (retry)" + fi + + unlock_action "$REQ_ID" +done + +# ============================================================================= +# PHASE 2: Timeout escalations (48h no reply → auto-reject) +# ============================================================================= +status "phase 2: checking escalation timeouts" + +NOW_EPOCH=$(date +%s) +TIMEOUT_SECS=$((TIMEOUT_HOURS * 3600)) + +for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do + [ -f "$action_file" ] || continue + + ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null) + [ "$ACTION_STATUS" != "escalated" ] && continue + + ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null) + ESCALATED_AT=$(jq -r '.escalated_at // ""' < "$action_file" 2>/dev/null) + [ -z "$ESCALATED_AT" ] && continue + + ESCALATED_EPOCH=$(date -d "$ESCALATED_AT" +%s 2>/dev/null || echo 0) + AGE_SECS=$((NOW_EPOCH - ESCALATED_EPOCH)) + + if [ "$AGE_SECS" -gt "$TIMEOUT_SECS" ]; then + AGE_HOURS=$((AGE_SECS / 3600)) + 
log "timeout: $ACTION_ID escalated ${AGE_HOURS}h ago with no reply — auto-rejecting" + bash "${VAULT_SCRIPT_DIR}/vault-reject.sh" "$ACTION_ID" "timeout (${AGE_HOURS}h, no human reply)" >> "$LOGFILE" 2>&1 || true + fi +done + +# ============================================================================= +# PHASE 3: Process new pending actions (JSON — action gating) +# ============================================================================= +status "phase 3: processing pending actions" + +PENDING_COUNT=0 +PENDING_SUMMARY="" + +for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do + [ -f "$action_file" ] || continue + + ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null) + # Skip already-escalated actions (waiting for human reply) + [ "$ACTION_STATUS" = "escalated" ] && continue + + ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null) + [ -z "$ACTION_ID" ] && continue + + if ! lock_action "$ACTION_ID"; then + log "skip $ACTION_ID — locked" + continue + fi + + PENDING_COUNT=$((PENDING_COUNT + 1)) + ACTION_TYPE=$(jq -r '.type // "unknown"' < "$action_file" 2>/dev/null) + ACTION_SOURCE=$(jq -r '.source // "unknown"' < "$action_file" 2>/dev/null) + PENDING_SUMMARY="${PENDING_SUMMARY} ${ACTION_ID} [${ACTION_TYPE}] from ${ACTION_SOURCE}\n" + + unlock_action "$ACTION_ID" +done + +if [ "$PENDING_COUNT" -gt 0 ]; then + log "found $PENDING_COUNT pending action(s), invoking vault-agent" + status "invoking vault-agent for $PENDING_COUNT action(s)" + + bash "${VAULT_SCRIPT_DIR}/vault-agent.sh" >> "$LOGFILE" 2>&1 || { + log "ERROR: vault-agent failed" + } +fi + +# ============================================================================= +# PHASE 4: Notify human about new pending procurement requests (.md) +# ============================================================================= +status "phase 4: processing pending procurement requests" + +PROCURE_COUNT=0 + +for req_file in "${OPS_VAULT_DIR}/pending/"*.md; do + [ -f "$req_file" ] || 
continue + REQ_ID=$(basename "$req_file" .md) + + # Check if already notified (marker file) + if [ -f "${LOCKS_DIR}/${REQ_ID}.notified" ]; then + continue + fi + + if ! lock_action "$REQ_ID"; then + log "skip procurement $REQ_ID — locked" + continue + fi + + PROCURE_COUNT=$((PROCURE_COUNT + 1)) + + # Extract title from first heading + REQ_TITLE=$(grep -m1 '^# ' "$req_file" | sed 's/^# //' || echo "$REQ_ID") + + log "new procurement request: $REQ_ID — $REQ_TITLE" + + # Mark as notified so we don't re-send + mkdir -p "${LOCKS_DIR}" + touch "${LOCKS_DIR}/${REQ_ID}.notified" + + unlock_action "$REQ_ID" +done + +# ============================================================================= +# PHASE 5: Detect vault-bot authorized comments on issues +# ============================================================================= +status "phase 5: scanning for vault-bot authorized comments" + +COMMENT_COUNT=0 + +if [ -n "${FORGE_REPO:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then + # Get open issues with action label + ACTION_ISSUES=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL}/api/v1/repos/${FORGE_REPO}/issues?state=open&labels=action&limit=50" 2>/dev/null) || ACTION_ISSUES="[]" + + ISSUE_COUNT=$(printf '%s' "$ACTION_ISSUES" | jq 'length') + for idx in $(seq 0 $((ISSUE_COUNT - 1))); do + ISSUE_NUM=$(printf '%s' "$ACTION_ISSUES" | jq -r ".[$idx].number") + + # Skip if already processed + if [ -f "${LOCKS_DIR}/issue-${ISSUE_NUM}.vault-fired" ]; then + continue + fi + + # Get comments on this issue + COMMENTS=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL}/api/v1/repos/${FORGE_REPO}/issues/${ISSUE_NUM}/comments?limit=50" 2>/dev/null) || continue + + # Look for vault-bot comments containing VAULT:APPROVED with a JSON action spec + APPROVED_BODY=$(printf '%s' "$COMMENTS" | jq -r ' + [.[] | select(.user.login == "vault-bot") | select(.body | test("VAULT:APPROVED"))] | last | .body // empty + ' 2>/dev/null) || continue + + [ -z 
"$APPROVED_BODY" ] && continue + + # Extract JSON action spec from fenced code block in the comment + ACTION_JSON=$(printf '%s' "$APPROVED_BODY" | sed -n '/^```json$/,/^```$/p' | sed '1d;$d') + [ -z "$ACTION_JSON" ] && continue + + # Validate JSON + if ! printf '%s' "$ACTION_JSON" | jq empty 2>/dev/null; then + log "malformed action JSON in vault-bot comment on issue #${ISSUE_NUM}" + continue + fi + + ACTION_ID=$(printf '%s' "$ACTION_JSON" | jq -r '.id // empty') + if [ -z "$ACTION_ID" ]; then + ACTION_ID="issue-${ISSUE_NUM}-$(date +%s)" + ACTION_JSON=$(printf '%s' "$ACTION_JSON" | jq --arg id "$ACTION_ID" '.id = $id') + fi + + # Skip if this action already exists in any stage + if [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ] || \ + [ -f "${OPS_VAULT_DIR}/fired/${ACTION_ID}.json" ] || \ + [ -f "${OPS_VAULT_DIR}/rejected/${ACTION_ID}.json" ]; then + continue + fi + + log "vault-bot authorized action on issue #${ISSUE_NUM}: ${ACTION_ID}" + printf '%s' "$ACTION_JSON" | jq '.status = "approved"' > "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" + COMMENT_COUNT=$((COMMENT_COUNT + 1)) + + # Fire the action + if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then + log "fired ${ACTION_ID} from issue #${ISSUE_NUM}" + # Mark issue as processed + touch "${LOCKS_DIR}/issue-${ISSUE_NUM}.vault-fired" + else + log "ERROR: fire failed for ${ACTION_ID} from issue #${ISSUE_NUM}" + fi + done +fi + +if [ "$PENDING_COUNT" -eq 0 ] && [ "$PROCURE_COUNT" -eq 0 ] && [ "$COMMENT_COUNT" -eq 0 ]; then + status "all clear — no pending items" +else + status "poll complete — ${PENDING_COUNT} action(s), ${PROCURE_COUNT} procurement(s), ${COMMENT_COUNT} comment-authorized" +fi diff --git a/vault/vault-reject.sh b/vault/vault-reject.sh new file mode 100755 index 0000000..54fa127 --- /dev/null +++ b/vault/vault-reject.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# vault-reject.sh — Move a vault action to rejected/ with reason +# +# Usage: bash vault-reject.sh "" + +set 
-euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "${SCRIPT_DIR}/vault-env.sh" + +OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" +LOGFILE="${DISINTO_LOG_DIR}/vault/vault.log" +LOCKS_DIR="${DISINTO_LOG_DIR}/vault/.locks" + +log() { + printf '[%s] vault-reject: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +ACTION_ID="${1:?Usage: vault-reject.sh \"\"}" +REASON="${2:-unspecified}" + +# Find the action file +ACTION_FILE="" +if [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" ]; then + ACTION_FILE="${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" +elif [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ]; then + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" +else + log "ERROR: action $ACTION_ID not found in pending/ or approved/" + exit 1 +fi + +# Update with rejection metadata and move to rejected/ +TMP=$(mktemp) +jq --arg reason "$REASON" --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + '.status = "rejected" | .rejected_at = $ts | .reject_reason = $reason' \ + "$ACTION_FILE" > "$TMP" && mv "$TMP" "${OPS_VAULT_DIR}/rejected/${ACTION_ID}.json" +rm -f "$ACTION_FILE" + +# Clean up lock if present +rm -f "${LOCKS_DIR}/${ACTION_ID}.lock" + +log "$ACTION_ID: rejected — $REASON"